Ticket #3493: wikitopdf.py

File wikitopdf.py, 3.6 kB (added by nagrigoriadis@gmail.com, 5 months ago)

modified version to download all images from the webserver

Line 
1 """
2 Copyright (C) 2008 Prognus Software Livre - www.prognus.com.br
3 Author: Diorgenes Felipe Grzesiuk <diorgenes@prognus.com.br>
4 """
5
6 from trac.core import *
7 from trac.util import escape
8 from trac.mimeview.api import IContentConverter
9 from trac.wiki.formatter import wiki_to_html
10 from tempfile import mkstemp
11 import os
12 import re
13 import random
14
# Wiki markup that is stripped from the page text before conversion: the
# PageOutline and TracGuideToc macros, and a "Back up: [[ParentWiki]]" footer
# together with the "----" rule on the line directly above it.
# Each entry is (pattern, flags); a flags value of 0 means "no flags".
EXCLUDE_RES = list(
    re.compile(pattern, flags)
    for pattern, flags in (
        (r'\[\[PageOutline([^]]*)\]\]', 0),
        (r'\[\[TracGuideToc([^]]*)\]\]', 0),
        (r'----(\r)?$\n^Back up: \[\[ParentWiki\]\]', re.I | re.M),
    )
)
20
class WikiToPdfPage(Component):
    """Convert Wiki pages to PDF using HTMLDOC (http://www.htmldoc.org/).

    The wiki text is rendered to HTML, every ``<img>`` it references is
    downloaded with ``wget`` into a local temporary file, and the resulting
    HTML is handed to the ``htmldoc`` command line tool.

    Configuration read by this component:

    * ``[wikitopdf] trac_uri`` -- prefix prepended to image URLs starting
      with ``/``.
    * ``[wikitopdf] tmp_dir`` -- directory for downloaded images
      (``/tmp/wikitopdf`` when empty).
    * ``[trac] charset`` -- encoding of the HTML given to htmldoc
      (``iso-8859-1`` when unset).
    * ``[wikitopdf-page]`` -- every option is passed to htmldoc as
      ``--<name> <value>``.
    """
    implements(IContentConverter)

    # IContentConverter methods
    def get_supported_conversions(self):
        """Yield the single conversion offered: Trac wiki text to PDF."""
        yield ('pdf', 'WikiToPdf', 'pdf', 'text/x-trac-wiki', 'application/pdf', 7)

    def convert_content(self, req, input_type, text, output_type):
        """Render wiki ``text`` to a PDF document.

        ``input_type`` and ``output_type`` are accepted for the converter
        interface but are not consulted.

        Returns a ``(pdf_bytes, 'application/pdf')`` tuple.  Every temporary
        file created for this request (HTML input, PDF output, downloaded
        images) is removed before returning, also when an error occurs.
        """
        tracuri = self.env.config.get('wikitopdf', 'trac_uri')
        tmp_dir = self._prepare_tmp_dir()

        # htmldoc doesn't support utf-8, we need to use some other input encoding
        codepage = self.env.config.get('trac', 'charset', 'iso-8859-1')

        for r in EXCLUDE_RES:
            text = r.sub('', text)

        page = wiki_to_html(text, self.env, req).encode(codepage, 'replace')

        self.env.log.debug('WikiToPdf => HTML output for WikiToPdf in charset: %s' % codepage)
        self.env.log.debug('WikiToPdf => HTML intput for WikiToPdf: %s' % text)

        # NOTE(review): this rewrites every occurrence of 'attachment' in the
        # page, visible text included -- presumably meant to remap attachment
        # URLs; confirm against the web server layout before changing it.
        page = page.replace('attachment', 'attachments')
        page = page.replace('?format=raw', '')
        # Wrap preformatted blocks in a bordered, shaded one-cell table.
        page = page.replace('<pre class="wiki">', '<table align="center" width="95%" border="1" bordercolor="#d7d7d7">'
                            + '<tr><td bgcolor="#f7f7f7"><pre class="wiki">')
        page = page.replace('</pre>', '</pre></td></tr></table>')
        page = page.replace('<table class="wiki">', '<table class="wiki" border="1" width="100%">')

        # Only files created by this request are tracked and removed, so
        # concurrent conversions cannot delete each other's temporary files.
        images = []
        hfilename = None
        pfilename = None
        try:
            page = self._localize_images(page, tracuri, tmp_dir, images)

            self.env.log.debug('WikiToPdf => Html code: %r' % page)

            meta = ('<meta http-equiv="Content-Type" content="text/html; charset=%s"/>' % codepage).encode(codepage)

            hfile, hfilename = mkstemp('wikitopdf')
            try:
                os.write(hfile, '<html><head>' + meta + '</head><body>' + page + '</body></html>')
            finally:
                os.close(hfile)

            # Only the name is needed; htmldoc writes the PDF itself.
            pfile, pfilename = mkstemp('wikitopdf')
            os.close(pfile)

            os.environ["HTMLDOC_NOCGI"] = 'yes'
            htmldoc_args = {'webpage': None, 'format': 'pdf14', 'charset': codepage}
            htmldoc_args.update(dict(self.env.config.options('wikitopdf-page')))

            args_string = ' '.join(['--%s %s' % (arg, value or '') for arg, value
                                    in htmldoc_args.items()])

            self.env.log.debug('WikiToPdf => Htmldoc code out: %s' % args_string)

            # The command line is built only from site configuration and
            # mkstemp() file names, never from wiki content, so it is still
            # passed through the shell to keep option values working as before.
            os.system('htmldoc %s %s -f %s' % (args_string, hfilename, pfilename))

            pdf = open(pfilename, 'rb')
            try:
                out = pdf.read()
            finally:
                pdf.close()
        finally:
            for path in images + [hfilename, pfilename]:
                if path:
                    self._remove_quietly(path)

        return (out, 'application/pdf')

    # Internal helpers
    def _prepare_tmp_dir(self):
        """Return the directory for downloaded images, creating it if needed.

        Returns ``None`` (meaning "use the system default temporary
        directory") when the configured directory cannot be created.
        """
        tmp_dir = self.env.config.get('wikitopdf', 'tmp_dir')
        if not tmp_dir:
            tmp_dir = "/tmp/wikitopdf"
        try:
            os.makedirs(tmp_dir)
        except OSError:
            # Raised both when the directory already exists (fine) and when
            # it cannot be created (fall back instead of failing the export).
            if not os.path.isdir(tmp_dir):
                self.env.log.warning('WikiToPdf => cannot create tmp_dir %s, '
                                     'using the default temporary directory',
                                     tmp_dir)
                return None
        return tmp_dir

    def _localize_images(self, page, tracuri, tmp_dir, downloaded):
        """Download every ``<img src="...">`` and point the tag at the copy.

        ``downloaded`` is a list that receives the path of each local file
        as soon as it is created, so the caller can clean up even if this
        method fails part-way.  Returns the rewritten page.
        """
        marker = 'src="'
        imgcounter = 0
        search_from = 0
        while True:
            imgpos = page.find('<img', search_from)
            if imgpos == -1:
                break
            tagend = page.find('>', imgpos)
            if tagend == -1:
                break  # unterminated tag: nothing reliable left to rewrite

            # Look for the attribute inside this tag only, so an <img>
            # without src cannot pick up the src of a later tag.
            addrpos = page.find(marker, imgpos, tagend)
            if addrpos == -1:
                search_from = tagend
                continue
            start = addrpos + len(marker)
            end = page.find('"', start, tagend)
            if end == -1:
                search_from = tagend
                continue

            theimg = page[start:end]
            if theimg[:1] == '/':
                # Server-relative URL: make it absolute for the download.
                theimg = tracuri + theimg

            newimg = self._fetch_image(theimg, tmp_dir, imgcounter)
            downloaded.append(newimg)

            page = page[:start] + newimg + page[end:]
            imgcounter += 1
            search_from = start + len(newimg)
        return page

    def _fetch_image(self, url, tmp_dir, index):
        """Download ``url`` with wget into a fresh temporary file.

        Returns the path of the local file.  A failed download is logged and
        leaves the (empty) file in place; it does not abort the conversion.
        """
        # Local import: the module header does not import subprocess.
        import subprocess

        # Keep a sanitized, bounded tail of the URL as suffix so the file
        # extension survives; mkstemp() guarantees a unique, unguessable name.
        basename = re.sub(r'[^A-Za-z0-9._-]', '_', url[url.rfind('/') + 1:])[-64:]
        fd, path = mkstemp('_' + basename, 'wikitopdf_%d_' % index, tmp_dir)
        os.close(fd)

        devnull = open(os.devnull, 'w')
        try:
            try:
                # The URL comes from wiki content, so it is passed as a
                # separate argument (no shell); '--' stops it from being
                # parsed as a wget option.
                status = subprocess.call(['wget', '-q', '-O', path, '--', url],
                                         stdout=devnull, stderr=devnull)
            except OSError:
                status = -1  # wget could not be started
        finally:
            devnull.close()

        if status != 0:
            self.env.log.warning('WikiToPdf => could not download image %s', url)
        return path

    def _remove_quietly(self, path):
        """Delete ``path``; a file that is already gone is not an error."""
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup must not mask the real result or error.
            pass