5 import xml.parsers.expat
7 from optparse import OptionParser
9 ###############################################################################
13 ###############################################################################
# Constructor for a stream entry: takes the wrapped object and a flag.
# Callers in this file pass True for child elements and False for text
# (see the StreamEntry( ..., True/False ) calls).
# NOTE(review): the enclosing class header and the statement storing
# `object` are not visible in this extract -- presumably this is
# StreamEntry.__init__; confirm against the full file.
16 def __init__( self, object, isElement ):
# Record whether the wrapped object was added as an element or as text.
18 self.isElement = isElement
20 ###############################################################################
# Base element constructor (presumably Element.__init__, judging by the
# Element.__init__( self, parent, ... ) calls made by subclasses in this file;
# the class header itself is not visible in this extract).
# Parameters: parent, text, strip, delimBegin, delimEnd, newline.
# NOTE(review): the statements that handle parent/text/strip and that create
# self._stream are not visible here -- confirm against the full file.
23 def __init__( self, parent, text=True, strip=True, delimBegin=None, delimEnd=None, newline=0 ):
# Written by _write() ahead of the element's stream entries.
27 self._delimBegin = delimBegin
# Written by _write() after the element's stream entries.
28 self._delimEnd = delimEnd
# Compared with writer.newlineCount in write() to decide how many '\n' to emit first.
29 self._newline = newline
def _addElement(self, child):
    """Append *child* to this element's stream, flagged as an element entry."""
    entry = StreamEntry(child, True)
    self._stream.append(entry)
# Append a piece of character data to this element's stream as a text entry
# (isElement flag False).
# NOTE(review): one statement between the def line and the append is not
# visible in this extract (possibly a guard or stripping step) -- confirm.
35 def _addText( self, text ):
37 self._stream.append( StreamEntry( text, False ))
# Emit this element to `file`: opening delimiter, every stream entry in
# insertion order, then closing delimiter.
# NOTE(review): the conditionals around the delimiter writes and the
# element/text dispatch inside the loop are not visible in this extract;
# presumably they test for None delimiters and entry.isElement -- confirm.
39 def _write( self, file ):
41 file.write( self._delimBegin )
42 for entry in self._stream:
# Presumably the element case: let the child object write itself.
44 entry.object.write( file )
# Presumably the text case: write the string form of the stored object.
46 file.write( str(entry.object) )
48 file.write( self._delimEnd )
# Public write entry point. Before anything else, emits enough '\n'
# characters to make up the difference between this element's _newline
# setting and the newlineCount tracked by the module-level `writer`.
# NOTE(review): the statement that follows (presumably the call that writes
# the element body) is not visible in this extract -- confirm.
50 def write( self, file ):
# Only pad when fewer newlines have been counted than this element asks for.
51 if self._newline > writer.newlineCount:
52 file.write( '\n' * (self._newline - writer.newlineCount))
55 ###############################################################################
# Document: Element subclass that tracks a stack of open elements and
# exposes handleElementBegin / handleElementEnd / handleCharacterData, which
# the script code later in this file installs as expat parser callbacks.
# NOTE(review): this extract omits a number of statements (the constructor's
# def line, several branch conditions, and others); the comments below only
# describe what is visible.
57 class Document( Element ):
# Initialise the base element with no parent.
59 Element.__init__( self, None )
# Stack of elements, seeded with the document itself.
60 self._stack = [ self ]
# Indentation prefix used by the verbose BEGIN/END/text debug prints.
63 self._debugIndent = ''
# Counters adjusted by _chapterBegin/_chapterEnd and _sectionBegin/_sectionEnd;
# their sum is passed as the level when a HeadingElement is created.
64 self._chapterLevel = 0
65 self._sectionLevel = 0
# Number of dividers inserted so far (incremented by HeadingElement).
66 self._dividerCount = 0
68 #self._pragmaSummary = PragmaElement( self, 'summary' )
69 #self._pragmaLabels = PragmaElement( self, 'labels' )
70 #self._pragmaLabels._addText( 'xml2wiki,Distribution,Featured' )
# _pending is the element on top of the stack; new children and text go to it.
# NOTE(review): the lines around this statement are not visible -- it may
# belong to the constructor or to a separate pop helper; confirm.
74 self._pending = self._stack[-1]
# Push `child` onto the element stack.
# NOTE(review): the use of `add` and any update of _pending are not visible
# in this extract; presumably `add` guards the _addElement call -- confirm.
77 def _pushChild( self, child, add=True ):
79 self._pending._addElement( child );
80 self._stack.append( child )
# Increase the chapter nesting level by one.
84 def _chapterBegin( self ):
85 self._chapterLevel = self._chapterLevel + 1
# Decrease the chapter nesting level by one.
87 def _chapterEnd( self ):
88 self._chapterLevel = self._chapterLevel - 1
# Increase the section nesting level by one.
90 def _sectionBegin( self ):
91 self._sectionLevel = self._sectionLevel + 1
# Decrease the section nesting level by one.
93 def _sectionEnd( self ):
94 self._sectionLevel = self._sectionLevel - 1
# Write the document: a "generated by xml2wiki" stamp carrying the current
# local time (time.strftime('%c')), followed by the regular Element output.
# NOTE(review): any condition guarding the stamp (e.g. the --date option) is
# not visible in this extract -- confirm.
96 def _write( self, file ):
97 #self._pragmaSummary.write( file )
99 #self._pragmaLabels.write( file )
101 file.write( "\n\n ===== `[`generated by xml2wiki on %s`]` =====" % (time.strftime( '%c' ) ))
103 # file.write( '\n\n<wiki:toc max_depth="3" />' )
104 Element._write( self, file )
# Start-tag callback: maps an XML element name to the Element subclass that
# renders it and pushes that element onto the stack.
# NOTE(review): several `elif name == ...` conditions and the assignment of
# `shouldAdd` are not visible in this extract, so some `e = ...` lines below
# appear without their guarding condition.
107 def handleElementBegin( self, name, attrs ):
# Debug indent is derived from the current stack depth.
108 self._debugIndent = ' ' * (len(self._stack) - 1)
110 print( '%sBEGIN %s %s' % (self._debugIndent, name, attrs))
# Reads the 'name' attribute of the current tag.
# NOTE(review): HeadingElement reads a name `anchor` that is not one of its
# parameters; presumably this assignment targets a global -- confirm.
117 anchor = attrs['name']
119 e = Element( self._pending, delimBegin="'''", delimEnd="'''" )
120 elif name == 'chapter':
# NOTE(review): delimBegin here has four '{' while delimEnd has three '}',
# unlike the other '{{{' / '}}}' pairs below -- confirm this is intentional.
123 e = Element( self._pending, delimBegin='{{{{', delimEnd='}}}' )
124 elif name == 'command':
125 e = Element( self._pending, delimBegin='{{{', delimEnd='}}}' )
126 elif name == 'enumerate':
127 e = EnumerateElement( self._pending )
128 elif name == 'example':
129 e = CodeElement( self._pending )
131 e = Element( self._pending, delimBegin='{{{', delimEnd='}}}' )
133 e = Element( self._pending, delimBegin="''", delimEnd="''" )
134 elif name == 'itemize':
135 e = ItemizeElement( self._pending )
137 e = ItemElement( self._pending )
138 #elif name == 'majorheading':
139 # e = self._pragmaSummary
142 e = ParagraphElement( self._pending )
143 elif name == 'quotation':
144 e = IndentedElement( self._pending )
146 e = Element( self._pending, delimBegin='{{{', delimEnd='}}}' )
147 elif name == 'section' or name == 'subsection':
149 #elif name == 'table':
150 # e = Element( self._pending, newline=1, delimBegin='<table border="1" cellpadding="4">', delimEnd='</table>', strip=True )
151 elif name == 'tableitem':
152 e = TableItemElement( self._pending )
153 elif name == 'tableterm':
154 e = Element( self._pending, delimBegin=' ', delimEnd='::\n' )
155 elif name == 'title':
# Heading depth is the sum of the current chapter and section levels.
156 e = HeadingElement( self._pending, self._chapterLevel + self._sectionLevel )
157 elif name == 'unnumbered' or name == 'unnumberedsec':
160 e = UrefInline( self._pending )
161 elif name == 'urefdesc':
162 e = UrefDescInline( self._pending )
163 elif name == 'urefurl':
164 e = UrefUrlInline( self._pending )
166 e = XrefInline( self._pending )
167 elif name == 'xrefnodename':
168 e = XrefNodenameInline( self._pending )
# Presumably the fallback for unmapped tags: push an UnknownElement and,
# at verbosity above 2, report the tag name.
171 self._pushChild( UnknownElement( self._pending ) )
172 if options.verbose > 2:
173 print( 'UNKNOWN:', name )
175 self._pushChild( e, add=shouldAdd )
# End-tag callback.
# NOTE(review): the bodies of the three branches and the stack pop are not
# visible in this extract; presumably they call the matching
# _chapterEnd/_sectionEnd helpers -- confirm.
177 def handleElementEnd( self, name ):
178 if name == 'chapter':
180 elif name == 'section' or name == 'subsection':
182 elif name == 'unnumbered' or name == 'unnumberedsec':
186 self._debugIndent = ' ' * (len(self._stack) - 1)
188 print( '%sEND %s' % (self._debugIndent, name))
# Character-data callback: at verbosity above 1 echoes the stripped text, then
# hands the unmodified data to the element currently on top of the stack.
190 def handleCharacterData( self, data ):
191 if options.verbose > 1:
192 print( '%s[%s]' % (self._debugIndent, data.strip()))
193 self._pending._addText( data )
195 ###############################################################################
class UnknownElement(Element):
    """Element pushed by the document for XML tags it has no mapping for.

    Initialises the base ``Element`` with ``text=False``.
    """

    def __init__(self, parent):
        Element.__init__(self, parent, text=False)
201 ###############################################################################
class PragmaElement(Element):
    """Element whose opening delimiter is ``#<keyword> `` (hash, keyword, space)."""

    def __init__(self, parent, keyword):
        # Build the pragma prefix first, then hand it to the base class.
        prefix = '#' + keyword + ' '
        Element.__init__(self, parent, delimBegin=prefix)
207 ###############################################################################
class BlockElement(Element):
    """Base for block-level elements: ``newline=2`` and ``text=False``."""

    def __init__(self, parent):
        Element.__init__(
            self,
            parent,
            newline=2,
            text=False,
        )
213 ###############################################################################
class CodeElement(Element):
    """Element wrapped in a ``{{{`` / ``}}}`` pair, each on its own line."""

    def __init__(self, parent):
        opening = '{{{\n'
        closing = '\n}}}\n'
        Element.__init__(
            self,
            parent,
            newline=2,
            delimBegin=opening,
            delimEnd=closing,
        )
219 ###############################################################################
# HeadingElement: renders a title as '=' * level on both sides of the text,
# followed by '#<anchor>' and a newline.
# NOTE(review): several statements of this constructor are not visible in
# this extract, including whatever makes `anchor` and `doc` resolvable and
# the condition that selects level-1 headings -- confirm against the full file.
221 class HeadingElement( Element ):
222 def __init__( self, parent, level ):
223 Element.__init__( self, parent, newline=2 )
# `anchor` is not a parameter here; presumably a module-level name set while
# handling the enclosing tag -- TODO confirm.
225 self._anchor = anchor
# Opening delimiter: `level` '=' characters and a space.
228 self._delimBegin = ('=' * level) + ' '
# Closing delimiter: space, `level` '=' characters, space, '#anchor', newline.
229 self._delimEnd = ' %s #%s\n' % (('=' * level), self._anchor)
230 #self._delimEnd = ' ' + ('=' * level) + ' #%s\n' % (self._anchor)
232 # insert divider for level 1 headers
# A '----' divider line is prepended when --toc is set or a divider has
# already been counted; the document's divider counter is then incremented.
# NOTE(review): whether the increment sits inside the `if` cannot be told
# from this flattened extract.
234 if options.toc or doc._dividerCount:
235 self._delimBegin = '----\n%s' % (self._delimBegin)
236 doc._dividerCount = doc._dividerCount + 1
240 ###############################################################################
# IndentedElement: block element whose _write delegates to Element._write.
# NOTE(review): the statements before and after the delegated call are not
# visible in this extract; presumably they raise and then restore the
# writer's indentation level -- confirm.
242 class IndentedElement( BlockElement ):
243 def _write( self, file ):
245 Element._write( self, file )
248 ###############################################################################
# EnumerateElement: IndentedElement subclass created for 'enumerate' tags;
# ParagraphElement checks for it to choose the '# ' item prefix.
# NOTE(review): the class body is not visible in this extract.
250 class EnumerateElement( IndentedElement ):
253 ###############################################################################
# ItemizeElement: IndentedElement subclass created for 'itemize' tags.
# NOTE(review): the class body is not visible in this extract.
255 class ItemizeElement( IndentedElement ):
258 ###############################################################################
# ItemElement: block element for a single list/table item.
# NOTE(review): at least one statement after the isinstance test is not
# visible, and the flattened extract does not show which assignments fall
# under the `if` -- confirm against the full file.
260 class ItemElement( BlockElement ):
261 def __init__( self, parent ):
262 BlockElement.__init__( self, parent )
# Items whose parent is a TableItemElement get special handling.
264 if isinstance( parent, TableItemElement ):
266 #self._delimBegin = '<td>'
267 #self._delimEnd = '</td>'
# Delimiters: a single space before the content, a newline after it.
268 self._delimBegin = ' '
269 self._delimEnd = '\n'
271 ###############################################################################
# ParagraphElement: paragraph with newline=2; when it sits inside an
# ItemElement the opening delimiter depends on the item's own parent.
# NOTE(review): the body of the TableItemElement branch and the `else:` line
# ahead of the '* ' assignment are not visible in this extract -- confirm.
273 class ParagraphElement( Element ):
274 def __init__( self, parent ):
275 Element.__init__( self, parent, newline=2 )
276 if isinstance( parent, ItemElement ):
# parent._parent is the container that holds the item.
277 if isinstance( parent._parent, TableItemElement ):
279 elif isinstance( parent._parent, EnumerateElement ):
# Item inside an EnumerateElement: '# ' prefix.
281 self._delimBegin = '# '
# Presumably the remaining case (e.g. itemize): '* ' prefix.
284 self._delimBegin = '* '
286 ###############################################################################
class TableItemElement(Element):
    """Container for one table row; uses ``newline=1`` and ``text=False``.

    An HTML ``<tr>`` / ``</tr>`` delimiter pair was sketched for this
    element at one point but is disabled, so no delimiters are set.
    """

    def __init__(self, parent):
        Element.__init__(
            self,
            parent,
            newline=1,
            text=False,
        )
294 ###############################################################################
class UrefInline(Element):
    """Inline link wrapper: content is enclosed in ``[`` and ``]``."""

    def __init__(self, parent):
        Element.__init__(
            self,
            parent,
            text=False,
            delimBegin='[',
            delimEnd=']',
        )
300 ###############################################################################
class UrefDescInline(Element):
    """Link description: content is preceded by a single space."""

    def __init__(self, parent):
        separator = ' '
        Element.__init__(self, parent, delimBegin=separator)
306 ###############################################################################
class UrefUrlInline(Element):
    """Link target: a plain ``Element`` with all default settings."""

    def __init__(self, parent):
        Element.__init__(self, parent)
312 ###############################################################################
class XrefInline(Element):
    """Cross-reference wrapper; initialises the base with ``text=False``."""

    def __init__(self, parent):
        Element.__init__(self, parent, text=False)
318 ###############################################################################
class XrefNodenameInline(Element):
    """Cross-reference node name, rendered as ``[#anchor label]``.

    The label is whatever the base ``Element`` would have written; the
    anchor is that label with every space replaced by an underscore.
    """

    def __init__(self, parent):
        Element.__init__(self, parent)

    def _write(self, file):
        # Render the regular element output into a scratch buffer so the
        # text can be reused for both the anchor and the visible label.
        scratch = io.StringIO()
        Element._write(self, scratch)
        label = str(scratch.getvalue())
        target = re.sub(' ', '_', label)
        file.write('[#%s %s]' % (target, label))
331 ###############################################################################
# IndentedWriter: file-like wrapper that keeps an indentation string made of
# `size`-space chunks and a public newlineCount that Element.write consults.
# NOTE(review): several statements are not visible in this extract (the
# initialisation of _file/_level/_indent and most of write()'s control flow);
# the comments below describe only the visible lines.
333 class IndentedWriter:
334 def __init__( self, size, file ):
# One indentation step: `size` spaces.
335 self._chunk = ' ' * size
# Flag cleared in write() just before the indent string is emitted.
339 self._pending = False
# Newline counter; incremented and reset inside write().
341 self.newlineCount = 0
# Drop one indentation level and rebuild the indent string.
343 def decrease( self ):
344 self._level = self._level - 1
345 self._indent = self._chunk * self._level
# Add one indentation level and rebuild the indent string.
347 def increase( self ):
348 self._level = self._level + 1
349 self._indent = self._chunk * self._level
# Write `data` to the wrapped file.
# NOTE(review): the loop and conditions are missing here; presumably data is
# processed character by character (`b`), the indent is emitted at the start
# of a line, and newlineCount tracks consecutive newlines -- confirm.
351 def write( self, data ):
354 self._pending = False
355 self._file.write( self._indent )
357 self.newlineCount = self.newlineCount + 1
360 self.newlineCount = 0
361 self._file.write( b )
363 ###############################################################################
# Command-line interface: exactly one positional argument (the XML input
# file) plus optional flags. The parsed values are published as the
# module-level names `options` and `args`, which the rest of the file reads.
parser = OptionParser('Usage: %prog [OPTIONS] xml')
parser.add_option(
    '-d', '--date',
    action='store_true', default=False,
    help='generate date-stamp under title',
)
parser.add_option(
    '-t', '--toc',
    action='store_true', default=False,
    help='generate table of contents',
)
# `count` keeps an integer that grows with every -v, so its default is the
# integer 0 (compares equal to the previous False) rather than a boolean.
parser.add_option(
    '-v', '--verbose',
    action='count', default=0,
    help='increase verbosity',
)

(options, args) = parser.parse_args()

# parser.error() prints the usage message and exits with status 2.
if len(args) != 1:
    parser.error('incorrect number of arguments')
375 ###############################################################################
# Driver script: creates an expat parser, wires the document's handler methods
# into it, opens the input file named on the command line, and creates the
# IndentedWriter that wraps standard output.
# NOTE(review): several statements are not visible in this extract (creation
# of `doc`, the actual parse call, the loop that binds `e`, and the final
# document write) -- confirm against the full file.
# NOTE(review): this assignment rebinds the name `xml` from the imported
# package to the parser object, so the xml package is no longer reachable by
# that name afterwards.
378 xml = xml.parsers.expat.ParserCreate()
# Route start-tag, end-tag and character-data events to the document.
380 xml.StartElementHandler = doc.handleElementBegin
381 xml.EndElementHandler = doc.handleElementEnd
382 xml.CharacterDataHandler = doc.handleCharacterData
# Input is opened in binary mode; args[0] is the single positional argument.
386 with open( args[0], 'rb' ) as fin:
# Module-level writer consulted by Element.write; indentation step is 4 spaces.
389 writer = IndentedWriter( 4, sys.stdout )
# Emits one bullet per `e`: indentation from e._level, then a '[#anchor text]'
# link built from e._anchor and the object of e's first stream entry
# (presumably a table-of-contents line per heading -- confirm).
393 writer.write( '%s* [#%s %s]\n' % (' ' * e._level,e._anchor,e._stream[0].object) )