Attachment 'markdown.py'
Download 1 #!/usr/bin/env python
2
3 """
4 ====================================================================
5 IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
6 ====================================================================
7
8 Python-Markdown
9 ===============
10
11 Converts Markdown to HTML. Basic usage as a module:
12
13 import markdown
14 html = markdown.markdown(your_text_string)
15
16 Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
17 maintained by [Yuri Takhteyev](http://www.freewisdom.org).
18
19 Project website: http://www.freewisdom.org/projects/python-markdown
20 Contact: yuri [at] freewisdom.org
21
22 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
23
24 Version: 1.3 (Feb. 28, 2006)
25
26 For changelog, see end of file
27 """
28
29 import re, sys, os, random
30
# Message levels in increasing order of severity:
# VERBOSE = 0 (everything), INFO = 1, CRITICAL = 2, NONE = 3 (nothing)
(VERBOSE, INFO, CRITICAL, NONE) = range(4)

# Messages whose level is below this threshold are suppressed.
MESSAGE_THRESHOLD = CRITICAL

def message(level, text) :
    """Print a diagnostic message if it is important enough.

    @param level: one of VERBOSE, INFO, CRITICAL or NONE
    @param text: the message to print to standard output
    @returns: None """
    if level >= MESSAGE_THRESHOLD :
        # Parenthesised form: identical output under Python 2 for a single
        # argument, and also valid syntax under Python 3.
        print(text)
39
40
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------

# all tabs will be expanded to up to this many spaces
TAB_LENGTH = 4
# when true, Element.toxml() lets text nodes turn "{@name=value}" markers
# into attributes of their parent element
ENABLE_ATTRIBUTES = 1
# when true, the stricter underscore-emphasis regular expression is used
# (the opening "_" may not directly follow a non-space character)
SMART_EMPHASIS = 1

# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------

# NOTE(review): not referenced in the visible part of the file; presumably
# used by the footnote extension -- confirm.
FN_BACKLINK_TEXT = "zz1337820767766393qq"
# a template for html placeholders
HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas"
# "%d" is filled with the index of the stashed HTML segment
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls"
54
# Tag names treated as block-level HTML (headers h0-h9 are handled
# separately in is_block_level).
BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
                        'dl', 'ol', 'ul', 'script', 'noscript',
                        'form', 'fieldset', 'iframe', 'math', 'ins',
                        'del', 'hr', 'hr/']

def is_block_level (tag) :
    """Tell whether a (lower-cased) tag name denotes a block-level element.

    @param tag: the tag name without angle brackets, e.g. "div" or "h2"
    @returns: True if the tag is listed in BLOCK_LEVEL_ELEMENTS or is an
              "h" followed by a digit; False otherwise (including for an
              empty string or a bare "h") """
    if tag in BLOCK_LEVEL_ELEMENTS :
        return True
    # The length check keeps "" and "h" from raising IndexError.
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in "0123456789"
63
64 """
65 ======================================================================
66 ========================== NANODOM ===================================
67 ======================================================================
68
69 The three classes below implement some of the most basic DOM
70 methods. I use this instead of minidom because I need a simpler
71 functionality and do not want to require additional libraries.
72
73 Importantly, NanoDom does not do normalization, which is what we
74 want. It also adds extra white space when converting DOM to string
75 """
76
77
class Document :
    """The root of a NanoDom tree.

    Holds a single top-level element (documentElement), acts as a factory
    for elements, text nodes and entity references, and escapes text when
    the tree is serialised. """

    def __init__(self) :
        # Initialised here so that the factory methods and unlink() are
        # usable even before a root element has been attached.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child) :
        """Make child the root element of this document.

        Also starts a fresh cache of entity references. """
        self.documentElement = child
        child.parent = self
        self.entities = {}

    def createElement(self, tag, textNode=None) :
        """Create an Element owned by this document.

        @param tag: the element name
        @param textNode: optional text; if non-empty, it is appended to
                         the new element as a text node
        @returns: the new Element """
        el = Element(tag)
        el.doc = self
        if textNode :
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text) :
        """Create a TextNode owned by this document."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the (cached) EntityReference for entity, e.g. "#64"."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def toxml (self) :
        """Serialise the tree starting at the root element."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text) :
        """Escape markup-significant characters in text.

        "&" is deliberately left alone (its pair is commented out) so
        that entity references already present in the text pass through
        unchanged.

        @param text: the string to escape
        @returns: text with <, > and double quotes replaced by entities """

        pairs = [ #("&", "&amp;"),
                  ("<", "&lt;"),
                  (">", "&gt;"),
                  ("\"", "&quot;")]

        for old, new in pairs :
            text = text.replace(old, new)
        return text

    def find(self, test) :
        """Return the descendants of the root for which test(node) is true."""
        return self.documentElement.find(test)

    def unlink(self) :
        """Detach the tree from the document (no-op if there is no root)."""
        if self.documentElement is not None :
            self.documentElement.unlink()
        self.documentElement = None
121 self.documentElement = None
122
123
class Element :
    """A NanoDom element: a tag name, ordered attributes and child nodes."""

    type = "element"

    def __init__ (self, tag) :
        self.nodeName = tag
        # attribute names in insertion order; the values live in the dict
        self.attributes = []
        self.attribute_values = {}
        self.childNodes = []

    def unlink(self) :
        """Recursively drop the child lists of this subtree."""
        for node in self.childNodes :
            if node.type == "element" :
                node.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value) :
        """Set an attribute, remembering the order of first assignment."""
        if attr not in self.attributes :
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child) :
        """Insert child at the given index and adopt it."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child) :
        """Remove child from the list of children."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild) :
        """Put newChild at the position currently held by oldChild."""
        where = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(where, newChild)

    def appendChild(self, child) :
        """Add child as the last child and adopt it."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self) :
        """Elements carry no attribute markers of their own; nothing to do."""
        pass

    def find(self, test, depth=0) :
        """ Returns a list of descendants that pass the test function """
        found = []
        for node in self.childNodes :
            if test(node) :
                found.append(node)
            if node.type == "element" :
                found.extend(node.find(test, depth+1))
        return found

    def toxml(self):
        """Serialise this element and its subtree to a string."""
        if ENABLE_ATTRIBUTES :
            # let children move "{@name=value}" markers onto this element
            # before the attributes are written out
            for node in self.childNodes:
                node.handleAttributes()

        name = self.nodeName
        # NOTE(review): the whitespace literals below are reproduced as
        # found; confirm their width against the upstream source.
        pieces = []
        if name in ['h1', 'h2', 'h3', 'h4'] :
            pieces.append("\n")
        elif name in ['li'] :
            pieces.append("\n ")

        pieces.append("<" + name)
        for attr in self.attributes :
            escaped = self.doc.normalizeEntities(self.attribute_values[attr])
            pieces.append(' %s="%s"' % (attr, escaped))

        if self.childNodes :
            pieces.append(">")
            for node in self.childNodes :
                pieces.append(node.toxml())
            if name == 'p' :
                pieces.append("\n")
            elif name == 'li' :
                pieces.append("\n ")
            pieces.append("</%s>" % name)
        else :
            # childless elements are written in self-closing form
            pieces.append("/>")

        if name in ['p', 'li', 'ul', 'ol',
                    'h1', 'h2', 'h3', 'h4'] :
            pieces.append("\n")

        return "".join(pieces)
205 return buffer
206
207
class TextNode :
    """A NanoDom leaf node holding a run of text."""

    type = "text"
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}

    def __init__ (self, text) :
        self.value = text

    def attributeCallback(self, match) :
        """Copy one matched {@name=value} marker onto the parent element."""
        name, value = match.group(1), match.group(2)
        self.parent.setAttribute(name, value)

    def handleAttributes(self) :
        """Strip attribute markers from the text, applying them to the parent."""
        stripped = self.attrRegExp.sub(self.attributeCallback, self.value)
        self.value = stripped

    def toxml(self) :
        """Return the text, indented for its context and entity-escaped."""
        text = self.value
        is_placeholder = text.startswith(HTML_PLACEHOLDER_PREFIX)
        if not is_placeholder :
            container = self.parent
            if container.nodeName == "p" :
                text = text.replace("\n", "\n ")
            elif (container.nodeName == "li"
                  and container.childNodes[0]==self):
                # only the first child of a list item gets the leading break
                text = "\n " + text.replace("\n", "\n ")
        return self.doc.normalizeEntities(text)
231 return text
232
233
class EntityReference:
    """A NanoDom node that serialises as an entity reference."""

    type = "entity_ref"

    def __init__(self, entity):
        # the entity name without the surrounding "&" and ";", e.g. "#64"
        self.entity = entity

    def handleAttributes(self):
        """Entity references carry no attribute markers; nothing to do."""
        pass

    def toxml(self):
        """Return the reference in its "&name;" form."""
        return "".join(["&", self.entity, ";"])
246
247
248 """
249 ======================================================================
250 ========================== PRE-PROCESSORS ============================
251 ======================================================================
252
253 Preprocessors munge source text before we start doing anything too
254 complicated.
255
256 Each preprocessor implements a "run" method that takes a pointer to
257 a list of lines of the document, modifies it as necessary and
258 returns either the same pointer or a pointer to a new list.
259 """
260
class HeaderPreprocessor :

    """
       Replaces underlined headers with hashed headers to avoid
       the need for lookahead later.
    """

    def run (self, lines) :
        """Rewrite setext-style headers in place.

        A non-empty line followed by a line consisting only of "=" (or
        only of "-") becomes "# text" (or "## text"), and the underline
        is blanked.

        @param lines: the list of source lines (modified in place)
        @returns: the same list """

        # The last line has no line below it, so stop one short; this
        # avoids indexing past the end of the list.
        for i in range(len(lines) - 1) :
            if not lines[i] :
                continue

            below = lines[i+1]
            if below and below[0] in ['-', '='] :

                underline = below.strip()

                if underline == "="*len(underline) :
                    lines[i] = "# " + lines[i].strip()
                    lines[i+1] = ""
                elif underline == "-"*len(underline) :
                    lines[i] = "## " + lines[i].strip()
                    lines[i+1] = ""

        return lines

HEADER_PREPROCESSOR = HeaderPreprocessor()
290
class LinePreprocessor :
    """Deals with HR lines (needs to be done before processing lists)"""

    def run (self, lines) :
        """Replace every horizontal-rule line with "<hr />".

        @param lines: the list of source lines (modified in place)
        @returns: the same list """
        for i in range(len(lines)) :
            if self._isLine(lines[i]) :
                lines[i] = "<hr />"
        return lines

    def _isLine(self, block) :
        """Determines if a block should be replaced with an <HR>

        A rule is three or more "*", "-" or "_" characters (all the
        same), optionally separated by white space.

        @param block: a single source line
        @returns: 1 if the line is a horizontal rule, 0 otherwise """
        # Four leading spaces mark a code block (tabs have already been
        # expanded); the literal had been collapsed to a single space.
        if block.startswith("    ") : return 0 # a code block
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2 :
            return 0
        # Equivalent to matching the whole text against ^(\**)$, ^(\-*)$
        # or ^(\_*)$ with a non-empty group.
        marker = text[0]
        if marker in "*-_" and text == marker * len(text) :
            return 1
        return 0

LINE_PREPROCESSOR = LinePreprocessor()
314
315
class LineBreaksPreprocessor :
    """Replaces double spaces at the end of the lines with <br />."""

    def run (self, lines) :
        """Append "<br />" to lines that end in two spaces.

        Indented (code) lines, as recognised by the 'tabbed' pattern,
        are left alone.

        @param lines: the list of source lines (modified in place)
        @returns: the same list """
        for i, line in enumerate(lines) :
            # Two trailing spaces request a hard break; the literal had
            # been collapsed to one space, which would have turned every
            # line with any trailing blank into a break.
            if (line.endswith("  ")
                and not RE.regExp['tabbed'].match(line) ):
                lines[i] = line + "<br />"
        return lines

LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
327
328
class HtmlBlockPreprocessor :
    """Removes html blocks from self.lines"""

    def run (self, lines) :
        """Swap raw HTML blocks for placeholders.

        The text is split on blank lines; every block that looks like
        raw HTML is stored in self.stash (assigned from outside before
        run() is called) and replaced by the placeholder that
        stash.store() returns.

        @param lines: the list of source lines
        @returns: a new list of lines """
        new_blocks = []
        text = "\n".join(lines)
        for block in text.split("\n\n") :
            if block.startswith("\n") :
                block = block[1:]
            if self._isHtmlBlock(block) :
                new_blocks.append(
                    self.stash.store(block.strip()))
            else :
                new_blocks.append(block)
        return "\n\n".join(new_blocks).split("\n")

    def _isHtmlBlock(self, block) :
        """Tell whether a blank-line-delimited block is raw HTML.

        The block must start with "<" and end with ">", and either open
        with a "<!", "<?", "<@" or "<%" construct or with a block-level
        tag. """
        if not (block.startswith("<") and block.rstrip().endswith(">")) :
            return False
        if block[1] in ["!", "?", "@", "%"] :
            return True
        words = block[1:].replace(">", " ").split()
        # A block such as "<>" has no tag name at all; treat it as text
        # instead of failing on words[0].
        if not words :
            return False
        return is_block_level(words[0].lower())

HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
349
350
class ReferencePreprocessor :
    """Collects link reference definitions and removes them from the text."""

    def run (self, lines) :
        """Record "[id]: url title" lines and drop them.

        Each definition is stored in self.references (assigned from
        outside) under the lower-cased id as a (url, title) pair.

        @param lines: the list of source lines
        @returns: a new list without the definition lines """
        definition = RE.regExp['reference-def']
        kept = []
        for line in lines:
            found = definition.match(line)
            if not found :
                kept.append(line)
                continue
            key = found.group(2).strip().lower()
            title = dequote(found.group(4).strip())
            self.references[key] = (found.group(3), title)
        return kept

REFERENCE_PREPROCESSOR = ReferencePreprocessor()
366
367 """
368 ======================================================================
369 ========================== INLINE PATTERNS ===========================
370 ======================================================================
371
Inline patterns such as *emphasis* are handled by means of auxiliary
objects, one per pattern. Each pattern object uses a single regular
expression and needs to support the following methods:
375
376 pattern.getCompiledRegExp() - returns a regular expression
377
378 pattern.handleMatch(m, doc) - takes a match object and returns
379 a NanoDom node (as a part of the provided
380 doc) or None
381
All of python markdown's built-in patterns subclass from BasePattern,
but you can add additional patterns that don't.

Also note that all the regular expressions used by inline patterns must
capture the whole block. For this reason, they all start with
'^(.*)' and end with '(.*)$'. For the built-in expressions,
BasePattern takes care of adding the "^(.*)" and "(.*)$".

390 Finally, the order in which regular expressions are applied is very
391 important - e.g. if we first replace http://.../ links with <a> tags
392 and _then_ try to replace inline html, we would end up with a mess.
393 So, we apply the expressions in the following order:
394
395 * escape and backticks have to go before everything else, so
396 that we can preempt any markdown patterns by escaping them.
397
398 * then we handle auto-links (must be done before inline html)
399
400 * then we handle inline HTML. At this point we will simply
401 replace all inline HTML strings with a placeholder and add
402 the actual HTML to a hash.
403
404 * then inline images (must be done before links)
405
406 * then bracketed links, first regular then reference-style
407
408 * finally we apply strong and emphasis
409 """
410
# A run of characters containing no square brackets.
NOBRACKET = r'[^\]\[]*'
# Bracketed text allowing up to six levels of nested brackets. The whole
# text is one group and each nesting level opens another, so BRK
# contributes seven groups to any pattern built from it.
BRK = ( r'\[('
        + (NOBRACKET + r'(\['+NOBRACKET)*6
        + (NOBRACKET+ r'\])*'+NOBRACKET)*6
        + NOBRACKET + r')\]' )

BACKTICK_RE = r'\`([^\`]*)\`'                    # `e= m*c^2`
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`'             # ``e=f("`")``
ESCAPE_RE = r'\\(.)'                             # \<
EMPHASIS_RE = r'\*([^\*]*)\*'                    # *emphasis*
STRONG_RE = r'\*\*(.*)\*\*'                      # **strong**
# NOTE(review): the character class excludes "_" rather than "*"; it looks
# copied from the underscore variant below -- confirm before changing.
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'            # ***strong***

if SMART_EMPHASIS:
    # the opening underscore may not directly follow a non-space character
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'        # _emphasis_
else :
    EMPHASIS_2_RE = r'_([^_]*)_'                 # _emphasis_

STRONG_2_RE = r'__([^_]*)__'                     # __strong__
STRONG_EM_2_RE = r'___([^_]*)___'                # ___strong___

LINK_RE = BRK + r'\s*\(([^\)]*)\)'               # [text](url)
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
# Raw string: the same pattern text as before, without relying on "\s"
# and "\[" being unrecognised escapes in an ordinary string literal.
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
AUTOMAIL_RE = r'<([^> ]*@[^> ]*)>'               # <me@example.com>
HTML_RE = r'(\<[^\>]*\>)'                        # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
442
class BasePattern:
    """Base class of the built-in inline patterns.

    Wraps the given expression so that it matches a whole block: group 1
    is the text before the pattern, the pattern's own groups start at 2,
    and the last group is the text after it. """

    def __init__ (self, pattern) :
        self.pattern = pattern
        whole_block = "^(.*)" + pattern + "(.*)$"
        self.compiled_re = re.compile(whole_block, re.DOTALL)

    def getCompiledRegExp (self) :
        """Return the compiled, whole-block regular expression."""
        return self.compiled_re
451
class SimpleTextPattern (BasePattern) :
    """Replaces a match with the plain text of its first pattern group."""

    def handleMatch(self, m, doc) :
        """Return a text node holding group 2 of the match."""
        captured = m.group(2)
        return doc.createTextNode(captured)
456
class SimpleTagPattern (BasePattern):
    """Wraps the matched text in a single element, e.g. <em> or <strong>."""

    def __init__ (self, pattern, tag) :
        BasePattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc) :
        """Return a self.tag element containing group 2 as text."""
        wrapper = doc.createElement(self.tag)
        content = doc.createTextNode(m.group(2))
        wrapper.appendChild(content)
        return wrapper
467
class BacktickPattern (BasePattern):
    """Turns `backticked` text into a <code> element."""

    def __init__ (self, pattern):
        BasePattern.__init__(self, pattern)
        self.tag = "code"

    def handleMatch(self, m, doc) :
        """Return a <code> element holding the stripped, escaped text."""
        el = doc.createElement(self.tag)
        text = m.group(2).strip()
        # Inside code an ampersand is literal, so escape it here; the
        # replacement had been reduced to a no-op ("&" -> "&") when the
        # file was extracted. Document.normalizeEntities leaves "&" alone,
        # so it is not escaped a second time on output.
        text = text.replace("&", "&amp;")
        el.appendChild(doc.createTextNode(text))
        return el
480
481
class DoubleTagPattern (SimpleTagPattern) :
    """Wraps the matched text in two nested elements.

    self.tag holds both names separated by a comma, outer first
    (e.g. 'strong,em'). """

    def handleMatch(self, m, doc) :
        """Return the outer element, with the text inside the inner one."""
        outer_name, inner_name = self.tag.split(",")
        outer = doc.createElement(outer_name)
        inner = doc.createElement(inner_name)
        outer.appendChild(inner)
        inner.appendChild(doc.createTextNode(m.group(2)))
        return outer
491
492
class HtmlPattern (BasePattern):
    """Hides inline HTML (or an entity) behind a placeholder.

    self.stash must be assigned from outside before handleMatch is used. """

    def handleMatch (self, m, doc) :
        """Stash group 2 and return a text node holding its placeholder."""
        raw = m.group(2)
        return doc.createTextNode(self.stash.store(raw))
498
499
class LinkPattern (BasePattern):
    """Handles inline links: [text](url "title")."""

    def handleMatch(self, m, doc) :
        """Return an <a> element built from the match.

        Group 2 is the link text; group 9 holds the url, optionally
        followed by a title. """
        anchor = doc.createElement('a')
        anchor.appendChild(doc.createTextNode(m.group(2)))

        # We should now have [], [href], or [href, title words...]
        pieces = m.group(9).split()
        href = ""
        if pieces :
            href = pieces[0]
        anchor.setAttribute('href', href)

        if len(pieces) > 1 :
            # everything after the url is the title
            title = dequote(" ".join(pieces[1:]).strip())
            anchor.setAttribute('title', title)
        return anchor
517
518
class ImagePattern (BasePattern):
    """Handles inline images: ![alt text](src "title")."""

    def handleMatch(self, m, doc):
        """Return an <img> element built from the match.

        Group 2 is the alt text; group 9 holds the source, optionally
        followed by a title. """
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        # We should now have [], [src], or [src, title words...]. An
        # empty "()" yields an empty src, as LinkPattern does for href,
        # instead of failing on src_parts[0].
        if src_parts :
            el.setAttribute('src', src_parts[0])
        else :
            el.setAttribute('src', "")
        if len(src_parts) > 1 :
            el.setAttribute('title', dequote(" ".join(src_parts[1:])))
        if ENABLE_ATTRIBUTES :
            # Temporarily attach the alt text as a child so that any
            # {@name=value} markers in it are applied to the <img>.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el
537
class ReferencePattern (BasePattern):
    """Handles reference-style links: [text][id].

    self.references (a dict of id -> (href, title)) must be assigned
    from outside before handleMatch is used. """

    def handleMatch(self, m, doc):
        """Return the element for a defined reference, or None.

        Group 2 is the link text and group 9 the reference id. """
        if m.group(9) :
            ref_id = m.group(9).lower()
        else :
            # if we got something like "[Google][]"
            # we'll use "google" as the id
            ref_id = m.group(2).lower()
        # "in" replaces dict.has_key(), which no longer exists in Python 3
        if ref_id not in self.references : # ignore undefined refs
            return None
        href, title = self.references[ref_id]
        text = m.group(2)
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        """Build the <a> element; overridden for other reference kinds."""
        el = doc.createElement('a')
        el.setAttribute('href', href)
        if title :
            el.setAttribute('title', title)
        el.appendChild(doc.createTextNode(text))
        return el
560
561
class ImageReferencePattern (ReferencePattern):
    """Handles reference-style images: ![alt text][id]."""

    def makeTag(self, href, title, text, doc):
        """Build an <img> element using text as the alt attribute."""
        image = doc.createElement('img')
        image.setAttribute('src', href)
        if title :
            image.setAttribute('title', title)
        image.setAttribute('alt', text)
        return image
571
572
class AutolinkPattern (BasePattern):
    """Handles automatic links: <http://example.com/>."""

    def handleMatch(self, m, doc):
        """Return an <a> element whose href and text are both the url."""
        url = m.group(2)
        anchor = doc.createElement('a')
        anchor.setAttribute('href', url)
        anchor.appendChild(doc.createTextNode(url))
        return anchor
580
class AutomailPattern (BasePattern):
    """Handles automatic e-mail links: <me@example.com>."""

    def handleMatch(self, m, doc) :
        """Return an <a> element with the address written as entities.

        Every character of the visible address becomes a numeric entity
        reference node, and the href is "mailto:" plus the address with
        every character written as "&#NNN;". """
        anchor = doc.createElement('a')
        address = m.group(2)
        prefix = "mailto:"
        if address.startswith(prefix):
            address = address[len(prefix):]

        for char in address:
            anchor.appendChild(doc.createEntityReference("#%d" % ord(char)))

        encoded = ['&#%d;' % ord(char) for char in "mailto:" + address]
        anchor.setAttribute('href', "".join(encoded))
        return anchor
595
# Module-level instances of the built-in inline patterns, one per
# regular expression defined above.

# patterns that produce plain text
ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)

# patterns that wrap the matched text in a single element
BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')

# patterns that wrap the matched text in <strong><em>...</em></strong>
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# links and images, inline and reference-style
LINK_PATTERN = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)

# inline HTML and entities are replaced with stash placeholders
HTML_PATTERN = HtmlPattern(HTML_RE)
ENTITY_PATTERN = HtmlPattern(ENTITY_RE)

# <http://...> and <user@host> automatic links
AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
620
621
622 """
623 ======================================================================
624 ========================== POST-PROCESSORS ===========================
625 ======================================================================
626
627 Markdown also allows post-processors, which are similar to
628 preprocessors in that they need to implement a "run" method. Unlike
629 pre-processors, they take a NanoDom document as a parameter and work
630 with that.
631 #
632 There are currently no standard post-processors, but the footnote
633 extension below uses one.
634 """
635 """
636 ======================================================================
637 ========================== MISC AUXILIARY CLASSES ====================
638 ======================================================================
639 """
640
class HtmlStash :
    """This class is used for stashing HTML objects that we extract
        in the beginning and replace with place-holders."""

    def __init__ (self) :
        self.html_counter = 0 # for counting inline html segments
        self.rawHtmlBlocks=[]

    def store(self, html) :
        """Saves an HTML segment for later reinsertion.  Returns a
           placeholder string that needs to be inserted into the
           document.

           @param html: an html segment
           @returns : a placeholder string """
        self.rawHtmlBlocks.append(html)
        # the placeholder embeds the index under which the segment was saved
        index = self.html_counter
        self.html_counter = index + 1
        return HTML_PLACEHOLDER % index
660
661
class BlockGuru :
    """Helper for finding the extent of an indented block of lines."""

    def _findHead(self, lines, fn, allowBlank=0) :

        """Functional magic to help determine boundaries of indented
           blocks.

           @param lines: an array of strings
           @param fn: a function that returns a substring of a string
                      if the string matches the necessary criteria
           @param allowBlank: specifies whether it's ok to have blank
                      lines between matching functions
           @returns: a list of post processes items and the unused
                      remainder of the original list"""

        items = []
        item = -1

        i = 0 # to keep track of where we are

        for line in lines :

            # a blank line ends the block outright unless blanks are allowed
            if not line.strip() and not allowBlank:
                return items, lines[i:]

            if not line.strip() and allowBlank:
                # If we see a blank line, this _might_ be the end
                i += 1

                # Find the next non-blank line
                for j in range(i, len(lines)) :
                    if lines[j].strip() :
                        next = lines[j]
                        break
                else :
                    # There is no more text => this is the end
                    break

                # Check if the next non-blank line is still a part of the list

                part = fn(next)

                if part :
                    # the block continues; keep the blank line as an
                    # empty item
                    items.append("")
                    continue
                else :
                    break # found end of the list

            part = fn(line)

            if part :
                items.append(part)
                i += 1
                continue
            else :
                # first non-matching line: the remainder starts here
                return items, lines[i:]
        else :
            # runs only when every line was consumed without a break
            i += 1

        return items, lines[i:]


    def detabbed_fn(self, line) :
        """ An auxiliary method to be passed to _findHead """
        # group 4 of the 'tabbed' pattern is the text after the indent
        m = RE.regExp['tabbed'].match(line)
        if m:
            return m.group(4)
        else :
            return None


    def detectTabbed(self, lines) :
        """Split lines into a leading indented block and the remainder.

           @param lines: an array of strings
           @returns: the de-indented lines of the leading block (blank
                     lines inside it allowed) and the remaining lines"""

        return self._findHead(lines, self.detabbed_fn,
                              allowBlank = 1)
737
738
def print_error(string):
    """Write string, followed by a newline, to standard error."""
    line = string + '\n'
    sys.stderr.write(line)
742
743
def dequote(string) :
    """ Removes quotes from around a string

        @param string: the text to unquote
        @returns: string without its first and last character if it is
                  enclosed in a matching pair of single or double
                  quotes; otherwise string unchanged """
    # At least two characters are needed for a *pair* of quotes; without
    # this check a lone quote character would be reduced to "".
    if len(string) >= 2 and (
             ( string.startswith('"') and string.endswith('"'))
          or (string.startswith("'") and string.endswith("'")) ) :
        return string[1:-1]
    else :
        return string
751
752 """
753 ======================================================================
754 ========================== CORE MARKDOWN =============================
755 ======================================================================
756
757 This stuff is ugly, so if you are thinking of extending the syntax,
758 see first if you can do it via pre-processors, post-processors,
759 inline patterns or a combination of the three.
760 """
761
class CorePatterns :
    """This class is scheduled for removal as part of a refactoring
        effort."""

    # Each pattern is compiled below anchored as "^...$", so it has to
    # match a whole line.
    patterns = {
        'header':          r'(#*)([^#]*)(#*)', # # A title
        'reference-def' :  r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
                           # [Google]: http://www.google.com/
        'containsline':    r'([-]*)$|^([=]*)', # -----, =====, etc.
        'ol':              r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text
        'ul':              r'[ ]{0,3}[*+-]\s+(.*)', # "* text"
        'isline1':         r'(\**)', # ***
        'isline2':         r'(\-*)', # ---
        'isline3':         r'(\_*)', # ___
        # A tab or four spaces (TAB_LENGTH); the run of spaces had been
        # collapsed to one, which made any line starting with a single
        # space count as indented.
        'tabbed':          r'((\t)|(    ))(.*)', # an indented line
        'quoted' :         r'> ?(.*)', # a quoted block ("> ...")
    }

    def __init__ (self) :
        """Compile every entry of patterns into self.regExp."""

        self.regExp = {}
        for key, pattern in self.patterns.items() :
            self.regExp[key] = re.compile("^%s$" % pattern,
                                          re.DOTALL)

        # 'containsline' is compiled separately, in multi-line mode
        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)

RE = CorePatterns()
790
791
792 class Markdown:
793 """ Markdown formatter class for creating an html document from
794 Markdown text """
795
796
def __init__(self, source=None):
    """Creates a new Markdown instance.

       @param source: The text in Markdown format. """

    self.source = source
    self.blockGuru = BlockGuru()
    self.registeredExtensions = []
    self.stripTopLevelTags = 1

    # Pre-processors run in this order on the list of source lines.
    # NOTE: these are module-level objects shared by every Markdown
    # instance; reset() points their stash/references at this instance.
    self.preprocessors = [ HEADER_PREPROCESSOR,
                           LINE_PREPROCESSOR,
                           HTML_BLOCK_PREPROCESSOR,
                           LINE_BREAKS_PREPROCESSOR,
                           # A footnote preprocessor will
                           # get inserted here
                           REFERENCE_PREPROCESSOR ]


    self.postprocessors = [] # a footnote postprocessor will get
                             # inserted later

    # Inline patterns, also shared module-level objects.
    self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,
                            BACKTICK_PATTERN,
                            ESCAPE_PATTERN,
                            IMAGE_LINK_PATTERN,
                            IMAGE_REFERENCE_PATTERN,
                            REFERENCE_PATTERN,
                            LINK_ANGLED_PATTERN,
                            LINK_PATTERN,
                            AUTOLINK_PATTERN,
                            AUTOMAIL_PATTERN,
                            HTML_PATTERN,
                            ENTITY_PATTERN,
                            NOT_STRONG_PATTERN,
                            STRONG_EM_PATTERN,
                            STRONG_EM_PATTERN_2,
                            STRONG_PATTERN,
                            STRONG_PATTERN_2,
                            EMPHASIS_PATTERN,
                            EMPHASIS_PATTERN_2
                            # The order of the handlers matters!!!
                            ]

    # create the per-document state (references and HTML stash)
    self.reset()
842
def registerExtension(self, extension) :
    """Remember an extension so that reset() can reset it as well."""
    registry = self.registeredExtensions
    registry.append(extension)
845
def reset(self) :
    """Resets all state variables so that we can start
        with a new text."""
    references = {}
    stash = HtmlStash()
    self.references = references
    self.htmlStash = stash

    # hand the fresh state to the shared module-level processors
    for holder in (HTML_BLOCK_PREPROCESSOR, HTML_PATTERN, ENTITY_PATTERN) :
        holder.stash = stash
    for holder in (REFERENCE_PREPROCESSOR, REFERENCE_PATTERN,
                   IMAGE_REFERENCE_PATTERN) :
        holder.references = references

    for extension in self.registeredExtensions :
        extension.reset()
861
862
def _transform(self):
    """Transforms the Markdown text into a XHTML body document

       @returns: A NanoDom Document """

    # Build the document skeleton: <span class="markdown"> as the root
    doc = Document()
    self.doc = doc
    root = doc.createElement("span")
    self.top_element = root
    root.appendChild(doc.createTextNode('\n'))
    root.setAttribute('class', 'markdown')
    doc.appendChild(root)

    # Normalise the source: trim it, unify line endings, terminate it
    # with a blank line and expand tabs
    text = self.source.strip()
    text = text.replace("\r\n", "\n").replace("\r", "\n")
    text = (text + "\n\n").expandtabs(TAB_LENGTH)

    # Each pre-processor receives the line list left by the previous one
    self.lines = text.split("\n")
    for preprocessor in self.preprocessors :
        self.lines = preprocessor.run(self.lines)

    # Turn the lines into a NanoDom tree below the root element
    self._processSection(root, self.lines)

    # Trailing newline inside the root element, as before
    root.appendChild(doc.createTextNode('\n'))

    # Post-processors work on the finished document
    for postprocessor in self.postprocessors :
        postprocessor.run(doc)

    return doc
902
903
def _processSection(self, parent_elem, lines,
                    inList = 0, looseList = 0) :

    """Process a section of a source document, looking for high
       level structural elements like lists, block quotes, code
       segments, html blocks, etc.  Some those then get stripped
       of their high level markup (e.g. get unindented) and the
       lower-level markup is processed recursively.

       @param parent_elem: A NanoDom element to which the content
                           will be added
       @param lines: a list of lines
       @param inList: a level
       @param looseList: when true, the first paragraph of a list
                         item is also wrapped in <p>
       @returns: None"""

    if not lines :
        return

    # Check if this section starts with a list, a blockquote or
    # a code block

    processFn = { 'ul' :     self._processUList,
                  'ol' :     self._processOList,
                  'quoted' : self._processQuote,
                  'tabbed' : self._processCodeBlock }

    # the first pattern matching the first line decides the handler,
    # which also takes care of the rest of the section
    for regexp in ['ul', 'ol', 'quoted', 'tabbed'] :
        m = RE.regExp[regexp].match(lines[0])
        if m :
            processFn[regexp](parent_elem, lines, inList)
            return

    # We are NOT looking at one of the high-level structures like
    # lists or blockquotes.  So, it's just a regular paragraph
    # (though perhaps nested inside a list or something else).  If
    # we are NOT inside a list, we just need to look for a blank
    # line to find the end of the block.  If we ARE inside a
    # list, however, we need to consider that a sublist does not
    # need to be separated by a blank line.  Rather, the following
    # markup is legal:
    #
    # * The top level list item
    #
    #     Another paragraph of the list.  This is where we are now.
    #     * Underneath we might have a sublist.
    #

    if inList :

        # split at the first list-item line or blank line
        start, theRest = self._linesUntil(lines, (lambda line:
                         RE.regExp['ul'].match(line)
                         or RE.regExp['ol'].match(line)
                                          or not line.strip()))

        # both halves are processed one list level further out
        self._processSection(parent_elem, start,
                             inList - 1, looseList = looseList)
        self._processSection(parent_elem, theRest,
                             inList - 1, looseList = looseList)


    else : # Ok, so it's just a simple block

        paragraph, theRest = self._linesUntil(lines, lambda line:
                                             not line.strip())

        if len(paragraph) and paragraph[0].startswith('#') :
            # a hashed header: the number of leading "#" is the level
            m = RE.regExp['header'].match(paragraph[0])
            if m :
                level = len(m.group(1))
                h = self.doc.createElement("h%d" % level)
                parent_elem.appendChild(h)
                for item in self._handleInlineWrapper(m.group(2)) :
                    h.appendChild(item)
            else :
                message(CRITICAL, "We've got a problem header!")

        elif paragraph :

            list = self._handleInlineWrapper("\n".join(paragraph))

            if ( parent_elem.nodeName == 'li'
                 and not (looseList or parent_elem.childNodes)):

                #and not parent_elem.childNodes) :
                # If this is the first paragraph inside "li", don't
                # put <p> around it - append the paragraph bits directly
                # onto parent_elem
                el = parent_elem
            else :
                # Otherwise make a "p" element
                el = self.doc.createElement("p")
                parent_elem.appendChild(el)

            for item in list :
                el.appendChild(item)

        if theRest :
            theRest = theRest[1:]  # skip the first (blank) line

        self._processSection(parent_elem, theRest, inList)
1004
1005
1006
1007 def _processUList(self, parent_elem, lines, inList) :
1008 self._processList(parent_elem, lines, inList,
1009 listexpr='ul', tag = 'ul')
1010
1011 def _processOList(self, parent_elem, lines, inList) :
1012 self._processList(parent_elem, lines, inList,
1013 listexpr='ol', tag = 'ol')
1014
1015
def _processList(self, parent_elem, lines, inList, listexpr, tag) :
    """Given a list of document lines starting with a list item,
       finds the end of the list, breaks it up, and recursively
       processes each list item and the remainder of the text file.

       @param parent_elem: A dom element to which the content will be added
       @param lines: a list of lines
       @param inList: a level
       @param listexpr: key of the RE.regExp pattern that recognises an
                        item of this list ('ul' or 'ol')
       @param tag: name of the list element to create
       @returns: None"""

    ul = self.doc.createElement(tag)  # ul might actually be '<ol>'
    parent_elem.appendChild(ul)

    # becomes true once a blank line is found inside the list
    looseList = 0

    # Make a list of list items
    items = []
    item = -1  # index of the item currently being collected

    i = 0  # a counter to keep track of where we are

    for line in lines :

        loose = 0
        if not line.strip() :
            # If we see a blank line, this _might_ be the end of the list
            i += 1
            loose = 1

            # Find the next non-blank line
            for j in range(i, len(lines)) :
                if lines[j].strip() :
                    next = lines[j]
                    break
            else :
                # There is no more text => end of the list
                break

            # Check if the next non-blank line is still a part of the list
            if ( RE.regExp[listexpr].match(next) or
                 RE.regExp['tabbed'].match(next) ):
                # get rid of any white space in the line
                items[item].append(line.strip())
                looseList = loose or looseList
                continue
            else :
                break # found end of the list

        # Now we need to detect list items (at the current level)
        # while also detabing child elements if necessary

        for expr in [listexpr, 'tabbed']:

            m = RE.regExp[expr].match(line)
            if m :
                if expr == listexpr :  # We are looking at a new item
                    if m.group(1) :
                        items.append([m.group(1)])
                        item += 1
                elif expr == 'tabbed' :  # This line needs to be detabbed
                    items[item].append(m.group(4)) #after the 'tab'

                i += 1
                break
        else :
            # neither pattern matched (the inner loop did not break)
            items[item].append(line)  # Just regular continuation
            i += 1 # added on 2006.02.25
    else :
        # runs only when every line was consumed without a break
        i += 1

    # Add the dom elements
    for item in items :
        li = self.doc.createElement("li")
        ul.appendChild(li)

        self._processSection(li, item, inList + 1, looseList = looseList)

    # Process the remaining part of the section

    self._processSection(parent_elem, lines[i:], inList)
1096
1097
1098 def _linesUntil(self, lines, condition) :
1099 """ A utility function to break a list of lines upon the
1100 first line that satisfied a condition. The condition
1101 argument should be a predicate function.
1102 """
1103
1104 i = -1
1105 for line in lines :
1106 i += 1
1107 if condition(line) : break
1108 else :
1109 i += 1
1110 return lines[:i], lines[i:]
1111
def _processQuote(self, parent_elem, lines, inList):
    """Wrap the leading run of quoted lines in a <blockquote>.

    Lines are taken from the front of ``lines`` for as long as they
    match the 'quoted' regular expression; group 1 of each match is
    kept as the unquoted text.  The collected text is processed as a
    section inside a new 'blockquote' element, and whatever follows the
    quoted run is processed as a section of ``parent_elem``.

    @param parent_elem: DOM element to which the content will be added
    @param lines: a list of lines
    @param inList: a level, passed through to _processSection
    @returns: None
    """
    unquoted = []
    for candidate in lines:
        hit = RE.regExp['quoted'].match(candidate)
        if not hit:
            break  # first non-quoted line ends the quote
        unquoted.append(hit.group(1))

    # One entry was collected per consumed line.
    consumed = len(unquoted)

    quote_elem = self.doc.createElement('blockquote')
    parent_elem.appendChild(quote_elem)

    self._processSection(quote_elem, unquoted, inList)
    self._processSection(parent_elem, lines[consumed:], inList)
1140
1141
1142
1143
1144 def _processCodeBlock(self, parent_elem, lines, inList) :
1145 """Given a list of document lines starting with a code block
1146 finds the end of the block, puts it into the dom verbatim
1147 wrapped in ("<pre><code>") and recursively processes the
1148 the remainder of the text file.
1149
1150 @param parent_elem: DOM element to which the content will be added
1151 @param lines: a list of lines
1152 @param inList: a level
1153 @returns: None"""
1154
1155 detabbed, theRest = self.blockGuru.detectTabbed(lines)
1156
1157 pre = self.doc.createElement('pre')
1158 code = self.doc.createElement('code')
1159 parent_elem.appendChild(pre)
1160 pre.appendChild(code)
1161 text = "\n".join(detabbed).rstrip()+"\n"
1162 text = text.replace("&", "&")
1163 code.appendChild(self.doc.createTextNode(text))
1164 self._processSection(parent_elem, theRest, inList)
1165
1166
1167 def _handleInlineWrapper (self, line) :
1168
1169 # A wrapper around _handleInline to avoid recursion
1170
1171 strtype = type("string")
1172 parts = [line]
1173 dirty = 1
1174
1175 while dirty:
1176 dirty = 0
1177 for x in parts :
1178 if type(x) == strtype :
1179 i = parts.index(x)
1180 parts.remove(x)
1181 result = self._handleInline(x)
1182 result.reverse()
1183 for y in result :
1184 parts.insert(i,y)
1185 dirty = 1
1186
1187 return parts
1188
1189 def _handleInline(self, line):
1190 """Transform a Markdown line with inline elements to an XHTML
1191 fragment.
1192
1193 This function uses auxiliary objects called inline patterns.
1194 See notes on inline patterns above.
1195
1196 @param item: A block of Markdown text
1197 @return: A list of NanoDomnodes """
1198 if not(line):
1199 return [self.doc.createTextNode(' ')]
1200 # two spaces at the end of the line denote a <br/>
1201 #if line.endswith(' '):
1202 # list = self._handleInline( line.rstrip())
1203 # list.append(self.doc.createElement('br'))
1204 # return list
1205 #
1206 # ::TODO:: Replace with a preprocessor
1207
1208 for pattern in self.inlinePatterns :
1209 list = self._applyPattern( line, pattern)
1210 if list: return list
1211
1212 return [self.doc.createTextNode(line)]
1213
1214 def _applyPattern(self, line, pattern) :
1215 """ Given a pattern name, this function checks if the line
1216 fits the pattern, creates the necessary elements, and returns
1217 back a list consisting of NanoDom elements and/or strings.
1218
1219 @param line: the text to be processed
1220 @param pattern: the pattern to be checked
1221
1222 @returns: the appropriate newly created NanoDom element if the
1223 pattern matches, None otherwise.
1224 """
1225
1226 # match the line to pattern's pre-compiled reg exp.
1227 # if no match, move on.
1228
1229 m = pattern.getCompiledRegExp().match(line)
1230 if not m :
1231 return None
1232
1233 # if we got a match let the pattern make us a NanoDom node
1234 # if it doesn't, move on
1235 node = pattern.handleMatch(m, self.doc)
1236
1237 if node :
1238 return [m.group(1), # the string to the right of the match
1239 node, # the new node
1240 m.groups()[-1]] # the string to the left
1241 else :
1242 return None
1243
def __str__(self):
    """Return the document in XHTML format.

    Runs ``self._transform()``, serializes the resulting document with
    ``toxml()``, substitutes every stashed raw HTML block back in for
    its placeholder, and turns the footnote back-link marker into the
    final back-link text.

    @returns: A serialized XHTML body.
    """
    doc = self._transform()
    xml = doc.toxml()

    # Let's stick in all the raw html pieces
    for i in range(self.htmlStash.html_counter):
        placeholder = HTML_PLACEHOLDER % i
        raw_html = self.htmlStash.rawHtmlBlocks[i]
        # A placeholder that ended up alone in a paragraph loses the
        # <p> wrapper; any other occurrence is replaced inline.
        xml = xml.replace("<p>%s\n</p>" % placeholder, raw_html + "\n")
        xml = xml.replace(placeholder, raw_html)

    # Use the numeric character reference for the back-link arrow.  A
    # raw non-ASCII character in this literal breaks the module when
    # the file has no source-encoding declaration.
    xml = xml.replace(FN_BACKLINK_TEXT, "&#8617;")

    # And return everything but the top level tag
    if self.stripTopLevelTags:
        # NOTE(review): fixed-width slice; presumably drops a 23-char
        # opening and 7-char closing wrapper tag -- confirm against
        # what _transform() produces.
        xml = xml.strip()[23:-7]

    return xml


toString = __str__
1273
1274
1275 """
1276 ========================= FOOTNOTES =================================
1277
1278 This section adds footnote handling to markdown. It can be used as
1279 an example for extending python-markdown with relatively complex
1280 functionality. While in this case the extension is included inside
1281 the module itself, it could just as easily be added from outside the
1282 module. Note that all markdown classes above are ignorant about
1283 footnotes. All footnote functionality is provided separately and
1284 then added to the markdown instance at the run time.
1285
1286 Footnote functionality is attached by calling extendMarkdown()
1287 method of FootnoteExtension. The method also registers the
1288 extension to allow its state to be reset by a call to reset()
1289 method.
1290 """
1291
class FootnoteExtension:
    """Footnote support that is attached to a Markdown instance at run time.

    Holds the footnote definitions (``footnotes``: id -> text) and the
    numbers assigned to the ids referenced in the text
    (``used_footnotes``: id -> number), and builds the footnotes div.
    """

    # "[^id]: text", optionally preceded by up to three spaces
    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M)  # [^a]

    def __init__(self):
        self.reset()

    def extendMarkdown(self, md):
        """Hook the footnote machinery into the Markdown instance ``md``.

        @param md: the Markdown instance to extend
        @returns: None
        """
        self.md = md

        # Stateless extensions do not need to be registered
        md.registerExtension(self)

        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(REFERENCE_PREPROCESSOR)
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        index = md.inlinePatterns.index(IMAGE_REFERENCE_PATTERN)
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))

        # Insert a post-processor that would actually add the footnote div
        md.postprocessors.append(FootnotePostprocessor(self))

    def reset(self):
        """Forget all footnotes and pick a fresh random id suffix.

        May be called by Markdown if a state reset is desired.
        """
        self.footnote_suffix = "-" + str(int(random.random() * 1000000000))
        self.used_footnotes = {}
        self.footnotes = {}

    def setFootnote(self, id, text):
        """Store ``text`` as the definition of footnote ``id``."""
        self.footnotes[id] = text

    def makeFootnoteId(self, num):
        """Return the element id of footnote number ``num``."""
        return 'fn%d%s' % (num, self.footnote_suffix)

    def makeFootnoteRefId(self, num):
        """Return the element id of the reference to footnote ``num``."""
        return 'fnr%d%s' % (num, self.footnote_suffix)

    def makeFootnotesDiv(self, doc):
        """Creates the div with class='footnote' and populates it with
        the text of the footnotes.

        Only footnotes that are both defined and referenced are
        rendered, ordered by the number recorded in
        ``used_footnotes``.  A definition that is never referenced has
        no number and nothing to link back to, so it is left out
        instead of raising KeyError.

        @param doc: the document used to create the new elements
        @returns: the footnote div as a dom element, or None when
                  there is nothing to render
        """
        footnotes = [(self.used_footnotes[id], id)
                     for id in self.footnotes.keys()
                     if id in self.used_footnotes]
        if not footnotes:
            return None
        footnotes.sort()

        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)

        for i, id in footnotes:
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))

            self.md._processSection(li, self.footnotes[id].split("\n"))

            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # The title names this footnote's own number (it used to
            # say "1" for every footnote).
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))

            if li.childNodes:
                # Put the back-link inside the last child, unless that
                # child is a text node, in which case it goes directly
                # into the <li>.
                node = li.childNodes[-1]
                if node.type == "text":
                    node = li
                node.appendChild(backlink)

            ol.appendChild(li)

        return div
1381
1382
class FootnotePreprocessor:
    """Preprocessor that pulls footnote definitions out of the text and
    records the order in which footnotes are referenced.

    ``footnotes`` is the owning extension object; this class uses its
    ``DEF_RE``, ``SHORT_USE_RE``, ``setFootnote`` and
    ``used_footnotes``.
    """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, lines):
        """Strip footnote definitions and number the footnote marks.

        @param lines: a list of lines of text
        @returns: the list of lines with footnote definitions removed
        """
        self.blockGuru = BlockGuru()
        lines = self._handleFootnoteDefinitions(lines)

        # Visit every footnote mark in the text so that we know in
        # what order the footnotes are supposed to appear.  Nothing is
        # substituted; the text itself is returned unchanged.
        text = "\n".join(lines)
        for match in self.footnotes.SHORT_USE_RE.finditer(text):
            self.recordFootnoteUse(match)

        return text.split("\n")

    def recordFootnoteUse(self, match):
        """Assign the next free number to a footnote on its first use.

        A footnote referenced more than once keeps the number it got
        the first time; renumbering on each later reference would
        leave gaps and reorder the footnotes.

        @param match: a SHORT_USE_RE match; group 1 is the footnote id
        @returns: None
        """
        id = match.group(1).strip()
        used = self.footnotes.used_footnotes
        if id not in used:
            used[id] = len(used) + 1

    def _handleFootnoteDefinitions(self, lines):
        """Finds all footnote definitions in the lines and stores them.

        Each definition consists of its first line plus the tabbed
        lines that follow it (as split off by
        ``self.blockGuru.detectTabbed``).  It is handed to
        ``setFootnote`` under its whitespace-stripped id, matching how
        ``recordFootnoteUse`` normalizes ids, and is replaced in the
        text by a single empty line.

        @param lines: a list of lines of text
        @returns: a list of lines with the footnote definitions removed
        """
        kept = []
        remaining = lines
        # Iterative rather than recursive, so the number of footnotes
        # is not bounded by the interpreter's recursion limit.
        while True:
            i, id, footnote = self._findFootnoteDefinition(remaining)
            if not id:
                return kept + remaining

            detabbed, theRest = self.blockGuru.detectTabbed(remaining[i+1:])
            self.footnotes.setFootnote(id.strip(),
                                       footnote + "\n"
                                       + "\n".join(detabbed))

            kept.extend(remaining[:i])
            kept.append("")
            remaining = theRest

    def _findFootnoteDefinition(self, lines):
        """Finds the first line of a footnote definition.

        @param lines: a list of lines of text
        @returns: ``(index, id, text)`` for the first line matching
                  DEF_RE, or ``(len(lines), None, None)`` when no line
                  matches
        """
        for counter, line in enumerate(lines):
            m = self.footnotes.DEF_RE.match(line)
            if m:
                return counter, m.group(2), m.group(3)
        return len(lines), None, None
1450
1451
class FootnotePattern(BasePattern):
    """Inline pattern that turns a footnote mark into a numbered
    <sup><a> link pointing at the footnote.
    """

    def __init__(self, pattern, footnotes):
        BasePattern.__init__(self, pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, doc):
        """Build the <sup> element for one footnote mark.

        @param m: the match object; group 2 is taken as the footnote id
        @param doc: the document used to create the new elements
        @returns: the <sup> element
        """
        sup = doc.createElement('sup')
        a = doc.createElement('a')
        sup.appendChild(a)
        # used_footnotes is keyed by the whitespace-stripped id (see
        # FootnotePreprocessor.recordFootnoteUse), so strip here too;
        # otherwise a mark such as "[^ 1 ]" raises KeyError.
        id = m.group(2).strip()
        num = self.footnotes.used_footnotes[id]
        sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
        a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
        a.appendChild(doc.createTextNode(str(num)))
        return sup
1469
class FootnotePostprocessor:
    """Post-processor that appends the footnotes div to the document."""

    def __init__(self, footnotes):
        # The object asked for the div; it must provide makeFootnotesDiv().
        self.footnotes = footnotes

    def run(self, doc):
        """Append the footnotes div, if there is one, to the document element.

        @param doc: the document being post-processed
        @returns: None
        """
        div = self.footnotes.makeFootnotesDiv(doc)
        if not div:
            return
        doc.documentElement.appendChild(div)
1479
1480 # ====================================================================
1481
def markdown(text):
    """Convert Markdown ``text`` to its serialized form.

    @param text: the Markdown source
    @returns: ``str()`` of a Markdown object built from ``text``
    """
    message(VERBOSE, "in markdown.py, received text:\n%s" % text)
    converter = Markdown(text)
    return str(converter)
1485
def markdownWithFootnotes(text):
    """Convert Markdown ``text`` with the footnote extension attached.

    A Markdown object is created without source, extended with a new
    FootnoteExtension, and only then given ``text`` as its source.

    @param text: the Markdown source
    @returns: ``str()`` of the extended Markdown object
    """
    banner = "Running markdown with footnotes, "
    message(VERBOSE, banner + "received text:\n%s" % text)

    md = Markdown()
    FootnoteExtension().extendMarkdown(md)
    md.source = text

    return str(md)
1495
def test_markdown(args):
    """Test markdown at the command line.

    Runs cmd_line() with a series of canned argument lists; in each
    test, argument 0 is the module name.  Writes the sample document
    "markdown-test.txt" to the current directory, and TEST 7 writes
    "markdown-test.html" next to it.

    @param args: not used
    @returns: None
    """
    # The parenthesized single-argument form prints the same text
    # whether print is a statement or a function.
    print("\nTEST 1: no arguments on command line")
    cmd_line(["markdown.py"])
    print("\nTEST 2a: 1 argument on command line: a good option")
    cmd_line(["markdown.py", "-footnotes"])
    print("\nTEST 2b: 1 argument on command line: a bad option")
    cmd_line(["markdown.py", "-foodnotes"])
    print("\nTEST 3: 1 argument on command line: non-existent input file")
    cmd_line(["markdown.py", "junk.txt"])
    print("\nTEST 4: 1 argument on command line: existing input file")
    # The code-block lines carry the four-space indent Markdown
    # requires; without it they are ordinary paragraphs and the
    # code-block path is never exercised.
    lines = """
Markdown text with[^1]:

2. **bold text**,
3. *italic text*.

Then more:

    beginning of code block;
    another line of code block.

    a second paragraph of code block.

more text to end our file.

[^1]: "italic" means emphasis.
"""
    fid = "markdown-test.txt"
    f1 = open(fid, 'w+')
    try:
        f1.write(lines)
    finally:
        # Close even if the write fails, so the handle is not leaked.
        f1.close()
    cmd_line(["markdown.py", fid])
    print("\nTEST 5: 2 arguments on command line: nofootnotes and input file")
    cmd_line(["markdown.py", "-nofootnotes", fid])
    print("\nTEST 6: 2 arguments on command line: footnotes and input file")
    cmd_line(["markdown.py", "-footnotes", fid])
    print("\nTEST 7: 3 arguments on command line: nofootnotes,inputfile, outputfile")
    fidout = "markdown-test.html"
    cmd_line(["markdown.py", "-nofootnotes", fid, fidout])
1537
1538
def get_vars(args):
    """Process the command-line args received; return usable variables.

    ``args[0]`` is the program name.  What follows is interpreted by
    count:

      - nothing:        no option, no files
      - one argument:   an option if it starts with '-', else the input file
      - two arguments:  option + input file if the first starts with '-',
                        else input file + output file
      - three or more:  option, input file, output file (extras ignored)

    Nothing is validated here.

    @param args: the argument list, e.g. sys.argv
    @returns: an ``(option, inFile, outFile)`` tuple; missing entries
              are None
    """
    message(VERBOSE, "in get_vars(), args: %s" % args)

    if len(args) <= 1:
        option, inFile, outFile = (None, None, None)
    elif len(args) >= 4:
        option, inFile, outFile = args[1:4]
    elif len(args) == 3:
        first, second = args[1:3]
        # startswith() rather than first[0], so that an empty-string
        # argument is treated as a (bad) file name instead of raising
        # IndexError.
        if first.startswith('-'):
            # then we have an option and inFile
            option, inFile, outFile = first, second, None
        else:
            # we have no option, so we must have inFile and outFile
            option, inFile, outFile = None, first, second
    else:
        # len(args) == 2
        # we have only one usable arg: might be an option or a file
        only = args[1]

        message(VERBOSE, "our single arg is: %s" % str(only))

        if only.startswith('-'):
            # then we have an option
            option, inFile, outFile = only, None, None
        else:
            # we have no option, so we must have inFile
            option, inFile, outFile = None, only, None

    message(VERBOSE,
            "prior to validation, option: %s, inFile: %s, outFile: %s" %
            (str(option), str(inFile), str(outFile),))

    return option, inFile, outFile
1576
1577
# Help text shown for -help and after a command-line error.
USAGE = """
\nUsing markdown.py:

python markdown.py [option] input_file_with_markdown.txt [output_file.html]

Options:

-footnotes or -fn : generate markdown with footnotes
-test or -t : run a self-test
-help or -h : print this message

"""

# Option names accepted on the command line, without the leading character.
VALID_OPTIONS = [
    'footnotes', 'nofootnotes', 'fn', 'test', 't', 'f', 'help', 'h',
]

# Short option names and the long names they stand for.
# NOTE(review): 'f' is accepted as valid but has no expansion here.
EXPANDED_OPTIONS = {
    "fn": "footnotes",
    "t": "test",
    "h": "help",
}


def validate_option(option):
    """Check if the option makes sense and print an appropriate message
    if it doesn't.

    The first character of ``option`` is dropped and the rest is
    looked up in VALID_OPTIONS; short names are expanded through
    EXPANDED_OPTIONS.

    @param option: the option as typed (e.g. "-fn"), or None
    @return: valid option string (long form where one exists) or None
    """
    if option is None:
        return None

    name = option[1:]
    if len(option) > 1 and name in VALID_OPTIONS:
        return EXPANDED_OPTIONS.get(name, name)

    message(CRITICAL,
            "\nSorry, I don't understand option %s" % option)
    message(CRITICAL, USAGE)
    return None
1620
1621
def validate_input_file(inFile):
    """Check if the input file is specified and readable.

    Prints a message (plus the usage text when no name was given) if
    the check fails.

    @param inFile: the input file path, possibly None or empty
    @return: valid input file path or None
    """
    if inFile:
        if os.access(inFile, os.R_OK):
            return inFile
        message(CRITICAL, "Sorry, I can't find input file %s" % str(inFile))
        return None

    message(CRITICAL,
            "\nI need an input filename.\n")
    message(CRITICAL, USAGE)
    return None
1640
1641
1642
1643
def cmd_line(args):
    """Run the converter as directed by a command-line argument list.

    The arguments are split by get_vars() and validated.  "help"
    prints the usage text and "test" runs test_markdown(); otherwise
    the input file is read and converted -- with footnotes when the
    option is "footnotes" -- and the result is printed, or written to
    the output file when one was given.

    @param args: the argument list, e.g. sys.argv
    @returns: None
    """
    message(VERBOSE, "in cmd_line with args: %s" % args)

    option, inFile, outFile = get_vars(args)

    if option:
        option = validate_option(option)
        if not option:
            return

    if option == "help":
        message(CRITICAL, USAGE)
        return
    elif option == "test":
        test_markdown(None)
        return

    inFile = validate_input_file(inFile)
    if not inFile:
        return

    # Read through an explicitly closed handle; the previous
    # file(...).read() left closing to the garbage collector.
    source_file = open(inFile)
    try:
        source_text = source_file.read()
    finally:
        source_file.close()

    message(VERBOSE, "Validated command line parameters:" +
            "\n\toption: %s, \n\tinFile: %s, \n\toutFile: %s" % (
            str(option), str(inFile), str(outFile),))

    if option == "footnotes":
        md_function = markdownWithFootnotes
    else:
        md_function = markdown

    if outFile is None:
        print(md_function(source_text))
    else:
        output = md_function(source_text)
        f1 = open(outFile, "w+")
        try:
            f1.write(output)
        finally:
            # Close even if the write fails.
            f1.close()

        if os.access(outFile, os.F_OK):
            message(INFO, "Successfully wrote %s" % outFile)
        else:
            message(INFO, "Failed to write %s" % outFile)
1688
1689
if __name__ == '__main__':
    # Run Markdown from the command line.
    # Diagnostic output is controlled by MESSAGE_THRESHOLD near the top
    # of the file: message() prints anything at or above that level.
    args = sys.argv

    # set testing=1 to test the command-line response of markdown.py
    testing = 0
    if not testing:
        cmd_line(args)
    else:
        test_markdown(args)
1701
1702 """
1703 CHANGELOG
1704 =========
1705
1706
1707 Mar. 24, 2006: Switched to a not-so-recursive algorithm with
1708 _handleInline. (Version 1.4)
1709
1710 Mar. 15, 2006: Replaced some instance variables with class variables
1711 (a patch from Stelios Xanthakis). Chris Clark's new regexps that do
1712 not trigger midword underlining.
1713
1714 Feb. 28, 2006: Clean-up and command-line handling by Stewart
1715 Midwinter. (Version 1.3)
1716
1717 Feb. 24, 2006: Fixed a bug with the last line of the list appearing
1718 again as a separate paragraph. Incorporated Chris Clark's "mailto"
1719 patch. Added support for <br /> at the end of lines ending in two or
1720 more spaces. Fixed a crashing bug when using ImageReferencePattern.
1721 Added several utility methods to Nanodom. (Version 1.2)
1722
1723 Jan. 31, 2006: Added "hr" and "hr/" to BLOCK_LEVEL_ELEMENTS and
1724 changed <hr/> to <hr />. (Thanks to Sergej Chodarev.)
1725
1726 Nov. 26, 2005: Fixed a bug with certain tabbed lines inside lists
1727 getting wrapped in <pre><code>. (v. 1.1)
1728
1729 Nov. 19, 2005: Made "<!...", "<?...", etc. behave like block-level
1730 HTML tags.
1731
1732 Nov. 14, 2005: Added entity code and email autolink fix by Tiago
1733 Cogumbreiro. Fixed some small issues with backticks to get 100%
1734 compliance with John's test suite. (v. 1.0)
1735
1736 Nov. 7, 2005: Added an unlink method for documents to aid with memory
1737 collection (per Doug Sauder's suggestion).
1738
1739 Oct. 29, 2005: Restricted a set of html tags that get treated as
1740 block-level elements.
1741
1742 Sept. 18, 2005: Refactored the whole script to make it easier to
1743 customize it and made footnote functionality into an extension.
1744 (v. 0.9)
1745
1746 Sept. 5, 2005: Fixed a bug with multi-paragraph footnotes. Added
1747 attribute support.
1748
1749 Sept. 1, 2005: Changed the way headers are handled to allow inline
1750 syntax in headers (e.g. links) and got the lists to use p-tags
1751 correctly (v. 0.8)
1752
1753 Aug. 29, 2005: Added flexible tabs, fixed a few small issues, added
1754 basic support for footnotes. Got rid of xml.dom.minidom and added
1755 pretty-printing. (v. 0.7)
1756
1757 Aug. 13, 2005: Fixed a number of small bugs in order to conform to the
1758 test suite. (v. 0.6)
1759
1760 Aug. 11, 2005: Added support for inline html and entities, inline
1761 images, autolinks, underscore emphasis. Cleaned up and refactored the
1762 code, added some more comments.
1763
1764 Feb. 19, 2005: Rewrote the handling of high-level elements to allow
1765 multi-line list items and all sorts of nesting.
1766
1767 Feb. 3, 2005: Reference-style links, single-line lists, backticks,
1768 escape, emphasis in the beginning of the paragraph.
1769
1770 Nov. 2004: Added links, blockquotes, html blocks to Manfred
1771 Stienstra's code
1772
1773 Apr. 2004: Manfred's version at http://www.dwerg.net/projects/markdown/
1774
1775 """
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.