# -*- coding: utf-8 -*- # lp4all: literate programming embedded in source code as wiki comments # Copyright (C) 2006 Jean-Marie Favreau, Frédéric Lehobey, David Mentré # and Thomas Petazzoni # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import sys try: import lex import yacc except ImportError: try: from ply import lex from ply import yacc except ImportError: print "python-ply is required, please install." sys.exit (1) from iface import * from tree_struct import * from wiki_simpleparser import * from parse_exception import *
### Main Parser class
class Parser:
    """Wiki comment parser built on the lex/yacc modules (ply).

    The t_* methods are the token rules and the p_* methods are the
    grammar rules.  ply reads the docstring of each of these methods as
    the regular expression (t_*) or the grammar production (p_*), so
    those docstrings are code: they must not be reworded, and the
    methods must stay in their current order because ply uses the
    definition order to prioritise token rules and to choose the start
    symbol.  Explanations therefore live in '#' comments.
    """

    def __init__(self, debug = 0):
        """Build the lexer, the yacc parser and the simple-comment parser.

        debug is forwarded unchanged to lex.lex, yacc.yacc and
        SimpleParser.
        """
        # build lex/yacc objects
        self.lexer = lex.lex(module = self, debug = debug)
        self.yacc = yacc.yacc(module = self, debug = debug)
        # build parser for simple comments
        self.sp = SimpleParser(debug = debug)

    def trace(self, text):
        """Debugging hook; does nothing unless the print is re-enabled."""
        #print text
        pass

    def _advance_lineno(self, t):
        """Add the newlines contained in token t to the line counters.

        The token's own lineno is advanced (as the rules always did) and
        the result is also stored on the lexer when the token exposes
        one, because recent ply releases do not copy t.lineno back to
        the lexer by themselves.
        """
        t.lineno += t.value.count("\n")
        lexer = getattr(t, "lexer", None)
        if lexer is not None:
            lexer.lineno = t.lineno

    tokens = (
        'LINK',
        'TEXT',
        'NEWLINE',
        'PRE',
        'CODE',
        'LABEL',
        'REF',
        'LISTITEM',
        'SIMPLELINE',
        'SPECIALCHAR',
        'STRONG',
        'CANCEL',
        'EM',
        'T1',
        'T2',
        'T3',
        'ENDLINK',
        'HR',
        'TEXT_IN_LINK'
        )

    # Regular expression rules for simple tokens

    # horizontal rule: four dashes
    def t_HR(self, t):
        r'\-\-\-\-'
        return t

    # **strong**
    def t_STRONG(self, t):
        r'\*\*(\*?[^\*])*\*\*'
        self._advance_lineno(t)
        return t

    # ~~cancel~~
    def t_CANCEL(self, t):
        r'~~(~?[^~])*~~'
        self._advance_lineno(t)
        return t

    # //emphasize//
    def t_EM(self, t):
        r'\/\/(\/?[^\/])*\/\/'
        self._advance_lineno(t)
        return t

    # ====title 1====
    def t_T1(self, t):
        r'====([^=]=?)*[^=]===='
        self._advance_lineno(t)
        return t

    # ===title 2===
    def t_T2(self, t):
        r'===([^=]=?)*[^=]==='
        self._advance_lineno(t)
        return t

    # ==title 3==
    def t_T3(self, t):
        r'==([^=]=?)*[^=]=='
        self._advance_lineno(t)
        return t

    # %%pre%%
    def t_PRE(self, t):
        r'%%(%?[^%])*%%'
        self._advance_lineno(t)
        return t

    # ^^code^^
    def t_CODE(self, t):
        r'\^\^(\^?[^\^])*\^\^'
        self._advance_lineno(t)
        return t

    # opening of an internal reference: [[#name
    def t_REF(self, t):
        r'\[\[\#[^]\#@\|]+'
        self._advance_lineno(t)
        return t

    # opening of a link: [[uri
    def t_LINK(self, t):
        r'\[\[[^]\ \|]+'
        self._advance_lineno(t)
        return t

    # label definition: [@name@]
    def t_LABEL(self, t):
        r'\[@[^]\#@\|]+@\]'
        self._advance_lineno(t)
        return t

    # closing of a link or reference: ]]
    def t_ENDLINK(self, t):
        r'\]\]'
        return t

    # list item marker: optional blank lines, then an indented "- "
    def t_LISTITEM(self, t):
        r'(\n[\ \t]*)*\n[\ \t]+\-\ '
        self._advance_lineno(t)
        return t

    # at least one blank line
    def t_NEWLINE(self, t):
        r'\n([\ \t]*\n)+'
        self._advance_lineno(t)
        return t

    # single line break followed by optional indentation
    def t_SIMPLELINE(self, t):
        r'\n[\ \t]*'
        self._advance_lineno(t)
        return t

    # run of characters that cannot start any markup
    def t_TEXT(self, t):
        r'[^]%\/\*\|\[@=\n\#\^~\-]+'
        self._advance_lineno(t)
        return t

    # "|description" part of a link or reference
    def t_TEXT_IN_LINK(self, t):
        r'\|(\]?[^]]+)+'
        self._advance_lineno(t)
        return t

    # lone markup character that did not form a complete token
    def t_SPECIALCHAR(self, t):
        r'[%\/\*\[=\]#@~^\-\|]'
        return t

    # report the offending character and skip it
    def t_error(self, t):
        # single-argument print(): same output on Python 2 and Python 3
        print("Illegal character '%s' (line %s)" % (t.value[0], t.lineno))
        # old ply releases put skip() on the token, recent ones only on
        # the lexer
        skip = getattr(t, "skip", None)
        if skip is None:
            skip = t.lexer.skip
        skip(1)

    # initialisation
    def p_statement_text(self, p):
        'statement : text'
        self.trace("text")
        p[0] = p[1]

    def p_statement_newline(self, p):
        '''statement : NEWLINE'''
        p[0] = NodeContentNewLine()

    def p_statement_emptytext(self, p):
        '''statement : SIMPLELINE'''
        p[0] = NodeContentText(text = " ")

    def p_text_line(self, p):
        'text : line text'
        self.trace("return special char")
        p[0] = NodeContentText(children = [p[1], p[2]])

    # a trailing SIMPLELINE is dropped
    def p_text_expr_line_end(self, p):
        '''text : line
                | line SIMPLELINE'''
        p[0] = p[1]

    def p_statement_expr_line_end_return(self, p):
        '''text : line NEWLINE'''
        self.trace("line")
        p[0] = NodeContentText(children = [p[1], NodeContentNewLine()])

    # simple line
    def p_line_simple(self, p):
        'line : SIMPLELINE expressionline'
        self.trace("simple line")
        p[0] = NodeContentText(text = p[1], children = [p[2]])

    # new line
    def p_line_newline(self, p):
        'line : NEWLINE expressionline'
        self.trace("new line")
        p[0] = NodeContentText(children = [NodeContentNewLine(), p[2]])

    # list line
    def p_line_list(self, p):
        'line : LISTITEM expressionline'
        self.trace("list")
        # level = length of the marker's last line (indentation + "- ")
        # minus 3
        marker = p[1].split('\n')[-1]
        p[0] = NodeContentList(level = len(marker) - 3, children = [p[2]])

    def p_line_expressionline(self, p):
        'line : expressionline'
        self.trace("expressionline")
        p[0] = p[1]

    # empty list line
    def p_line_list_empty(self, p):
        'line : LISTITEM'
        self.trace("list")
        marker = p[1].split('\n')[-1]
        p[0] = NodeContentList(level = len(marker) - 3)

    # expression line
    def p_expressionline_sp_start(self, p):
        '''expressionline : SPECIALCHAR expression'''
        p[0] = NodeContentText(children = [NodeContentText(p[1]), p[2]])

    def p_expressionline_sp_end(self, p):
        '''expressionline : expression SPECIALCHAR'''
        p[0] = NodeContentText(children = [p[1], NodeContentText(p[2])])

    def p_expressionline(self, p):
        '''expressionline : expression'''
        p[0] = p[1]

    # only one special char
    def p_expression_addspecialchar(self, p):
        '''expression : expression SPECIALCHAR expression'''
        self.trace("special char: " + p[2])
        p[0] = NodeContentText(children = [p[1], NodeContentText(p[2]), p[3]])

    # define expression properties
    def p_expression_expression(self, p):
        'expression : expression expression'
        p[0] = NodeContentText(children = [p[1], p[2]])

    # match hr
    def p_expression_hr_left(self, p):
        'expression : HR expression'
        p[0] = NodeContentText(children = [NodeContentHR(), p[2]])

    def p_expression_hr_right(self, p):
        'expression : expression HR'
        # p[1] is the expression preceding the rule; p[2] is only the
        # raw "----" token text and must not be used as a child node
        p[0] = NodeContentText(children = [p[1], NodeContentHR()])

    def p_expression_hr(self, p):
        'expression : HR'
        p[0] = NodeContentHR()

    def p_expression_block(self, p):
        'expression : block'
        p[0] = p[1]

    def p_expression_all(self, p):
        '''expression : TEXT'''
        self.trace('text: ' + p[1])
        p[0] = NodeContentText(text = p[1])

    ## define logical blocks (link, bold, etc)
    # In the block rules below the surrounding markers are sliced off
    # the token value before the inner text is used.

    # bold
    def p_block_strong(self, p):
        '''block : STRONG'''
        self.trace("strong")
        inner = self.sp.parseSimpleComment(content = p[1][2:-2],
                                           lineno = p.lineno(1))
        p[0] = NodeContentStrong(children = [inner])

    # cancel
    def p_block_tilde(self, p):
        '''block : CANCEL'''
        self.trace("cancel")
        inner = self.sp.parseSimpleComment(content = p[1][2:-2],
                                           lineno = p.lineno(1))
        p[0] = NodeContentCancel(children = [inner])

    # emphasize
    def p_block_emphasize(self, p):
        '''block : EM'''
        self.trace("emphasize")
        inner = self.sp.parseSimpleComment(content = p[1][2:-2],
                                           lineno = p.lineno(1))
        p[0] = NodeContentEmphasize(children = [inner])

    # code
    def p_block_code(self, p):
        'block : CODE'
        body = p[1][2:-2]
        self.trace("code: " + body + "\n\n")
        p[0] = NodeContentCode(text = body)

    # pre
    def p_block_pre(self, p):
        'block : PRE'
        body = p[1][2:-2]
        self.trace("pre: " + body + "\n\n")
        p[0] = NodeContentPre(text = body)

    # title (1)
    def p_block_title1_simple(self, p):
        '''block : T1'''
        self.trace("title (1)")
        inner = self.sp.parseSimpleComment(p[1][4:-4], lineno = p.lineno(1))
        p[0] = NodeContentTitle(level = 0, children = [inner])

    # title (2)
    def p_block_title2_simple(self, p):
        '''block : T2'''
        self.trace("title (2)")
        inner = self.sp.parseSimpleComment(p[1][3:-3], lineno = p.lineno(1))
        p[0] = NodeContentTitle(level = 1, children = [inner])

    # title (3)
    def p_block_title3_simple(self, p):
        '''block : T3'''
        self.trace("title (3)")
        inner = self.sp.parseSimpleComment(p[1][2:-2], lineno = p.lineno(1))
        p[0] = NodeContentTitle(level = 2, children = [inner])

    # link
    def p_block_link_expr(self, p):
        '''block : LINK TEXT_IN_LINK ENDLINK'''
        # p[1] is "[[uri", p[2] is "|description"
        uri = p[1][2:]
        self.trace("link (+): " + uri)
        inner = self.sp.parseSimpleComment(p[2][1:], lineno = p.lineno(2))
        p[0] = NodeContentLink(uri = uri, children = [inner])

    def p_block_link(self, p):
        '''block : LINK ENDLINK'''
        uri = p[1][2:]
        self.trace("link: " + uri)
        p[0] = NodeContentLink(uri = uri)

    # ref
    def p_block_ref_expr(self, p):
        '''block : REF TEXT_IN_LINK ENDLINK'''
        # p[1] is "[[#name", p[2] is "|description"
        ref = p[1][3:]
        self.trace("ref (+): " + ref)
        inner = self.sp.parseSimpleComment(content = p[2][1:],
                                           lineno = p.lineno(2))
        p[0] = NodeContentRef(ref = ref, children = [inner])

    def p_block_ref(self, p):
        '''block : REF ENDLINK'''
        ref = p[1][3:]
        self.trace("ref: " + ref)
        p[0] = NodeContentRef(ref = ref)

    # label
    def p_block_label(self, p):
        '''block : LABEL'''
        label = p[1][2:-2]
        self.trace("label " + label)
        p[0] = NodeContentLabel(label = label)

    # syntax error: always raises ParseException
    def p_error(self, p):
        # no token is available (p is None): raise without position
        if p is None:
            raise ParseException("Syntax error")
        raise ParseException("Syntax error at '%s'" % p.value, p.lineno)

    # parse a wiki comment and build associated nodes
    def parseComment(self, content, debug = 0, lineno = 1):
        """Parse content.text and return the value produced by yacc.

        content -- object whose 'text' attribute holds the wiki text
        debug   -- forwarded to the yacc parse call
        lineno  -- line number assigned to the lexer before parsing

        ParseException is raised by p_error on a syntax error.
        """
        # init lineno
        self.lexer.lineno = lineno
        # run parser
        return self.yacc.parse(content.text, debug = debug, lexer = self.lexer)
Wiki syntax parser
Wiki syntax parser using ply (a
lex/yacc Python binding)
Wiki syntax
title 1
====title 1====
title 2
===title 2===
title 3
==title 3==
**strong**
//emphasize//
~~cancel~~
is
----
verbatim: ^ ^verbatim^ ^ (without spaces between ^)
[[http://www.gnu.org/licenses/gpl.txt|gpl (external link)]]
[[http://www.gnu.org/licenses/gpl.txt]]
[[#id1|internal link]]
[[#id1]]
[@id1@]
(see in first page another description)