# -*- coding: utf-8 -*- 

# lp4all: literate programming embedded in source code as wiki comments
# Copyright (C) 2006 Jean-Marie Favreau, Frédéric Lehobey, David Mentré
#                    and Thomas Petazzoni
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import sys
try:
    import lex
    import yacc
except ImportError:
    try:
        from ply import lex
        from ply import yacc
    except ImportError:
        print "python-ply is required, please install."
        sys.exit (1)

from iface import *
from tree_struct import *
from wiki_simpleparser import *
from parse_exception import *

Wiki syntax parser

Wiki syntax parser using ply (a lex/yacc python binding)

Wiki syntax

title 1

====title 1====

title 2

===title 2===

title 3

==title 3==

  • strong: **strong**
  • emphasize: //emphasize//
  • cancel: ~~cancel~~

  • horizontal rule: ----

  • verbatim: ^ ^verbatim^ ^ (without spaces between ^)
  • verbatim block
     % %verbatim block% % 
    (without spaces between %)
  • gpl (external link): [[http://www.gnu.org/licenses/gpl.txt|gpl (external link)]]
  • http://www.gnu.org/licenses/gpl.txt: [[http://www.gnu.org/licenses/gpl.txt]]
  • internal link: [[#id1|internal link]]
  • id1 (internal link): [[#id1]]
  • Label (target internal link): [@id1@]
  • broken internal link

(see the first page for another description)





### Main Parser class
class Parser:
    """Wiki-syntax comment parser built on ply's lex and yacc modules.

    The instance itself is handed to ply as the rule module
    (``module = self``), so ply reads the whole specification from this
    class:

    - ``tokens`` lists the token names;
    - every ``t_*`` method is a lexer rule whose docstring is the token's
      regular expression;
    - every ``p_*`` method is a grammar production whose docstring is the
      rule.

    Those docstrings are therefore part of the program; explanatory notes
    on rules are written as ``#`` comments instead.  According to the ply
    documentation, function-based lexer rules are tried in definition
    order and the first grammar rule names the start symbol, so the order
    of the methods below must not be changed.
    """

    def __init__(self, debug = 0):
        """Build the lexer, the grammar tables and the simple-comment parser.

        debug is forwarded unchanged to lex.lex(), yacc.yacc() and
        SimpleParser().
        """
        # build lex/yacc objects
        self.lexer = lex.lex(module = self, debug = debug)
        self.yacc = yacc.yacc(module = self, debug = debug)
        # build parser for simple comments
        self.sp = SimpleParser(debug = debug)


    def trace(self, text):
        """Debugging hook called by the grammar actions; currently a no-op."""
        # Print `text` here to follow the reductions while debugging.
        pass

    def _count_lines(self, t):
        """Advance line tracking past the newlines inside token t; return t.

        The token's own line number is moved forward, as the rules always
        did.  The new value is also stored on the lexer, because current
        ply versions take the line number of the next token from
        ``lexer.lineno`` and do not copy it back from the token.  Tokens
        without a ``lexer`` attribute (legacy ply) are left to the lexer's
        own bookkeeping.
        """
        t.lineno += t.value.count("\n")
        lexer = getattr(t, "lexer", None)
        if lexer is not None:
            lexer.lineno = t.lineno
        return t

    def _list_level(self, marker):
        """Return the nesting level encoded by a LISTITEM token value.

        Only the last line of the token (indentation followed by "- ")
        counts; its length minus 3 is the level, so one leading blank
        gives level 0.
        """
        return len(marker.split('\n')[-1]) - 3

    tokens = (

        'LINK',
        'TEXT',
        'NEWLINE',
        'PRE',
        'CODE',
        'LABEL',

        'REF',

        'LISTITEM',
        'SIMPLELINE',
        'SPECIALCHAR',

        'STRONG',
        'CANCEL',
        'EM',

        'T1',
        'T2',
        'T3',
        'ENDLINK',

        'HR',

        'TEXT_IN_LINK'
    )



    # Regular expression rules for simple tokens

    # horizontal rule: ----
    def t_HR(self, t):
        r'\-\-\-\-'
        return t

    # strong: **...**
    def t_STRONG(self, t):
        r'\*\*(\*?[^\*])*\*\*'
        return self._count_lines(t)

    # cancel: ~~...~~
    def t_CANCEL(self, t):
        r'~~(~?[^~])*~~'
        return self._count_lines(t)

    # emphasize: //...//
    def t_EM(self, t):
        r'\/\/(\/?[^\/])*\/\/'
        return self._count_lines(t)

    # title 1: ====...====
    def t_T1(self, t):
        r'====([^=]=?)*[^=]===='
        return self._count_lines(t)

    # title 2: ===...===
    def t_T2(self, t):
        r'===([^=]=?)*[^=]==='
        return self._count_lines(t)

    # title 3: ==...==
    def t_T3(self, t):
        r'==([^=]=?)*[^=]=='
        return self._count_lines(t)

    # verbatim block: %%...%%
    def t_PRE(self, t):
        r'%%(%?[^%])*%%'
        return self._count_lines(t)

    # inline verbatim: ^^...^^
    def t_CODE(self, t):
        r'\^\^(\^?[^\^])*\^\^'
        return self._count_lines(t)

    # start of an internal link: [[#id
    def t_REF(self, t):
        r'\[\[\#[^]\#@\|]+'
        return self._count_lines(t)

    # start of an external link: [[uri
    def t_LINK(self, t):
        r'\[\[[^]\ \|]+'
        return self._count_lines(t)

    # label (target of an internal link): [@id@]
    def t_LABEL(self, t):
        r'\[@[^]\#@\|]+@\]'
        return self._count_lines(t)


    # end of a link or reference: ]]
    def t_ENDLINK(self, t):
        r'\]\]'
        return t


    # list item: optional blank lines, then an indented "- "
    def t_LISTITEM(self, t):
        r'(\n[\ \t]*)*\n[\ \t]+\-\ '
        return self._count_lines(t)

    # paragraph break: a line break followed by at least one blank line
    def t_NEWLINE(self, t):
        r'\n([\ \t]*\n)+'
        return self._count_lines(t)

    # single line break plus the indentation of the next line
    def t_SIMPLELINE(self, t):
        r'\n[\ \t]*'
        return self._count_lines(t)

    # run of characters that cannot start any markup
    def t_TEXT(self, t):
        r'[^]%\/\*\|\[@=\n\#\^~\-]+'
        return self._count_lines(t)


    # "|" followed by the displayed text of a link or reference
    def t_TEXT_IN_LINK(self, t):
        r'\|(\]?[^]]+)+'
        return self._count_lines(t)

    # a lone markup character that did not open any construct
    def t_SPECIALCHAR(self, t):
        r'[%\/\*\[=\]#@~^\-\|]'
        return t




    # Lexer error hook: report the offending character, then skip it so
    # that scanning can continue.
    def t_error(self, t):
        # A single parenthesised expression prints the same text whether
        # print is a statement or a function.
        print("Illegal character '%s' (line %s)" % (t.value[0], t.lineno))
        # Legacy ply tokens carry their own skip(); current releases only
        # offer it on the lexer.
        skip = getattr(t, "skip", None)
        if skip is None:
            skip = t.lexer.skip
        skip(1)


    # initialisation
    def p_statement_text(self, p):
        'statement : text'
        self.trace("text")
        p[0] = p[1]

    def p_statement_newline(self, p):
        '''statement : NEWLINE'''
        p[0] = NodeContentNewLine()

    def p_statement_emptytext(self, p):
        '''statement : SIMPLELINE'''
        p[0] = NodeContentText(text = " ")


    def p_text_line(self, p):
        'text : line text'
        self.trace("return special char")
        p[0] = NodeContentText(children = [p[1], p[2]])

    # a trailing single line break is dropped
    def p_text_expr_line_end(self, p):
        '''text : line
        | line SIMPLELINE'''
        p[0] = p[1]

    # a trailing paragraph break is kept as a new-line node
    def p_statement_expr_line_end_return(self, p):
        '''text : line NEWLINE'''
        self.trace("line")
        p[0] = NodeContentText(children = [p[1], NodeContentNewLine()])

    # simple line: the line break and indentation are kept as the node text
    def p_line_simple(self, p):
        'line : SIMPLELINE expressionline'
        self.trace("simple line")
        p[0] = NodeContentText(text = p[1], children = [p[2]])

    # new line
    def p_line_newline(self, p):
        'line : NEWLINE expressionline'
        self.trace("new line")
        p[0] = NodeContentText(children = [NodeContentNewLine(), p[2]])

    # list line
    def p_line_list(self, p):
        'line : LISTITEM expressionline'
        self.trace("list")
        p[0] = NodeContentList(level = self._list_level(p[1]), children = [p[2]])

    def p_line_expressionline(self, p):
        'line : expressionline'
        self.trace("expressionline")
        p[0] = p[1]


    # empty list line
    def p_line_list_empty(self, p):
        'line : LISTITEM'
        self.trace("list")
        p[0] = NodeContentList(level = self._list_level(p[1]))

    # expression line
    # NOTE(review): the special character is passed to NodeContentText
    # positionally here and in the two rules below, while other rules use
    # text = ...; confirm the first positional parameter is the text.
    def p_expressionline_sp_start(self, p):
        '''expressionline : SPECIALCHAR expression'''
        p[0] = NodeContentText(children = [NodeContentText(p[1]), p[2]])

    def p_expressionline_sp_end(self, p):
        '''expressionline : expression SPECIALCHAR'''
        p[0] = NodeContentText(children = [p[1], NodeContentText(p[2])])

    def p_expressionline(self, p):
        '''expressionline : expression'''
        p[0] = p[1]


    # only one special char
    def p_expression_addspecialchar(self, p):
        '''expression : expression SPECIALCHAR expression'''
        self.trace("special char: " + p[2])
        p[0] = NodeContentText(children = [p[1], NodeContentText(p[2]), p[3]])


    # define expression properties
    def p_expression_expression(self, p):
        'expression : expression expression'
        p[0] = NodeContentText(children = [p[1], p[2]])


    # match hr
    def p_expression_hr_left(self, p):
        'expression : HR expression'
        p[0] = NodeContentText(children = [NodeContentHR(), p[2]])

    def p_expression_hr_right(self, p):
        'expression : expression HR'
        # p[1] is the expression on the left of the rule; p[2] is only the
        # raw text of the HR token, which NodeContentHR() stands for.
        p[0] = NodeContentText(children = [p[1], NodeContentHR()])

    def p_expression_hr(self, p):
        'expression : HR'
        p[0] = NodeContentHR()


    def p_expression_block(self, p):
        'expression : block'
        p[0] = p[1]

    def p_expression_all(self, p):
        '''expression : TEXT'''
        self.trace('text: ' + p[1])
        p[0] = NodeContentText(text = p[1])



    ## define logical blocks (link, bold, etc)
    # In the rules below the slices strip the opening and closing
    # delimiters from the token value; the inner text of strong, cancel,
    # emphasize, title, link and ref blocks is parsed again by the simple
    # parser.

    # bold
    def p_block_strong(self, p):
        '''block : STRONG'''
        self.trace("strong")
        p[0] = NodeContentStrong(children = [self.sp.parseSimpleComment(content = p[1][2:][:-2], lineno = p.lineno(1))])

    # cancel
    def p_block_tilde(self, p):
        '''block : CANCEL'''
        self.trace("cancel")
        p[0] = NodeContentCancel(children = [self.sp.parseSimpleComment(content = p[1][2:][:-2], lineno = p.lineno(1))])

    # emphasize
    def p_block_emphasize(self, p):
        '''block : EM'''
        self.trace("emphasize")
        p[0] = NodeContentEmphasize(children = [self.sp.parseSimpleComment(content = p[1][2:][:-2], lineno = p.lineno(1))])

    # code: the content is kept verbatim
    def p_block_code(self, p):
        'block : CODE'
        self.trace("code: " + p[1][2:][:-2] + "\n\n")
        p[0] = NodeContentCode(text = p[1][2:][:-2])

    # pre: the content is kept verbatim
    def p_block_pre(self, p):
        'block : PRE'
        self.trace("pre: " + p[1][2:][:-2] + "\n\n")
        p[0] = NodeContentPre(text = p[1][2:][:-2])


    # title (1)
    def p_block_title1_simple(self, p):
        '''block : T1'''
        self.trace("title (1)")
        p[0] = NodeContentTitle(level = 0, children = [self.sp.parseSimpleComment(p[1][4:][:-4], lineno = p.lineno(1))])

    # title (2)
    def p_block_title2_simple(self, p):
        '''block : T2'''
        self.trace("title (2)")
        p[0] = NodeContentTitle(level = 1, children = [self.sp.parseSimpleComment(p[1][3:][:-3], lineno = p.lineno(1))])

    # title (3)
    def p_block_title3_simple(self, p):
        '''block : T3'''
        self.trace("title (3)")
        p[0] = NodeContentTitle(level = 2, children = [self.sp.parseSimpleComment(p[1][2:][:-2], lineno = p.lineno(1))])


    # link with a displayed text: p[1][2:] drops "[[", p[2][1:] drops "|"
    def p_block_link_expr(self, p):
        '''block : LINK TEXT_IN_LINK ENDLINK'''
        self.trace("link (+): " + p[1][2:])
        p[0] = NodeContentLink(uri = p[1][2:], children = [self.sp.parseSimpleComment(p[2][1:], lineno = p.lineno(2))])

    # link without a displayed text
    def p_block_link(self, p):
        '''block : LINK ENDLINK'''
        self.trace("link: " + p[1][2:])
        p[0] = NodeContentLink(uri = p[1][2:])

    # ref with a displayed text: p[1][3:] drops "[[#", p[2][1:] drops "|"
    def p_block_ref_expr(self, p):
        '''block : REF TEXT_IN_LINK ENDLINK'''
        self.trace("ref (+): " + p[1][3:])
        p[0] = NodeContentRef(ref = p[1][3:], children = [self.sp.parseSimpleComment(content = p[2][1:], lineno = p.lineno(2))])

    # ref without a displayed text
    def p_block_ref(self, p):
        '''block : REF ENDLINK'''
        self.trace("ref: " + p[1][3:])
        p[0] = NodeContentRef(ref = p[1][3:])

    # label: the slices drop "[@" and "@]"
    def p_block_label(self, p):
        '''block : LABEL'''
        self.trace("label " + p[1][2:][:-2])
        p[0] = NodeContentLabel(label = p[1][2:][:-2])


    # Grammar error hook: always raises ParseException.
    def p_error(self, p):
        # ply passes None instead of a token when the input ends
        # unexpectedly, so no value or line number is available.
        if p is None:
            raise ParseException("Syntax error")
        raise ParseException("Syntax error at '%s'" % p.value, p.lineno)



    def parseComment(self, content, debug = 0, lineno = 1):
        """Parse a wiki comment and return what the grammar built for it.

        content -- object whose ``text`` attribute holds the wiki source
        debug   -- forwarded to the yacc parser
        lineno  -- line number given to the first line of the comment

        Syntax errors raise ParseException (see p_error).
        """
        # init lineno
        self.lexer.lineno = lineno
        # run parser
        return self.yacc.parse(content.text, debug = debug, lexer = self.lexer)