# -*- python -*-
"""Translation tags for Translate.py"""

__version__ = "0.3 (tiepin) of 1999-11-15"
__author__ = "Tibs"

# This is a back-translation of the tuples which were written by hand
# within Translate.py, *but* it is also suitable for translation (by
# said module) into Python code, and then importing into Translate.py
# for use instead of the original (hand written) tuples.

from TextTools import *

# ------------------------------------------------------------
# We are not, initially, going to try for anything very sophisticated
# - just something that will get us bootstrapped, so that I can use the
#   "little language" to write more sophisticated stuff (without having
#   to worry about dropped commas between tuples, and so on!)


# Whitespace is always useful
# (this matches a run of spaces and/or tabs - newlines are NOT included,
#  they are dealt with separately as the end of a line)
t_whitespace is:
    AllIn ' \t'

# Optional whitespace - if there isn't any, we just carry on with
# whatever comes next in the enclosing table
t_opt_whitespace is:
    t_whitespace F:next       # move to next tuple if not whitespace

# Comments are fairly simple: a "#", followed by everything up to (but
# not including) the end of the line. The whole thing, including the
# "#", is tagged as 'comment'.
t_comment is:
    'comment' = Table is:
        Is '#'
        AllNotIn '\n\r' F:MatchOk      # a "#" with nothing after it is fine

# We care about the "content" of the indentation at the start of a
# line (so it is tagged, as 'indent'), but note that it is optional
# - t_indent itself fails if there is no indentation at all
t_indent is:
    'indent' = AllIn ' \t'
# The optional form of t_indent - an unindented line produces no
# 'indent' tag, and matching carries on with the next tuple
t_indentation is:
    t_indent F:next           # zero indentation doesn't show

# A string is text within single or double quotes
# (of course, this is an oversimplification, because we should also
#  deal with things like "This is a \"substring\"", and it would be
#  nice to be able to cope with triple-quoted strings too, but it
#  will do for a start)
# The 'str' tag covers the whole string, quotes included; the characters
# between the quotes are tagged as 'text'.

# Major bug - doesn't recognise zero length strings...
# (since "AllNotIn" must match at least one character)
t_string is:
    'str' = Table is:
        Is "'":                             # a single-quoted string
            'text' = AllNotIn "'"
            Is "'" F:MatchFail T:MatchOk    # the closing quote is required
        Is '"'                              # otherwise it must be double-quoted
        'text' = AllNotIn '"'
        Is '"'

# An integer is a series of digits...
# ("number" is not defined in this file - presumably it is the set of
#  digit characters provided by "from TextTools import *")
t_integer is:
    'int' = AllIn number

# A signed integer is an integer with an optional leading "+" or "-".
# If there is a sign, it is tagged as 'sign'; the digits are tagged
# (by t_integer) as 'int'.
t_signed_integer is:
    'signed_int' = Table is:
        'sign' = Is "+" F:next T:<int>
        'sign' = Is "-" F:next         # sign is optional
        <int>
        t_integer

# We'll only go for the simplest words
# Remember to be careful to specify the LONGEST possible match first, so that
# we try for "IsIn" before we try for "Is" (because "IsIn" would *match* "Is",
# leaving us with a spurious "In" hanging around...)
# Each alternative either matches (and we are done) or passes on to the
# next one; only the very last alternative is allowed to fail the table.
# Note that the word matched is tagged as 'op' INSIDE the outer 'op' table.
t_operation is:
    'op' = Table is:
        'op' = Word "AllInSet"        F:next T:MatchOk
        'op' = Word "AllIn"           F:next T:MatchOk
        'op' = Word "AllNotIn"        F:next T:MatchOk
        'op' = Word "CallArg"         F:next T:MatchOk
        'op' = Word "Call"            F:next T:MatchOk
        'op' = Word "EOF"             F:next T:MatchOk
        'op' = Word "Fail"            F:next T:MatchOk
        'op' = Word "IsInSet"         F:next T:MatchOk
        'op' = Word "IsIn"            F:next T:MatchOk
        'op' = Word "IsNotIn"         F:next T:MatchOk
        'op' = Word "IsNot"           F:next T:MatchOk
        'op' = Word "Is"              F:next T:MatchOk
        'op' = Word "Jump"            F:next T:MatchOk
        'op' = Word "LoopControl"     F:next T:MatchOk
        'op' = Word "Loop"            F:next T:MatchOk
        'op' = Word "Move"            F:next T:MatchOk
        'op' = Word "NoWord"          F:next T:MatchOk # alias for WordStart
        'op' = Word "Skip"            F:next T:MatchOk
        'op' = Word "SubTableInList"  F:next T:MatchOk
        'op' = Word "SubTable"        F:next T:MatchOk
        'op' = Word "sFindWord"       F:next T:MatchOk
        'op' = Word "sWordStart"      F:next T:MatchOk
        'op' = Word "sWordEnd"        F:next T:MatchOk
        'op' = Word "TableInList"     F:next T:MatchOk
        'op' = Word "Table"           F:next T:MatchOk
        'op' = Word "WordStart"       F:next T:MatchOk
        'op' = Word "WordEnd"         F:next T:MatchOk
        'op' = Word "Word"            F:MatchFail T:MatchOk

# Python keywords
# Try each keyword in turn - as soon as one matches we jump to <check>,
# which makes sure that we have found a whole word and not just the
# start of a longer identifier. If none of them match, the table fails.
t_keyword is:
    'keyword' = Table is:
        Word "and"      F:next T:<check>
        Word "assert"   F:next T:<check>
        Word "break"    F:next T:<check>
        Word "class"    F:next T:<check>
        Word "continue" F:next T:<check>
        Word "def"      F:next T:<check>
        Word "del"      F:next T:<check>
        Word "elif"     F:next T:<check>
        Word "else"     F:next T:<check>
        Word "except"   F:next T:<check>
        Word "exec"     F:next T:<check>
        Word "finally"  F:next T:<check>
        Word "for"      F:next T:<check>
        Word "from"     F:next T:<check>
        Word "global"   F:next T:<check>
        Word "if"       F:next T:<check>
        Word "import"   F:next T:<check>
        Word "in"       F:next T:<check>
        Word "is"       F:next T:<check>
        Word "lambda"   F:next T:<check>
        Word "not"      F:next T:<check>
        Word "or"       F:next T:<check>
        Word "pass"     F:next T:<check>
        Word "print"    F:next T:<check>
        Word "raise"    F:next T:<check>
        Word "return"   F:next T:<check>
        Word "try"      F:next T:<check>
        Word "while"    F:MatchFail T:<check>
        <check>
        # In order to not recognise things like "in_THIS_CASE"
        # we must check that the next character is not legitimate
        # within an identifier
        IsIn alpha+"_"+number F:next T:MatchFail
        # If it wasn't another identifier character, we need to
        # unread it so that it can be recognised as something else
        # (so that, for instance, "else:" is seen as "else" followed
        #  by ":")
        # NOTE(review): this assumes that the failed IsIn above has still
        # moved us one character past the keyword - confirm against the
        # tagging engine's behaviour when a match fails
        Skip back

# Do the same for mxText commands
# (that is, match any of the words in t_operation, and then make the
#  same "is this a whole word?" check that t_keyword makes)
t_mxkeyword is:
    'mxKeyword' = Table is:
        t_operation
        IsIn alpha+"_"+number F:next T:MatchFail
        Skip back

# Traditional identifiers
# - a letter or underscore, followed by any number of letters, digits
#   and underscores, so long as the result is not a reserved word
t_identifier is:
    'identifier' = Table is:
        t_keyword F:next T:MatchFail      # don't allow Python keywords
        t_mxkeyword F:next T:MatchFail    # don't allow mxText commands
        IsIn alpha+"_"                    # can't start with a digit
        AllIn alpha+'_'+number F:MatchOk  # a single character is enough

# The argument to an operation.
# We don't yet deal with anything in parentheses, which means we can't
# handle functions or command lists, or other things which "look like"
# a tuple.
# The special words are tried first (longest first, so "ToEOF" before
# "To"), then a string, then a signed integer, and lastly an identifier.
t_argument is:
    'arg' = Table is:
        'arg' = Word "Here"      F:next T:MatchOk # EOF Here, Fail Here
        'arg' = Word "ToEOF"     F:next T:MatchOk # Move ToEOF
        'arg' = Word "To"        F:next T:MatchOk # Jump To
        'arg' = Word "ThisTable" F:next T:MatchOk # [Sub]Table ThisTable
        'arg' = Word "back"      F:next T:MatchOk # Skip back
        'arg' = Word "Break"     F:next T:MatchOk # LoopControl Break
        'arg' = Word "Reset"     F:next T:MatchOk # LoopControl Reset
        t_string                 F:next T:MatchOk # e.g., Word "Fred"
        t_signed_integer         F:next T:MatchOk # e.g., Skip -4, Move 3
        t_identifier                              # e.g., Table Fred

# Recognise a plus sign bordered by optional whitespace
# (the surrounding whitespace is included in the 'plus' tag)
t_plus is:
    'plus' = Table is:
        t_opt_whitespace
        Is "+"
        t_opt_whitespace

# Arguments can contain "+"
# - so match one argument, followed by any number of "+ argument" pairs,
#   for example:
#        alpha+"_"+number
# A "+" which is NOT followed by an argument makes the whole thing fail.
t_plus_arg is:
    'plusarg' = Table is:
        t_argument              # start with a single argument
        <again>
        t_plus F:MatchOk        # if we have a "+"
        t_argument              # then we expect another argument
        Jump To <again>         # then look for another "+"

# A label is an identifier in angle brackets, with no spaces inside
# the brackets.
# Match, for example:
#        <fred>
t_label is:
    'label' = Table is:
        Is "<"
        t_identifier
        Is ">"

# Targets for Jump and F:/T:
# - either one of the predefined target names (tagged as 'tgt'), or
#   else a label
t_target is:
    'target' = Table is:
        'tgt' = Word "next"      F:next T:MatchOk
        'tgt' = Word "previous"  F:next T:MatchOk
        'tgt' = Word "repeat"    F:next T:MatchOk
        'tgt' = Word "MatchOk"   F:next T:MatchOk
        'tgt' = Word "MatchOK"   F:next T:MatchOk # for kindness' sake
        'tgt' = Word "MatchFail" F:next T:MatchOk
        t_label

# A value is either an identifier, or a string, or an integer
# (tried in that order - note that the integer is unsigned)
t_value is:
    'val' = Table is:
        t_identifier F:next T:MatchOk
        t_string     F:next T:MatchOk
        t_integer

# An assignment is (optionally) used in Tuple and Table definitions...
# This only matches the left hand side and the "=" itself - for example:
#        'fred' =
# Any whitespace after the "=" is left for the caller to deal with.
t_assignment is:
    'assignment' = Table is:
        t_value
        t_opt_whitespace
        Is '='

# A common error when writing tuples is to miss off the "=" sign
# - the following is used in diagnosing that (see t_bad_tuple below)
# (it's useful to have something with identical structure to the
#  "real thing")
# Here, the tag object is just a string, tagged as 'tagobj'.
t_bad_tagobj is:
    'tagobj' = Table is:
        t_string

# An "assignment" which is missing its "=" sign - that is, just the
# value. Note that it is still tagged as 'assignment', so that the
# result has the same structure as that from t_assignment.
t_bad_assignment is:
    'assignment' = Table is:
        t_value

# This is the line that starts the definition of a single tuple.
# For the moment, restrict what it gets assigned to to a simple
# identifier.
# Match, for example:
#        Fred is:
t_tupleblock is:
    'tupleblock' = Table is:
        t_identifier
        t_whitespace        # there must be space before the "is:"
        Word "is:"          # ...and none between the "is" and the ":"

# This is the line that starts a new table or sub-table.
# Both "Table" and "SubTable" are recognised, and whichever was found
# is tagged as 'type'.
# NOTE that this is used for the "outer" declaration of a tag table,
# and also for the "inner" declaration of an inner table or sub-table.
# The discrimination between these is done after initial parsing.
# Match, for example:
#        'keyword' = Table is:      (inner)
#        tagtable = Table is:       (outer)
t_tableblock is:
    'tableblock' = Table is:
        t_assignment:  # left hand side is optional
            t_opt_whitespace
        'type' = Word "Table" F:next T:<ok>  # Either "Table"
        'type' = Word "SubTable"             # or "SubTable"
        <ok>
        t_whitespace   # whitespace is required
        Word "is:"     # "is:" is required

# This is the line that starts an "if" block - that is, either an
# operation and its argument, or else a simple identifier, followed
# IMMEDIATELY by a colon.
# Match, for example:
#        Is "Fred":
#        controlsymbol:
t_ifblock is:
    'ifblock' = Table is:
        t_assignment:     # left hand side is optional
            t_opt_whitespace
        t_operation:
            t_whitespace
            t_plus_arg
            Is ":" F:MatchFail T:MatchOk
        # Else:
        t_identifier
        Is ":"

# Note that we don't allow spaces WITHIN our false and true thingies
# - so "F:next" is recognised, but "F: next" is not.
# The whitespace BEFORE the "F:" is required, and is part of the match.
t_onfalse is:
    'onfalse' = Table is:
        t_whitespace
        Word "F:"
        t_target

# The "T:" equivalent of t_onfalse - required whitespace, then "T:",
# then the target, with no spaces allowed between the last two
t_ontrue is:
    'ontrue' = Table is:
        t_whitespace
        Word "T:"
        t_target

# An "ordinary" tuple - an optional assignment, then an operation and
# its argument, then optional F: and T: targets (in that order).
# Valid examples are things like:
#        'fred' = Is "xxx" F:<wow> T:MatchOk
#       AllIn jim T:<foundJim>
#
# For the moment, we're not trying to recognise things in any detail
t_tuple is:
    'tuple' = Table is:
        t_assignment:                 # left hand side is optional
            t_opt_whitespace
        t_operation                   # operation is required
        t_whitespace                  # for now, always require space here
        t_plus_arg                    # argument is required
        t_onfalse F:next    T:next    # F:target is optional
        t_ontrue  F:MatchOk T:MatchOk # T:target is also optional

# If the user has defined a "partial" tuple, they might use something
# of the form:
#       match_fred  F:MatchFail T:MatchOk
# Note that an identifier on its own also matches, since both of the
# targets are optional.
t_tupleplus is:
    'tupleplus' = Table is:
        t_identifier
        t_onfalse F:next    T:next    # F:target is optional
        t_ontrue  F:MatchOk T:MatchOk # T:target is also optional

# Treat Jump To specially - for example:
#       Jump To <top>
# so that they don't have to do the less obvious "Jump To F:<label>"
# (although that will still be recognised, of course, for people who
# are used to the tag tuple format itself)
# Whitespace is required both between "Jump" and "To", and between
# "To" and the target.
t_jumpto is:
    'jumpto' = Table is:
        Word "Jump"
        t_whitespace
        Word "To"
        t_whitespace
        t_target

# Is it worth coping with these?
# Recognise two common mis-spellings of "Jump To <target>":
#       Jump to <top>
#       JumpTo <top>
# The result is tagged as 'jumpto', just like the real thing.
# Note that "JumpTo" must be tried BEFORE "Jump" (longest match first,
# as for t_operation), because Word "Jump" also matches the start of
# "JumpTo", after which the test for whitespace would fail the table.
t_bad_jumpto is:
    'jumpto' = Table is:
        Word "JumpTo" F:next T:<target> # cope with "JumpTo"
        Word "Jump"                     # and with "Jump to"
        t_whitespace
        Word "to"
        <target>
        t_opt_whitespace                # allow space before the target
        t_target

# The "content" of a line is the bit after any indentation, and before
# any comment...
# For the moment, we won't try to maintain ANY context, so it is up
# to the user of the tuples produced to see if they make sense...
# The alternatives are tried in order, and the first one to match wins.
# The LAST alternative must fail explicitly - "F:next" there would step
# off the end of the table, which the tagging engine counts as success,
# so that unrecognised text would be reported as (empty) 'content'.
t_content is:
    'content' = Table is:
        t_label        F:next T:MatchOk
        t_tableblock   F:next T:MatchOk        # [<tagobj> =] [Sub]Table is:
        t_tupleblock   F:next T:MatchOk        # <identifier> is:
        t_ifblock      F:next T:MatchOk        # <cmd> <arg>: OR <identifier>:
        t_jumpto       F:next T:MatchOk        # Jump To <target>
        t_tuple        F:next T:MatchOk
        t_tupleplus    F:MatchFail T:MatchOk   # name [F:<label> [T:<label>]]

# A line that we understand - some content, optionally followed by
# whitespace and/or a comment, and then the end of the line.
# (any indentation is not dealt with here - see t_line)
t_contentline is:
    'contentline' = Table is:
        t_content                    # something that we care about
        t_opt_whitespace
        t_comment F:next T:next      # always allow a comment
        IsIn newline                 # the end of the line

# Sometimes, the user (e.g., me) writes:
#       'fred' = Table:
# instead of:
#       'fred' = Table is:
# Unfortunately, without the "is", it would get too confusing whether
# we actually wanted an if block...
# Note that only "Table" is looked for here, and not "SubTable".
t_bad_tableblock is:
    'tableblock' = Table is:
        t_assignment:     # left hand side is optional
            t_opt_whitespace
        Word "Table"      # "Table" is required
        Is ":"            # "is" is needed before the ":"

# Sometimes, the user (e.g., me again) writes:
#       'fred' IsIn jim
# instead of:
#       'fred' = IsIn jim
# Whilst I'm not entirely convinced that "=" is the best character
# to use here, I think we do need something!
# Apart from the missing "=", this has the same structure as t_tuple,
# and it is tagged as 'tuple' as well.
t_bad_tuple is:
    'tuple' = Table is:
        t_bad_assignment  # obviously we have to have this!
        t_whitespace      # in which case the whitespace IS needed
        t_operation       # operation is required
        t_whitespace      # for the moment, we must have space here
        t_plus_arg        # argument is required
        t_onfalse F:next    T:next     # F:target is optional
        t_ontrue  F:MatchOk T:MatchOk  # T:target is also optional

# Make some attempt to recognise common errors...
# (at the moment, that means a table block with the "is" missing,
#  or a tuple with the "=" missing)
t_badcontent is:
    'badcontent' = Table is:
        t_bad_tableblock F:next T:MatchOk
        t_bad_tuple

# A line containing one of the errors that we recognise - this has the
# same shape as t_contentline, but with t_badcontent as its content
t_badline is:
    'badline' = Table is:
        t_badcontent            # something that we sort of care about
        t_opt_whitespace
        t_comment F:next T:next # always allow a comment
        IsIn newline            # the end of the line

# An empty line contains nothing, or else nothing but spaces and tabs
t_emptyline is:
    'emptyline' = Table is:
        t_opt_whitespace
        IsIn newline               # the end of the line

# A comment line contains just a comment
# (any indentation before the comment is not dealt with here - t_line
#  matches it before trying this table)
t_commentline is:
    'commentline' = Table is:
        t_comment
        IsIn newline               # the end of the line

# A line that we don't understand at all (for instance, ordinary Python
# code) - the text of the line is tagged as 'passthru', and that's it
t_passthruline is:
    'passthruline' = Table is:
        'passthru' = AllNotIn newline F:next # anything else on the line
        IsIn newline                         # the end of the line

# Basically, a file is a series of lines
# - and each line is classified as the first of the following that
#   matches it. Since t_passthruline comes last, it acts as the
#   "catch all" for anything that we didn't recognise.
t_line is:
    'line' = Table is:
        t_emptyline   F:next T:MatchOk  # empty lines are simple enough
        t_indent      F:next T:next     # optional indentation
        t_commentline F:next T:MatchOk  # always allow a comment
        t_contentline F:next T:MatchOk  # a line we care about
        t_badline     F:next T:MatchOk  # a line we think is wrong
        t_passthruline                  # a line we don't care about

# So read lines until we find the EOF
# (this is the "outer" form of table declaration, and is the table
#  that actually gets handed to the tagging function - see the test
#  code below)
t_file = Table is:
    t_line
    EOF Here F:previous       # not at the end yet, so go back for more


# ----------------------------------------------------------------------
if __name__ == '__main__':

    test_data = "#Test data\n"

    def print_tuples(tuplist):
        print "("
        for item in tuplist:
            print " ",item
        print ")"

    lines = string.split(test_data,"\n")
    count = 0
    print "Test data"
    print "---------"
    for line in lines:
        count = count+1
        print "%2d: %s"%(count,line)
    print

    print "Tagging text"
    print "------------"

    PYTAG = 0

    if PYTAG:
        import pytag
        pytag.set_verbosity(1)
        pytag.use_debugger()
        result,taglist,next = pytag.pytag(test_data,t_file)
    else:
        timer = TextTools._timer()
        timer.start()
        result, taglist, next = tag(test_data,t_file)
        print "Tagging took",timer.stop()[0],"seconds"


    print "Result: ",result
    print "Taglist:"
    print_tuples(taglist)