# -*- python -*-
"""Translation tags for Translate.py"""
__version__ = "0.3 (tiepin) of 1999-11-15"
__author__ = "Tibs"
# This is a back-translation of the tuples which were written by hand
# within Translate.py, *but* it is also suitable for translation (by
# said module) into Python code, and then importing into Translate.py
# for use instead of the original (hand written) tuples.
from TextTools import *
# ------------------------------------------------------------
# We are not, initially, going to try for anything very sophisticated
# - just something that will get us bootstrapped, so that I can use the
# "little language" to write more sophisticated stuff (without having
# to worry about dropped commas between tuples, and so on!)
# Whitespace is always useful
# t_whitespace matches a run of spaces and/or tabs (newline characters
# are not in the set, so it never crosses a line boundary).
t_whitespace is:
AllIn ' \t'
# Optional whitespace: the same match as t_whitespace, but if there is no
# whitespace we simply carry on with the next tuple.
t_opt_whitespace is:
t_whitespace F:next # move to next tuple if not whitespace
# Comments are fairly simple
# A comment is a '#' followed by everything up to (but not including) the
# end of the line; the whole thing is tagged 'comment'.
t_comment is:
'comment' = Table is:
Is '#'
# A bare '#' with nothing after it is still a comment, hence F:MatchOk
AllNotIn '\n\r' F:MatchOk
# We care about the "content" of the indentation at the start of a
# line, but note that it is optional
# t_indent itself requires at least some spaces/tabs, and tags them as
# 'indent'; the optional form is t_indentation.
t_indent is:
'indent' = AllIn ' \t'
# Optional indentation: try t_indent, and carry on with the next tuple
# whether or not any indentation was found.
t_indentation is:
t_indent F:next # zero indentation doesn't show
# A string is text within single or double quotes
# (of course, this is an oversimplification, because we should also
# deal with things like "This is a \"substring\"", and it would be
# nice to be able to cope with triple-quoted strings too, but it
# will do for a start)
# Major bug - doesn't recognise zero length strings...
# (since "AllNotIn" must match at least one character)
# The string, including its quotes, is tagged 'str'; the characters
# between the quotes are tagged 'text'.
t_string is:
'str' = Table is:
# Single-quoted form: once the opening quote has been seen, the text
# and the closing quote decide the result of the whole table.
Is "'":
'text' = AllNotIn "'"
Is "'" F:MatchFail T:MatchOk
# Otherwise it has to be the double-quoted form.
Is '"'
'text' = AllNotIn '"'
Is '"'
# An integer is a series of digits...
# The digits are tagged 'int'.
# (`number` is not defined in this file - presumably it is the set of
# digit characters supplied by "from TextTools import *"; confirm)
t_integer is:
'int' = AllIn number
# A signed integer is an optional "+" or "-" followed by an integer.
# The whole thing is tagged 'signed_int', the sign character (if any)
# is tagged 'sign', and the digits are tagged by t_integer.
t_signed_integer is:
'signed_int' = Table is:
# If we find a "+", go straight to the digits - no need to look for "-"
'sign' = Is "+" F:next T:<int>
'sign' = Is "-" F:next # sign is optional
<int>
t_integer
# We'll only go for the simplest words
# Remember to be careful to specify the LONGEST possible match first, so that
# we try for "IsIn" before we try for "Is" (because "IsIn" would *match* "Is",
# leaving us with a spurious "In" hanging around...)
# Each alternative is tried in turn: the first word that matches ends the
# table successfully (T:MatchOk), otherwise we move on to the next one
# (F:next). Only the last alternative fails the table (F:MatchFail).
# Note that both the table as a whole and the word matched are tagged 'op'.
t_operation is:
'op' = Table is:
'op' = Word "AllInSet" F:next T:MatchOk
'op' = Word "AllIn" F:next T:MatchOk
'op' = Word "AllNotIn" F:next T:MatchOk
'op' = Word "CallArg" F:next T:MatchOk
'op' = Word "Call" F:next T:MatchOk
'op' = Word "EOF" F:next T:MatchOk
'op' = Word "Fail" F:next T:MatchOk
'op' = Word "IsInSet" F:next T:MatchOk
'op' = Word "IsIn" F:next T:MatchOk
'op' = Word "IsNotIn" F:next T:MatchOk
'op' = Word "IsNot" F:next T:MatchOk
'op' = Word "Is" F:next T:MatchOk
'op' = Word "Jump" F:next T:MatchOk
'op' = Word "LoopControl" F:next T:MatchOk
'op' = Word "Loop" F:next T:MatchOk
'op' = Word "Move" F:next T:MatchOk
'op' = Word "NoWord" F:next T:MatchOk # alias for WordStart
'op' = Word "Skip" F:next T:MatchOk
'op' = Word "SubTableInList" F:next T:MatchOk
'op' = Word "SubTable" F:next T:MatchOk
'op' = Word "sFindWord" F:next T:MatchOk
'op' = Word "sWordStart" F:next T:MatchOk
'op' = Word "sWordEnd" F:next T:MatchOk
'op' = Word "TableInList" F:next T:MatchOk
'op' = Word "Table" F:next T:MatchOk
'op' = Word "WordStart" F:next T:MatchOk
'op' = Word "WordEnd" F:next T:MatchOk
# "Word" must come after "WordStart" and "WordEnd" (longest match first),
# and as the last alternative it is the one that fails the table
'op' = Word "Word" F:MatchFail T:MatchOk
# Python keywords
# The keyword is tagged 'keyword'. Each word is tried in turn; as soon as
# one matches we jump to <check>, which makes sure that we have not just
# matched the start of a longer identifier. If none of the words match,
# the last alternative fails the table.
t_keyword is:
'keyword' = Table is:
Word "and" F:next T:<check>
Word "assert" F:next T:<check>
Word "break" F:next T:<check>
Word "class" F:next T:<check>
Word "continue" F:next T:<check>
Word "def" F:next T:<check>
Word "del" F:next T:<check>
Word "elif" F:next T:<check>
Word "else" F:next T:<check>
Word "except" F:next T:<check>
Word "exec" F:next T:<check>
Word "finally" F:next T:<check>
Word "for" F:next T:<check>
Word "from" F:next T:<check>
Word "global" F:next T:<check>
Word "if" F:next T:<check>
Word "import" F:next T:<check>
Word "in" F:next T:<check>
Word "is" F:next T:<check>
Word "lambda" F:next T:<check>
Word "not" F:next T:<check>
Word "or" F:next T:<check>
Word "pass" F:next T:<check>
Word "print" F:next T:<check>
Word "raise" F:next T:<check>
Word "return" F:next T:<check>
Word "try" F:next T:<check>
Word "while" F:MatchFail T:<check>
<check>
# In order to not recognise things like "in_THIS_CASE"
# we must check that the next character is not legitimate
# within an identifier
IsIn alpha+"_"+number F:next T:MatchFail
# If it wasn't another identifier character, we need to
# unread it so that it can be recognised as something else
# (so that, for instance, "else:" is seen as "else" followed
# by ":")
Skip back
# Do the same for mxText commands
# That is, recognise one of the command words in t_operation, and then
# apply the same "not the start of a longer identifier" check that
# t_keyword uses. The result is tagged 'mxKeyword'.
t_mxkeyword is:
'mxKeyword' = Table is:
t_operation
IsIn alpha+"_"+number F:next T:MatchFail
Skip back
# Traditional identifiers
# A letter or underscore, followed by any number of letters, underscores
# and digits - so long as the result is not a Python keyword or an mxText
# command. The identifier is tagged 'identifier'.
t_identifier is:
'identifier' = Table is:
t_keyword F:next T:MatchFail # don't allow Python keywords
t_mxkeyword F:next T:MatchFail # don't allow mxText commands
IsIn alpha+"_" # can't start with a digit
# A single character identifier is fine, so not finding any more
# characters is still a match
AllIn alpha+'_'+number F:MatchOk
# We don't yet deal with the following with anything in parentheses,
# which means we can't handle functions or command lists, or other
# things which "look like" a tuple
# An argument is one of a small set of special words (tagged 'arg'), or
# a string, or a signed integer, or an identifier. The alternatives are
# tried in that order, and the whole argument is also tagged 'arg'.
t_argument is:
'arg' = Table is:
'arg' = Word "Here" F:next T:MatchOk # EOF Here, Fail Here
# "ToEOF" must be tried before "To" (longest match first)
'arg' = Word "ToEOF" F:next T:MatchOk # Move ToEOF
'arg' = Word "To" F:next T:MatchOk # Jump To
'arg' = Word "ThisTable" F:next T:MatchOk # [Sub]Table ThisTable
'arg' = Word "back" F:next T:MatchOk # Skip back
'arg' = Word "Break" F:next T:MatchOk # LoopControl Break
'arg' = Word "Reset" F:next T:MatchOk # LoopControl Reset
t_string F:next T:MatchOk # e.g., Word "Fred"
t_signed_integer F:next T:MatchOk # e.g., Skip -4, Move 3
t_identifier # e.g., Table Fred
# Recognise a plus sign bordered by optional whitespace
# The plus sign and any whitespace around it are tagged 'plus'.
t_plus is:
'plus' = Table is:
t_opt_whitespace
Is "+"
t_opt_whitespace
# Arguments can contain "+"
# Match, for example:
# alpha+"_"+number
# The whole expression is tagged 'plusarg'. After the first argument we
# loop, looking for a "+" and another argument, and stop (successfully)
# as soon as there is no "+" to be found.
t_plus_arg is:
'plusarg' = Table is:
t_argument # start with a single argument
<again>
t_plus F:MatchOk # if we have a "+"
t_argument # then we expect another argument
Jump To <again> # then look for another "+"
# Match, for example:
# <fred>
# That is, an identifier within angle brackets, with no spaces allowed
# inside the brackets. The label is tagged 'label'.
t_label is:
'label' = Table is:
Is "<"
t_identifier
Is ">"
# Targets for Jump and F:/T:
# A target is either one of the special words below (tagged 'tgt') or a
# label. The target as a whole is tagged 'target'.
t_target is:
'target' = Table is:
'tgt' = Word "next" F:next T:MatchOk
'tgt' = Word "previous" F:next T:MatchOk
'tgt' = Word "repeat" F:next T:MatchOk
'tgt' = Word "MatchOk" F:next T:MatchOk
'tgt' = Word "MatchOK" F:next T:MatchOk # for kindness' sake
'tgt' = Word "MatchFail" F:next T:MatchOk
t_label
# A value is either an identifier, or a string, or an integer
# The alternatives are tried in that order, and the value is tagged 'val'.
# Note that this uses t_integer, so a sign is not allowed here.
t_value is:
'val' = Table is:
t_identifier F:next T:MatchOk
t_string F:next T:MatchOk
t_integer
# An assignment is (optionally) used in Tuple and Table definitions...
# Match, for example:
# 'fred' =
# That is, a value, optional whitespace and then the "=" itself (any
# whitespace after the "=" is left for whoever uses this to deal with).
# The result is tagged 'assignment'.
t_assignment is:
'assignment' = Table is:
t_value
t_opt_whitespace
Is '='
# A common error when writing tuples is to miss off the "=" sign
# - the following is used in diagnosing that (see t_bad_tuple below)
# (it's useful to have something with identical structure to the
# "real thing")
# t_bad_tagobj is just a string, tagged 'tagobj'.
# (NB: nothing else in this file refers to t_bad_tagobj - t_bad_assignment
# below uses t_value instead)
t_bad_tagobj is:
'tagobj' = Table is:
t_string
# An "assignment" with the "=" missed off - i.e., just the value.
# It is tagged 'assignment', exactly as t_assignment is, so that what
# comes out has the same structure as the "real thing".
t_bad_assignment is:
'assignment' = Table is:
t_value
# This is the line that starts the definition of a single tuple.
# For the moment, restrict what it gets assigned to to a simple
# identifier.
# Match, for example:
# Fred is:
# Whitespace is required between the identifier and the "is:", and the
# "is:" is matched as a single word (so no space before the colon).
# The result is tagged 'tupleblock'.
t_tupleblock is:
'tupleblock' = Table is:
t_identifier
t_whitespace
Word "is:"
# This is the line that starts a new table or sub-table.
# For the moment, we only cope with full Tables.
# NOTE that this is used for the "outer" declaration of a tag table,
# and also for the "inner" declaration of an inner table or sub-table.
# The discrimination between these is done after initial parsing.
# Match, for example:
# 'keyword' = Table is: (inner)
# tagtable = Table is: (outer)
# The result is tagged 'tableblock', and the word "Table" or "SubTable"
# within it is tagged 'type'.
t_tableblock is:
'tableblock' = Table is:
t_assignment: # left hand side is optional
t_opt_whitespace
# If we find "Table" we can skip the test for "SubTable"
'type' = Word "Table" F:next T:<ok> # Either "Table"
'type' = Word "SubTable" # or "SubTable"
<ok>
t_whitespace # whitespace is required
Word "is:" # "is:" is required
# This is the line that starts an "if" block
# Match, for example:
# Is "Fred":
# controlsymbol:
# That is, either an operation and its argument followed by a colon, or
# else a lone identifier followed by a colon. In both cases there is no
# whitespace allowed before the colon. The result is tagged 'ifblock'.
t_ifblock is:
'ifblock' = Table is:
t_assignment: # left hand side is optional
t_opt_whitespace
# If we have an operation, then the rest of the line must be its
# argument and the colon - and that decides whether we match or not
t_operation:
t_whitespace
t_plus_arg
Is ":" F:MatchFail T:MatchOk
# Else:
t_identifier
Is ":"
# Note that we don't allow spaces WITHIN our false and true thingies
# Match, for example:
# F:next
# together with the (required) whitespace in front of it. The result is
# tagged 'onfalse'.
t_onfalse is:
'onfalse' = Table is:
t_whitespace
Word "F:"
t_target
# The same as t_onfalse, but for the "true" target - for example:
# T:MatchOk
# Again, the whitespace in front is required, and no spaces are allowed
# between the "T:" and its target. The result is tagged 'ontrue'.
t_ontrue is:
'ontrue' = Table is:
t_whitespace
Word "T:"
t_target
# Valid examples are things like:
# 'fred' = Is "xxx" F:<wow> T:MatchOk
# AllIn jim T:<foundJim>
#
# For the moment, we're not trying to recognise things in any detail
# The result is tagged 'tuple'. Note that if both targets are given, the
# F: target must come before the T: target.
t_tuple is:
'tuple' = Table is:
t_assignment: # left hand side is optional
t_opt_whitespace
t_operation # operation is required
t_whitespace # for now, always require space here
t_plus_arg # argument is required
t_onfalse F:next T:next # F:target is optional
t_ontrue F:MatchOk T:MatchOk # T:target is also optional
# If the user has defined a "partial" tuple, they might use something
# of the form:
# match_fred F:MatchFail T:MatchOk
# That is, the name of the partial tuple, followed by the same optional
# F: and T: targets that t_tuple allows. The result is tagged 'tupleplus'.
t_tupleplus is:
'tupleplus' = Table is:
t_identifier
t_onfalse F:next T:next # F:target is optional
t_ontrue F:MatchOk T:MatchOk # T:target is also optional
# Treat Jump To specially - for example:
# Jump To <top>
# so that they don't have to do the less obvious "Jump To F:<label>"
# (although that will still be recognised, of course, for people who
# are used to the tag tuple format itself)
# Whitespace is required both between "Jump" and "To", and between "To"
# and the target. The result is tagged 'jumpto'.
t_jumpto is:
'jumpto' = Table is:
Word "Jump"
t_whitespace
Word "To"
t_whitespace
t_target
# Is it worth coping with these?
t_bad_jumpto is:
'jumpto' = Table is:
Word "Jump": # cope with "Jump to"
t_whitespace
Word "to" T:<target>
Word "JumpTo" # and with "JumpTo"
<target>
t_target
# The "content" of a line is the bit after any indentation, and before
# any comment...
# For the moment, we won't try to maintain ANY context, so it is up
# to the user of the tuples produced to see if they make sense...
# The alternatives are tried in the order given, and the first one to
# match wins. The result is tagged 'content'.
# NOTE(review): the last alternative also says F:next, which steps off
# the end of the table - confirm how the tagging engine treats that (it
# may well count as a successful, empty, match).
t_content is:
'content' = Table is:
t_label F:next T:MatchOk
t_tableblock F:next T:MatchOk # [<tagobj> =] [Sub]Table is:
t_tupleblock F:next T:MatchOk # <identifier> is:
t_ifblock F:next T:MatchOk # <cmd> <arg>: OR <identifier>:
t_jumpto F:next T:MatchOk # Jump To <target>
t_tuple F:next T:MatchOk
t_tupleplus F:next T:MatchOk # name [F:<label> [T:<label>]]
# A content line is some content, optionally followed by whitespace and
# a comment, and then the end of the line. Any indentation is expected to
# have been dealt with already (see t_line). The result is tagged
# 'contentline'.
# (`newline` is not defined in this file - presumably it is the set of
# end-of-line characters supplied by "from TextTools import *"; confirm)
t_contentline is:
'contentline' = Table is:
t_content # something that we care about
t_opt_whitespace
t_comment F:next T:next # always allow a comment
IsIn newline # the end of the line
# Sometimes, the user (e.g., me) writes:
# 'fred' = Table:
# instead of:
# 'fred' = Table is:
# Unfortunately, without the "is", it would get too confusing whether
# we actually wanted an if block...
# So we recognise the mistake explicitly - it is tagged 'tableblock',
# just as the correct form would be. Note that only "Table" is looked
# for here, not "SubTable".
t_bad_tableblock is:
'tableblock' = Table is:
t_assignment: # left hand side is optional
t_opt_whitespace
Word "Table" # "Table" is required
Is ":" # "is" is needed before the ":"
# Sometimes, the user (e.g., me again) writes:
# 'fred' IsIn jim
# instead of:
# 'fred' = IsIn jim
# Whilst I'm not entirely convinced that "=" is the best character
# to use here, I think we do need something!
# The mistake is tagged 'tuple', just as the correct form would be, and
# apart from the missing "=" the structure is the same as t_tuple.
t_bad_tuple is:
'tuple' = Table is:
t_bad_assignment # obviously we have to have this!
t_whitespace # in which case the whitespace IS needed
t_operation # operation is required
t_whitespace # for the moment, we must have space here
t_plus_arg # argument is required
t_onfalse F:next T:next # F:target is optional
t_ontrue F:MatchOk T:MatchOk # T:target is also optional
# Make some attempt to recognise common errors...
# At the moment, that means a table block with the "is" missing, or a
# tuple with the "=" missing. The result is tagged 'badcontent'.
t_badcontent is:
'badcontent' = Table is:
t_bad_tableblock F:next T:MatchOk
t_bad_tuple
# A bad line has the same structure as a content line (see t_contentline),
# but with "bad" content in place of the real thing. The result is tagged
# 'badline'.
t_badline is:
'badline' = Table is:
t_badcontent # something that we sort of care about
t_opt_whitespace
t_comment F:next T:next # always allow a comment
IsIn newline # the end of the line
# An empty line is a line with nothing on it except (maybe) whitespace.
# The result is tagged 'emptyline'.
t_emptyline is:
'emptyline' = Table is:
t_opt_whitespace
IsIn newline # the end of the line
# A comment line is a comment and nothing else. No whitespace is allowed
# for before the comment - any indentation is expected to have been dealt
# with already (see t_line). The result is tagged 'commentline'.
t_commentline is:
'commentline' = Table is:
t_comment
IsIn newline # the end of the line
# A "pass through" line is anything we haven't managed to recognise as
# something else - we just take everything up to the end of the line.
# The text of the line is tagged 'passthru', and the line as a whole
# (including its end of line character) is tagged 'passthruline'.
t_passthruline is:
'passthruline' = Table is:
'passthru' = AllNotIn newline F:next # anything else on the line
IsIn newline # the end of the line
# Basically, a file is a series of lines
# Each line is tagged 'line'. The order of the tests matters - empty
# lines are looked for before we consume any indentation, and after that
# we try the kinds of line from the most specific to the least, so that
# t_passthruline only gets whatever nothing else has recognised.
t_line is:
'line' = Table is:
t_emptyline F:next T:MatchOk # empty lines are simple enough
t_indent F:next T:next # optional indentation
t_commentline F:next T:MatchOk # always allow a comment
t_contentline F:next T:MatchOk # a line we care about
t_badline F:next T:MatchOk # a line we think is wrong
t_passthruline # a line we don't care about
# So read lines until we find the EOF
# This is the "outer" form of table declaration (see t_tableblock), so
# t_file is the tag table itself - it is what gets handed to the tagging
# function in the test code below.
t_file = Table is:
t_line
# If we're not at the end of the file yet, go back and read another line
EOF Here F:previous
# ----------------------------------------------------------------------
# Simple self-test: tag a small piece of test data with t_file, and
# print out the result
if __name__ == '__main__':
# The text to tag - a single comment line
test_data = "#Test data\n"
# Print each item of a tag list on a line of its own, within parentheses
def print_tuples(tuplist):
print "("
for item in tuplist:
print " ",item
print ")"
# Show the test data, with line numbers
# (`string` is not imported in this file - presumably it comes in via
# "from TextTools import *"; confirm)
lines = string.split(test_data,"\n")
count = 0
print "Test data"
print "---------"
for line in lines:
count = count+1
print "%2d: %s"%(count,line)
print
print "Tagging text"
print "------------"
# Set PYTAG to a true value to do the tagging with the pytag module
# (with its verbosity set and its debugger in use), rather than with
# the normal `tag` function
PYTAG = 0
if PYTAG:
import pytag
pytag.set_verbosity(1)
pytag.use_debugger()
result,taglist,next = pytag.pytag(test_data,t_file)
else:
# (`TextTools` and `tag` are not defined in this file either -
# presumably they also come from the TextTools import; confirm)
timer = TextTools._timer()
timer.start()
result, taglist, next = tag(test_data,t_file)
print "Tagging took",timer.stop()[0],"seconds"
# Whichever way we did it, report what we got back
print "Result: ",result
print "Taglist:"
print_tuples(taglist)