6.16.13. Perl Tangler

Contrary to the perlpod manpage, Interscript pod commands are terminated at the end of a line, not the end of a 'paragraph'. It shouldn't make the slightest bit of difference, however, since weavers ignore blank lines anyhow.
When the tangler is in paragraph mode, blank lines are translated to end-of-paragraph commands. Paragraph mode is triggered by any non-command, non-blank data line, so you won't get an end of paragraph between a command and the first text that follows it.
Currently, support for the for/begin/end pod constructions is a hack. Interscript uses a different (better!) mechanism.
     1: #line 22 "perl_tangler.ipk"
     2: from interscript.tanglers.base import tangler_base
     3: from interscript.tanglers.c import c_string_tangler
     4: import re
     5: import string
     6: 
     7: class perl_tangler(tangler_base):
     8:   def __init__(self,sink,weaver, heading_level_offset = 2):
     9:     tangler_base.__init__(self,sink,weaver)
    10:     self.language = 'perl'
    11: 
    12:     self.mode = 'code'
    13:     self.list_type = []
    14:     self.pod_re = re.compile('^=([A-Za-z][A-Za-z0-9_]*) *(.*)$')
    15:     self.heading_level_offset = heading_level_offset
    16:     self.esc_re = re.compile('^(.*?)(>|[IBSCLFXZE]<)(.*)$')
    17:     self.digits_re = re.compile('^([0-9]+)>(.*)$')
    18:     self.entity_re = re.compile('^([A-Za-z]+)>(.*)$')
    19:     # this is not a full list, we should in fact call a weaver routine.
    20:     self.html_entity = {
    21:       'amp':'&',
    22:       'lt':'<',
    23:       'gt':'>',
    24:       'quot':'"',
    25:       'copy':'C',
    26:       'trade':'T',
    27:       'reg':'R'}
    28: 
  def __del__(self):
    """On destruction, weave any pending flowing text and close open lists."""
    self.flow_escape()
    self.end_lists()
    32: 
    33:   def flow_escape(self):
    34:     line = self.flow_text
    35:     if not line: return
    36:     self.flow_text = ''
    37:     # process balanced text,
    38:     # if there is an unbalanced >, the text after it is returned
    39:     # write a >, and then try again.
    40:     tail = self.flow_parse(line)
    41:     while tail:
    42:       if 'tanglers' in self.process.trace:
    43:         print 'Unbalanced > in perl POD text'
    44:       self.weaver.write('>')
    45:       tail = self.flow_parse(tail)
    46: 
    47:   # recursive descent parser
    48:   def flow_parse(self,tail):
    49:     if not tail: return ''
    50:     weaver = self.weaver
    51: 
    52:     match = self.esc_re.match(tail)
    53:     while match:
    54:       pre, cmd, tail = match.group(1,2,3)
    55:       if pre: weaver.write(pre)
    56:       if cmd=='>': return tail
    57: 
    58:       assert len(cmd)==2 and cmd[1]=='<'
    59:       cmd = cmd[0]
    60:       if cmd == 'I':
    61:         weaver.begin_italic()
    62:         tail = self.flow_parse(tail)
    63:         weaver.end_italic()
    64:       elif cmd == 'B':
    65:         weaver.begin_bold()
    66:         tail = self.flow_parse(tail)
    67:         weaver.end_bold()
    68:       elif cmd == 'S':
    69:         # should be non-breaking spaces, but interscript
    70:         # doesn't implement that
    71:         tail = self.flow_parse(tail)
    72:       elif cmd == 'C':
    73:         weaver.begin_code()
    74:         tail = self.flow_parse(tail)
    75:         weaver.end_code()
    76:       elif cmd == 'L':
    77:         # a link: we just hack it for now
    78:         weaver.write('[')
    79:         tail = self.flow_parse(tail)
    80:         weaver.write(']')
    81:       elif cmd == 'F':
    82:         # filename
    83:         weaver.begin_code()
    84:         tail = self.flow_parse(tail)
    85:         weaver.end_code()
    86:       elif cmd == 'X':
    87:         # index entry??  (Does this mean print it, or index it?)
    88:         # I'll just print it as code :-)
    89:         weaver.begin_code()
    90:         tail = self.flow_parse(tail)
    91:         weaver.end_code()
    92:       elif cmd == 'Z':
    93:         # zero width character? What's that mean?
    94:         tail = self.flow_parse(tail)
    95:       elif cmd == 'E':
    96:         match = self.digits_re.match(tail)
    97:         if match:
    98:           digits, tail = match.group(1,2)
    99:           n = chr(int(digits))
   100:           weaver.write(n)
   101:         else:
   102:           match = self.entity_re.match(tail)
   103:           if match:
   104:             entity, tail = match.group(1,2)
   105:             data = self.html_entity.get(entity,'E<'+entity+'>')
   106:             weaver.write(data)
   107:           else:
   108:             # nothing we recognize, print literally
   109:             weaver.write('E<')
   110:             tail = self.flow_parse(tail)
   111:             weaver.write('>')
   112: 
   113:       match = self.esc_re.match(tail)
   114: 
   115:     # no (more) matches, so just weave the tail
   116:     self.weaver.writeline(tail)
   117:     return ''
   118: 
   119: 
   120:   def end_list_item(self):
   121:     kind = self.list_type[-1]
   122:     weaver = self.weaver
   123:     if kind == 'keyed': weaver.end_keyed_list_item()
   124:     elif kind == 'bullet': weaver.end_bullet_list_item()
   125:     elif kind == 'numbered': weaver.end_numbered_list_item()
   126: 
   127:   def end_list(self):
   128:     kind = self.list_type[-1]
   129:     weaver = self.weaver
   130:     if kind == 'keyed': weaver.end_keyed_list()
   131:     elif kind == 'bullet': weaver.end_bullet_list()
   132:     elif kind == 'numbered': weaver.end_numbered_list()
   133:     del self.list_type[-1]
   134: 
   135:   def end_lists(self):
   136:     while self.list_type: self.end_list()
   137: 
   138:   def begin_list(self,kind):
   139:     # print '** list type:',kind
   140:     self.list_type.append(kind)
   141:     weaver = self.weaver
   142:     if kind == 'keyed': weaver.begin_keyed_list()
   143:     elif kind == 'bullet': weaver.begin_bullet_list()
   144:     elif kind == 'numbered': weaver.begin_numbered_list()
   145: 
   146:   def begin_list_item(self,key=None):
   147:     kind = self.list_type[-1]
   148:     weaver = self.weaver
   149:     if kind == 'keyed': weaver.begin_keyed_list_item(key)
   150:     elif kind == 'bullet': weaver.begin_bullet_list_item()
   151:     elif kind == 'numbered': weaver.begin_numbered_list_item()
   152: 
   153:   def writeline(self,data,file,count,inhibit_sref=0):
   154:     if not inhibit_sref and not self.inhibit_sref:
   155:       if (file != self.sink.last_source_file or
   156:         count != self.sink.last_source_count+1):
   157:         self.start_section(file,count)
   158:     self.sink.last_source_file = file
   159:     self.sink.last_source_count = count
   160:     tangler_base._writeline(self,data)
   161: 
   162:     # try to find a pod command
   163:     pod = self.pod_re.match(data)
   164: 
   165:     # if we're in code mode, and we didn't
   166:     # get a pod command, just echotangle as code
   167:     # otherwise, switch to pod mode
   168: 
   169:     if self.mode == 'code':
   170:       if pod: self.mode = 'pod'
   171:       else:
   172:         self.weaver.echotangle(self.sink.lines_written,data)
   173:         return
   174: 
   175:     # now we're in pod mode, if we didn't get a pod command,
   176:     # strip the line to see if it's blank.
   177:     # if not, weave it and switching pod end of para detection on
   178:     # otherwise, emit an end of paragraph if detection is on
   179:     # unless we're in litpar mode, in which case we have to
   180:     # emulate an 'end' cmd
   181:     # pod_par means: 0 - begin of para, 1 - flowing text, 2 - literal text
   182:     assert self.mode == 'pod'
   183:     if not pod:
   184:       line = string.rstrip(data)
   185:       if line:
   186:         if not self.pod_par:
   187:           self.pod_par = (line[0] in ' \t')+1
   188:           if self.pod_par == 1: self.flow_text = ''
   189:         if self.pod_par-1:
   190:           self.weaver.writecode(line)
   191:         else:
   192:           # we have to search for escapes here!
   193:           self.flow_text = self.flow_text + line + ' '
   194:       elif self.pod_par:
   195:         self.flow_escape()
   196:         self.weaver.par()
   197:         self.pod_par = 0 # beginning of paragraph
   198:       return
   199: 
   200:     # we've got a pod command, so turn para detection off
   201:     assert pod
   202:     self.pod_par = 0
   203:     cmd = pod.group(1)
   204: 
   205:     # if we're cuttiung back to code, terminate lists and list
   206:     # items correctly if nececcary and switch back to code mode
   207: 
   208:     if cmd == 'cut':
   209:       self.end_lists()
   210:       if hasattr(self,'pod_mode'):
   211:         if self.pod_mode in ['lit','litpar']:
   212:           self.weaver.enable() # disable rawmode
   213:           self.weaver.translate() # disable rawmode
   214:         del self.pod_mode
   215:       self.mode = 'code'
   216:       return
   217: 
   218:     # Otherwise, just process the command
   219: 
   220:     if cmd == 'head1':
   221:       self.end_lists()
   222:       self.weaver.head(1+self.heading_level_offset, pod.group(2))
   223: 
   224:     elif cmd == 'head2':
   225:       self.end_lists()
   226:       self.weaver.head(2+self.heading_level_offset, pod.group(2))
   227: 
   228:     elif cmd == 'over':
   229:       # list of unknown type pending, wait for =item
   230:       self.pod_mode = 'list'
   231: 
   232:     elif cmd == 'back':
   233:       self.end_list_item()
   234:       self.end_list()
   235: 
   236:     elif cmd == 'item':
   237:       if not hasattr(self,'pod_mode'):
   238:         if 'tanglers' in self.process.trace:
   239:           print 'POD: item before over'
   240:         self.pod_mode = 'list'
   241:       key = pod.group(2)
   242:       key = string.strip(key)
   243:       if self.pod_mode == 'item':
   244:         self.end_list_item()
   245:       else:
   246:         self.pod_mode = 'item'
   247:         list_type = 'keyed'
   248:         if len(key)==1:
   249:           if key in '*+.-':
   250:             list_type = 'bullet'
   251:         self.begin_list(list_type)
   252:       if self.list_type[-1] == 'keyed':
   253:         # interscript doesn't support formatting of any kind
   254:         # in keyed list keys (because LaTeX doesn't)
   255:         # we need another kind of list (LaTeX can be given one)
   256:         # For now, we remove any X<...> stuff
   257:         stripkey = ''
   258:         tail = key
   259:         match = self.esc_re.match(tail)
   260:         while match:
   261:           pre, cmd, tail = match.group(1,2,3)
   262:           stripkey = stripkey + pre
   263:           match = self.esc_re.match(tail)
   264:         if tail: stripkey = stripkey + tail
   265:         key = stripkey
   266: 
   267:       self.begin_list_item(key)
   268: 
   269:     elif cmd == 'for':
   270:       self.weaver.raw_if(pod.group(2))
   271:       self.pod_mode = 'litpar'
   272:     elif cmd == 'begin':
   273:       self.weaver.raw_if(pod.group(2))
   274:       self.pod_mode = 'lit'
   275:     elif cmd == 'end':
   276:       self.weaver.enable()
   277:       self.weaver.translate()
   278:       self.weaver.pod_mode = ''
   279: 
   280:   def write_comment(self,line):
   281:     self._writeline('# '+line)
   282: 
   283:   def start_section(self, file, count):
   284:     data = '#line '+str(count)+' '+'"'+file+'"'
   285:     self._writeline(data)
   286:     self.weaver.echotangle(self.sink.lines_written,data)
   287: 
   288:   def get_comment_tangler(self):
   289:     return hash_comment_tangler(self.sink,weaver, '# ')
   290: 
   291:   def get_string_tangler(self,eol,width):
   292:     # This is _wrong_ and needs to be fixed!
   293:     return c_string_tangler(self.sink,self.get_weaver(),eol,width)
   294: