Contrary to the perlpod manpage, Interscript pod commands are terminated at the end of a line, not the end of a 'paragraph'. It shouldn't make the slightest bit of difference, however, since weavers ignore blank lines anyhow.
When the tangler is in paragraph mode, blank lines are translated to end-of-paragraph commands. Paragraph mode is triggered by any non-command, non-blank data line, so a blank line that follows a command and precedes the first line of text does not produce an end of paragraph.
Currently, support for the for/begin/end pod constructions is a hack. Interscript uses a different (better!) mechanism.
#line 22 "perl_tangler.ipk"
from interscript.tanglers.base import tangler_base
from interscript.tanglers.c import c_string_tangler
import re
import string


class perl_tangler(tangler_base):
    """Tangler for Perl source containing embedded POD.

    Every line is written to the sink.  Lines seen in 'code' mode are
    echoed to the weaver as code; a line matching ``=command`` switches
    to 'pod' mode, in which headings, lists, flowing text, literal
    (indented) text and a subset of the POD escapes are translated into
    weaver calls until ``=cut`` switches back to 'code' mode.

    State kept on the instance:
      mode       -- 'code' or 'pod'
      pod_par    -- 0: beginning of paragraph, 1: flowing text,
                    2: literal text
      flow_text  -- flowing text accumulated for the current paragraph
      list_type  -- stack of open list kinds
                    ('keyed', 'bullet' or 'numbered')
      pod_mode   -- only present while inside pod constructs:
                    'list' (=over seen, list type still unknown),
                    'item', 'lit' (=begin), 'litpar' (=for) or ''
    """

    def __init__(self, sink, weaver, heading_level_offset=2):
        """Create a tangler writing to `sink` and documenting to `weaver`.

        `heading_level_offset` is added to the level of every =head1 and
        =head2 heading passed to the weaver.
        """
        tangler_base.__init__(self, sink, weaver)
        self.language = 'perl'

        self.mode = 'code'
        self.list_type = []
        # Paragraph state is initialised here so that flow_escape() (and
        # therefore __del__) is safe even if no flowing text was ever seen.
        self.pod_par = 0
        self.flow_text = ''
        self.pod_re = re.compile('^=([A-Za-z][A-Za-z0-9_]*) *(.*)$')
        self.heading_level_offset = heading_level_offset
        self.esc_re = re.compile('^(.*?)(>|[IBSCLFXZE]<)(.*)$')
        self.digits_re = re.compile('^([0-9]+)>(.*)$')
        self.entity_re = re.compile('^([A-Za-z]+)>(.*)$')
        # this is not a full list, we should in fact call a weaver routine.
        self.html_entity = {
            'amp': '&',
            'lt': '<',
            'gt': '>',
            'quot': '"',
            'copy': 'C',
            'trade': 'T',
            'reg': 'R'}

    def __del__(self):
        """Weave any pending flowing text and close all open lists."""
        self.flow_escape()
        self.end_lists()

    def flow_escape(self):
        """Weave the accumulated flowing text and clear the buffer.

        Does nothing when no text is pending.
        """
        line = self.flow_text
        if not line:
            return
        self.flow_text = ''
        # process balanced text,
        # if there is an unbalanced >, the text after it is returned
        # write a >, and then try again.
        tail = self.flow_parse(line)
        while tail:
            if 'tanglers' in self.process.trace:
                # single parenthesised argument: valid in Python 2 and 3
                print('Unbalanced > in perl POD text')
            self.weaver.write('>')
            tail = self.flow_parse(tail)

    # recursive descent parser
    def flow_parse(self, tail):
        """Weave `tail`, translating POD escapes such as I<...> and E<...>.

        Returns the text following an unmatched '>' (the caller decides
        what that '>' closes), or '' once all of `tail` has been woven.
        """
        if not tail:
            return ''
        weaver = self.weaver

        match = self.esc_re.match(tail)
        while match:
            pre, cmd, tail = match.group(1, 2, 3)
            if pre:
                weaver.write(pre)
            if cmd == '>':
                return tail

            assert len(cmd) == 2 and cmd[1] == '<'
            code = cmd[0]
            if code == 'I':
                weaver.begin_italic()
                tail = self.flow_parse(tail)
                weaver.end_italic()
            elif code == 'B':
                weaver.begin_bold()
                tail = self.flow_parse(tail)
                weaver.end_bold()
            elif code == 'S':
                # should be non-breaking spaces, but interscript
                # doesn't implement that
                tail = self.flow_parse(tail)
            elif code == 'C':
                weaver.begin_code()
                tail = self.flow_parse(tail)
                weaver.end_code()
            elif code == 'L':
                # a link: we just hack it for now
                weaver.write('[')
                tail = self.flow_parse(tail)
                weaver.write(']')
            elif code == 'F':
                # filename
                weaver.begin_code()
                tail = self.flow_parse(tail)
                weaver.end_code()
            elif code == 'X':
                # index entry?? (Does this mean print it, or index it?)
                # I'll just print it as code :-)
                weaver.begin_code()
                tail = self.flow_parse(tail)
                weaver.end_code()
            elif code == 'Z':
                # zero width character? What's that mean?
                tail = self.flow_parse(tail)
            elif code == 'E':
                numeric = self.digits_re.match(tail)
                if numeric:
                    digits, tail = numeric.group(1, 2)
                    try:
                        char = chr(int(digits))
                    except (ValueError, OverflowError):
                        # code outside what chr() accepts: weave the
                        # escape literally instead of aborting
                        char = 'E<' + digits + '>'
                    weaver.write(char)
                else:
                    named = self.entity_re.match(tail)
                    if named:
                        entity, tail = named.group(1, 2)
                        # unknown entity names are woven literally
                        data = self.html_entity.get(
                            entity, 'E<' + entity + '>')
                        weaver.write(data)
                    else:
                        # nothing we recognize, print literally
                        weaver.write('E<')
                        tail = self.flow_parse(tail)
                        weaver.write('>')

            match = self.esc_re.match(tail)

        # no (more) matches, so just weave the tail
        self.weaver.writeline(tail)
        return ''

    def end_list_item(self):
        """Close the current item of the innermost open list, if any."""
        if not self.list_type:
            return
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.end_keyed_list_item()
        elif kind == 'bullet':
            weaver.end_bullet_list_item()
        elif kind == 'numbered':
            weaver.end_numbered_list_item()

    def end_list(self):
        """Close the innermost open list, if any, and pop it."""
        if not self.list_type:
            return
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.end_keyed_list()
        elif kind == 'bullet':
            weaver.end_bullet_list()
        elif kind == 'numbered':
            weaver.end_numbered_list()
        del self.list_type[-1]

    def end_lists(self):
        """Close every open list, innermost first."""
        while self.list_type:
            self.end_list()

    def begin_list(self, kind):
        """Open a list of the given kind and push it on the stack."""
        # print '** list type:',kind
        self.list_type.append(kind)
        weaver = self.weaver
        if kind == 'keyed':
            weaver.begin_keyed_list()
        elif kind == 'bullet':
            weaver.begin_bullet_list()
        elif kind == 'numbered':
            weaver.begin_numbered_list()

    def begin_list_item(self, key=None):
        """Open an item in the innermost list; `key` is used by keyed lists."""
        kind = self.list_type[-1]
        weaver = self.weaver
        if kind == 'keyed':
            weaver.begin_keyed_list_item(key)
        elif kind == 'bullet':
            weaver.begin_bullet_list_item()
        elif kind == 'numbered':
            weaver.begin_numbered_list_item()

    def _strip_escapes(self, text):
        """Return `text` with every escape marker ('X<' and '>') removed."""
        stripped = ''
        tail = text
        match = self.esc_re.match(tail)
        while match:
            pre, _marker, tail = match.group(1, 2, 3)
            stripped = stripped + pre
            match = self.esc_re.match(tail)
        if tail:
            stripped = stripped + tail
        return stripped

    def writeline(self, data, file, count, inhibit_sref=0):
        """Tangle one source line and weave it as code or as POD.

        `file` and `count` identify where the line came from; a #line
        section marker is emitted first when they do not directly follow
        the previously written line, unless `inhibit_sref` or
        `self.inhibit_sref` is set.
        """
        if not inhibit_sref and not self.inhibit_sref:
            if (file != self.sink.last_source_file or
                    count != self.sink.last_source_count + 1):
                self.start_section(file, count)
        self.sink.last_source_file = file
        self.sink.last_source_count = count
        tangler_base._writeline(self, data)

        # try to find a pod command
        pod = self.pod_re.match(data)

        # if we're in code mode, and we didn't
        # get a pod command, just echotangle as code
        # otherwise, switch to pod mode
        if self.mode == 'code':
            if not pod:
                self.weaver.echotangle(self.sink.lines_written, data)
                return
            self.mode = 'pod'

        # now we're in pod mode, if we didn't get a pod command,
        # strip the line to see if it's blank.
        # if not, weave it and switch pod end of para detection on
        # otherwise, emit an end of paragraph if detection is on
        # (litpar mode ought to emulate an 'end' cmd here; that is
        # not implemented)
        # pod_par means: 0 - begin of para, 1 - flowing text, 2 - literal text
        assert self.mode == 'pod'
        if not pod:
            line = data.rstrip()
            if line:
                if not self.pod_par:
                    # leading whitespace marks a literal paragraph
                    if line[0] in ' \t':
                        self.pod_par = 2
                    else:
                        self.pod_par = 1
                        self.flow_text = ''
                if self.pod_par == 2:
                    self.weaver.writecode(line)
                else:
                    # escapes are searched for when the paragraph is
                    # flushed by flow_escape()
                    self.flow_text = self.flow_text + line + ' '
            elif self.pod_par:
                self.flow_escape()
                self.weaver.par()
                self.pod_par = 0  # beginning of paragraph
            return

        # we've got a pod command, so turn para detection off.
        # Flush flowing text that was not terminated by a blank line
        # first; otherwise it would be dropped or woven out of order.
        assert pod
        self.flow_escape()
        self.pod_par = 0
        cmd = pod.group(1)

        # if we're cutting back to code, terminate lists and list
        # items correctly if necessary and switch back to code mode
        if cmd == 'cut':
            self.end_lists()
            if hasattr(self, 'pod_mode'):
                if self.pod_mode in ['lit', 'litpar']:
                    self.weaver.enable()  # disable rawmode
                    self.weaver.translate()  # disable rawmode
                del self.pod_mode
            self.mode = 'code'
            return

        # Otherwise, just process the command
        if cmd == 'head1':
            self.end_lists()
            self.weaver.head(1 + self.heading_level_offset, pod.group(2))

        elif cmd == 'head2':
            self.end_lists()
            self.weaver.head(2 + self.heading_level_offset, pod.group(2))

        elif cmd == 'over':
            # list of unknown type pending, wait for =item
            self.pod_mode = 'list'

        elif cmd == 'back':
            if self.list_type:
                self.end_list_item()
                self.end_list()
            elif 'tanglers' in self.process.trace:
                print('POD: back without open list')

        elif cmd == 'item':
            if not hasattr(self, 'pod_mode'):
                if 'tanglers' in self.process.trace:
                    print('POD: item before over')
                self.pod_mode = 'list'
            key = pod.group(2).strip()
            if self.pod_mode == 'item' and self.list_type:
                self.end_list_item()
            else:
                # first item after =over, or the list the previous item
                # belonged to has already been closed: start a new list
                self.pod_mode = 'item'
                list_type = 'keyed'
                if len(key) == 1 and key in '*+.-':
                    list_type = 'bullet'
                self.begin_list(list_type)
            if self.list_type[-1] == 'keyed':
                # interscript doesn't support formatting of any kind
                # in keyed list keys (because LaTeX doesn't)
                # we need another kind of list (LaTeX can be given one)
                # For now, we remove any X<...> stuff
                key = self._strip_escapes(key)

            self.begin_list_item(key)

        elif cmd == 'for':
            self.weaver.raw_if(pod.group(2))
            self.pod_mode = 'litpar'
        elif cmd == 'begin':
            self.weaver.raw_if(pod.group(2))
            self.pod_mode = 'lit'
        elif cmd == 'end':
            self.weaver.enable()
            self.weaver.translate()
            # reset our own mode (not an attribute on the weaver)
            self.pod_mode = ''

    def write_comment(self, line):
        """Write `line` to the sink as a Perl '#' comment."""
        self._writeline('# ' + line)

    def start_section(self, file, count):
        """Emit a '#line' marker for `file`/`count` and echo it to the weaver."""
        data = '#line ' + str(count) + ' ' + '"' + file + '"'
        self._writeline(data)
        self.weaver.echotangle(self.sink.lines_written, data)

    def get_comment_tangler(self):
        """Return a tangler that writes '# ' comments to the same sink."""
        # NOTE(review): hash_comment_tangler is not imported in the part of
        # this file that is visible here -- confirm where it is defined.
        return hash_comment_tangler(self.sink, self.weaver, '# ')

    def get_string_tangler(self, eol, width):
        """Return a tangler for string literals (currently the C one)."""
        # This is _wrong_ and needs to be fixed!
        return c_string_tangler(self.sink, self.get_weaver(), eol, width)