6.17.1.2.1. Main Routine

This is the main routine for supplying a line of data to the tokeniser. It was adapted from the original function by control inversion: when it runs out of data, instead of fetching more, it simply returns, keeping its scanning state on the tokeniser object between calls. The sketch below illustrates the inversion on a toy example, and a usage sketch follows the listing.
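To make the inversion concrete, here is a minimal sketch of the same transformation applied to a trivial word counter rather than to the tokeniser itself (all names below are illustrative only, not part of the tokeniser):

    # Pull style: the routine drives the loop and fetches its own data.
    def count_words_pull(readline):
        total = 0
        while 1:
            line = readline()
            if not line: return total          # exhausted the source
            total = total + len(line.split())

    # Push style (control inverted): the caller drives the loop; the
    # routine keeps its state on the object and simply returns when
    # it runs out of data.
    class word_counter:
        def __init__(self):
            self.total = 0
        def writeline(self, line):
            if not line: return                # out of data: just return
            self.total = self.total + len(line.split())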
Start python section to interscript/tokenisers/python.py[4/4]
#line 251 "python_tokeniser.ipk"
  def writeline(self,line):
    lnum = self.lnum = self.lnum + 1
    pos, max = 0, len(line)
    tokeneater = self.tokeneater

    if self.contstr:                                   # continued string
        if not line:
            raise TokenError, ("EOF in multi-line string", self.strstart)
        endmatch = self.endprog.match(line)
        if endmatch:
            pos = end = endmatch.end(0)
            if self.split_multiline_strings:
              tokeneater(MULTILINE_STRING_LAST,
                line[:end], (lnum,0),(lnum,end), line)
            else:
              tokeneater(STRING, self.contstr + line[:end],
                self.strstart, (lnum, end), self.contline + line)
            self.contstr, self.needcont = '', 0
            self.contline = None
        elif self.needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
            tokeneater(ERRORTOKEN, self.contstr + line,
                       self.strstart, (lnum, len(line)), self.contline)
            self.contstr = ''
            self.contline = None
            return
        else:
            self.contstr = self.contstr + line
            self.contline = self.contline + line
            if self.split_multiline_strings:
              tokeneater(MULTILINE_STRING_MIDDLE,
                line, (lnum, 0), (lnum, len(line)), line)
            return

    elif self.parenlev == 0 and not self.continued:    # new statement
        if not line: self._close(); return

        column = 0
        while pos < max:                               # measure leading whitespace
            if line[pos] == ' ': column = column + 1
            elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
            elif line[pos] == '\f': column = 0
            else: break
            pos = pos + 1
        if pos == max: self._close(); return           # omitted newline

        if line[pos] in '#\r\n':                       # skip comments or blank lines
            if self.report_comments:
              tokeneater((NL, COMMENT)[line[pos] == '#'], line[pos:],
                       (lnum, pos), (lnum, len(line)), line)
            return

        if column > self.indents[-1]:                  # count indents or dedents
            self.indents.append(column)
            tokeneater(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
        while column < self.indents[-1]:
            self.indents = self.indents[:-1]
            tokeneater(DEDENT, '', (lnum, pos), (lnum, pos), line)

    else:                                              # continued statement
        if not line:
            raise TokenError, ("EOF in multi-line statement", (lnum, 0))
        self.continued = 0

    while pos < max:
        pseudomatch = pseudoprog.match(line, pos)
        if pseudomatch:                                # scan for tokens
            start, end = pseudomatch.span(1)
            spos, epos, pos = (lnum, start), (lnum, end), end
            token, initial = line[start:end], line[start]

            if initial in numchars \
                or (initial == '.' and token != '.'):  # ordinary number
                tokeneater(NUMBER, token, spos, epos, line)
            elif initial in '\r\n':
                if self.parenlev == 0:
                  tokeneater(NEWLINE, token, spos, epos, line)
                elif self.report_comments:
                  tokeneater(NL, token, spos, epos, line)

            elif initial == '#':
                if self.report_comments:
                  tokeneater(COMMENT, token, spos, epos, line)
            elif token in ("'''", '"""',               # triple-quoted
                           "r'''", 'r"""', "R'''", 'R"""'):
                self.endprog = endprogs[token]
                endmatch = self.endprog.match(line, pos)
                if endmatch:                           # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    tokeneater(STRING, token, spos, (lnum, pos), line)
                else:
                    if self.split_multiline_strings:
                      token = line[start:]
                      tokeneater(MULTILINE_STRING_FIRST,
                        token, spos, (lnum, len(line)), line)
                    self.strstart = (lnum, start)    # multiple lines
                    self.contstr = line[start:]
                    self.contline = line
                    break
            elif initial in ("'", '"') or \
                token[:2] in ("r'", 'r"', "R'", 'R"'):
                if token[-1] == '\n':                  # continued string
                    self.strstart = (lnum, start)
                    self.endprog = endprogs[initial] or endprogs[token[1]]
                    self.contstr, self.needcont = line[start:], 1
                    self.contline = line
                    if self.split_multiline_strings:
                      tokeneater(MULTILINE_STRING_FIRST,
                        line[start:], (lnum, start), (lnum, len(line)), line)
                    break
                else:                                  # ordinary string
                    tokeneater(STRING, token, spos, epos, line)
            elif initial in namechars:                 # ordinary name
                tokeneater(NAME, token, spos, epos, line)
            elif initial == '\\':                      # continued stmt
                self.continued = 1
            else:
                if initial in '([{': self.parenlev = self.parenlev + 1
                elif initial in ')]}': self.parenlev = self.parenlev - 1
                if self.squashop:
                  tokeneater(OP, token, spos, epos, line)
                else:
                  op = opdict[token]
                  tokeneater(op, token, spos, epos, line)
        else:
            tokeneater(ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
            pos = pos + 1


  def _close(self):
      for indent in self.indents[1:]:          # pop remaining indent levels
          self.tokeneater(DEDENT, '', (self.lnum, 0), (self.lnum, 0), '')
      self.tokeneater(ENDMARKER, '', (self.lnum, 0), (self.lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenise(open(sys.argv[1]).readline)
    else: tokenise(sys.stdin.readline)

End python section to interscript/tokenisers/python.py[4]
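Judging from the code above, the expected usage is to push each source line to writeline and then push an empty line at end of file, which falls into the _close path and flushes the pending DEDENT and ENDMARKER tokens. The driver below is only a sketch: the class name and constructor signature (python_tokeniser taking the tokeneater callback) are assumptions rather than something defined in this section, while the five-argument callback signature matches the tokeneater calls made above.

    # Hypothetical driver; the import path follows the tangled file name,
    # but the class name and constructor are assumed, not shown above.
    from interscript.tokenisers.python import python_tokeniser

    def show(kind, token, spos, epos, line):
        # tokeneater callback: same five arguments as the calls above
        print kind, repr(token), spos, epos

    t = python_tokeniser(show)            # assumed constructor signature
    for line in open('example.py').readlines():
        t.writeline(line)                 # push one line at a time
    t.writeline('')                       # empty line signals EOF: emits DEDENTs and ENDMARKER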