183: #line 251 "python_tokeniser.ipk"
def writeline(self,line):
    """Tokenise one line of source and report its tokens to ``self.tokeneater``.

    ``line`` is one line of source text (normally ending in its newline);
    the empty string signals end of input.  Scanner state -- line number,
    indentation stack, bracket depth, any unfinished string literal -- is
    kept on ``self`` between calls, so lines must be supplied in order.

    Each token is reported as
    ``tokeneater(type, text, (row, start_col), (row, end_col), line)``.

    Raises ``TokenError`` when end of input arrives inside a multi-line
    string or inside a statement that is still being continued.
    """
    lnum = self.lnum = self.lnum + 1
    pos, line_len = 0, len(line)
    tokeneater = self.tokeneater

    if self.contstr:
        # A string literal opened on an earlier line is still unfinished.
        if not line:
            # Call form instead of ``raise X, (args)``: same exception and
            # arguments, but valid syntax on every Python version.
            raise TokenError("EOF in multi-line string", self.strstart)
        endmatch = self.endprog.match(line)
        if endmatch:
            # The string ends on this line; scanning resumes after it.
            pos = end = endmatch.end(0)
            if self.split_multiline_strings:
                tokeneater(MULTILINE_STRING_LAST,
                           line[:end], (lnum, 0), (lnum, end), line)
            else:
                tokeneater(STRING, self.contstr + line[:end],
                           self.strstart, (lnum, end), self.contline + line)
            self.contstr, self.needcont = '', 0
            self.contline = None
        elif self.needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
            # A single-quoted string was continued with a backslash but this
            # line neither closes it nor continues it again.
            tokeneater(ERRORTOKEN, self.contstr + line,
                       self.strstart, (lnum, len(line)), self.contline)
            self.contstr = ''
            # Clear the flag as well: a later triple-quoted string sets
            # contstr without touching needcont, so a stale 1 here would make
            # that string's following lines be reported as errors.
            self.needcont = 0
            self.contline = None
            return
        else:
            # Still inside the string: accumulate and wait for the next line.
            self.contstr = self.contstr + line
            self.contline = self.contline + line
            if self.split_multiline_strings:
                # NOTE(review): the first line of this call is absent from
                # the listing under review; the callee and the
                # MULTILINE_STRING_MIDDLE token type are inferred from the
                # MULTILINE_STRING_LAST call above -- confirm.
                tokeneater(MULTILINE_STRING_MIDDLE,
                           line, (lnum, 0), (lnum, len(line)), line)
            return

    elif self.parenlev == 0 and not self.continued:
        # Start of a new statement: measure indentation first.
        if not line:
            self._close()
            return

        column = 0
        while pos < line_len:
            if line[pos] == ' ':
                column = column + 1
            elif line[pos] == '\t':
                # Advance to the next tab stop.  Written with % rather than
                # ``column/tabsize`` so the result stays an integer whatever
                # the interpreter's division semantics (assumes an integer
                # tabsize).
                column = column - column % tabsize + tabsize
            elif line[pos] == '\f':
                column = 0
            else:
                break
            pos = pos + 1
        if pos == line_len:
            # Only whitespace and no line terminator: treated as end of input.
            self._close()
            return

        if line[pos] in '#\r\n':
            # Comment-only or blank line: never affects indentation.
            if self.report_comments:
                tokeneater((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
            return

        if column > self.indents[-1]:
            self.indents.append(column)
            tokeneater(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
        while column < self.indents[-1]:
            self.indents = self.indents[:-1]
            tokeneater(DEDENT, '', (lnum, pos), (lnum, pos), line)

    else:
        # Continuation of a statement (open bracket or trailing backslash).
        if not line:
            raise TokenError("EOF in multi-line statement", (lnum, 0))
        self.continued = 0

    while pos < line_len:
        pseudomatch = pseudoprog.match(line, pos)
        if pseudomatch:
            start, end = pseudomatch.span(1)
            spos, epos, pos = (lnum, start), (lnum, end), end
            token, initial = line[start:end], line[start]

            if initial in numchars \
               or (initial == '.' and token != '.'):
                tokeneater(NUMBER, token, spos, epos, line)
            elif initial in '\r\n':
                # A line end is a NEWLINE only outside brackets; inside
                # brackets it is reported as NL, and only on request.
                if self.parenlev == 0:
                    tokeneater(NEWLINE, token, spos, epos, line)
                elif self.report_comments:
                    tokeneater(NL, token, spos, epos, line)
            elif initial == '#':
                if self.report_comments:
                    tokeneater(COMMENT, token, spos, epos, line)
            elif token in ("'''", '"""',
                           "r'''", 'r"""', "R'''", 'R"""'):
                # Opening of a triple-quoted string.
                self.endprog = endprogs[token]
                endmatch = self.endprog.match(line, pos)
                if endmatch:
                    # Closed on the same line.
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    tokeneater(STRING, token, spos, (lnum, pos), line)
                else:
                    # Runs on to later lines: remember it and stop scanning.
                    if self.split_multiline_strings:
                        token = line[start:]
                        # NOTE(review): the first line of this call is absent
                        # from the listing under review; the callee and the
                        # MULTILINE_STRING_FIRST token type are inferred from
                        # the MULTILINE_STRING_LAST call -- confirm.
                        tokeneater(MULTILINE_STRING_FIRST,
                                   token, spos, (lnum, len(line)), line)
                    self.strstart = (lnum, start)
                    self.contstr = line[start:]
                    self.contline = line
                    break
            elif initial in ("'", '"') or \
                 token[:2] in ("r'", 'r"', "R'", 'R"'):
                if token[-1] == '\n':
                    # Single-quoted string continued onto the next line.
                    self.strstart = (lnum, start)
                    self.endprog = endprogs[initial] or endprogs[token[1]]
                    self.contstr, self.needcont = line[start:], 1
                    self.contline = line
                    if self.split_multiline_strings:
                        # NOTE(review): the first line of this call is absent
                        # from the listing under review; the callee and the
                        # MULTILINE_STRING_FIRST token type are inferred from
                        # the MULTILINE_STRING_LAST call -- confirm.
                        tokeneater(MULTILINE_STRING_FIRST,
                                   line[start:], (lnum, start),
                                   (lnum, len(line)), line)
                    break
                else:
                    tokeneater(STRING, token, spos, epos, line)
            elif initial in namechars:
                tokeneater(NAME, token, spos, epos, line)
            elif initial == '\\':
                # Backslash continuation: the next line belongs to this
                # statement.
                self.continued = 1
            else:
                if initial in '([{':
                    self.parenlev = self.parenlev + 1
                elif initial in ')]}':
                    self.parenlev = self.parenlev - 1
                if self.squashop:
                    # Report every operator under the generic OP type.
                    tokeneater(OP, token, spos, epos, line)
                else:
                    op = opdict[token]
                    tokeneater(op, token, spos, epos, line)
        else:
            # Nothing recognisable here: report one character and move on.
            tokeneater(ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
            pos = pos + 1
def _close(self):
    """Finish the token stream at end of input.

    Reports one DEDENT for every entry of ``self.indents`` after the
    first, followed by a single ENDMARKER.  All of these tokens are empty
    strings positioned at column 0 of the current line number, with an
    empty source line.
    """
    emit = self.tokeneater
    here = (self.lnum, 0)
    open_levels = len(self.indents[1:])
    for kind in [DEDENT] * open_levels + [ENDMARKER]:
        emit(kind, '', here, here, '')
# Script entry point: tokenise the file named by the first command-line
# argument, or standard input when no argument is given.
if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1:
        source = open(sys.argv[1])
        # Close the file even if tokenising fails.  try/finally rather than
        # ``with`` so the block runs on the same old interpreters as the
        # rest of the file.  Presumably tokenise() has finished reading by
        # the time it returns -- confirm.
        try:
            tokenise(source.readline)
        finally:
            source.close()
    else:
        tokenise(sys.stdin.readline)