1: #line 349 "utf8.ipk"
2: import string
3:
def utf16le(i):
    """Encode one code point as UTF-16 little-endian.

    Args:
        i: Integer code point in the range 0..0x10FFFF.

    Returns:
        A string of two characters (code points below 0x10000) or four
        characters (surrogate pair). Each character's ordinal is one byte
        of the encoding, low byte first within every 16-bit unit.

    Raises:
        ValueError: If ``i`` is outside 0..0x10FFFF, which UTF-16 cannot
            represent.

    Note:
        Values in the surrogate range 0xD800..0xDFFF are not rejected; they
        are emitted as a single 16-bit unit.
    """
    if not 0 <= i <= 0x10FFFF:
        raise ValueError("code point out of UTF-16 range: %r" % (i,))
    if i < 0x10000:
        return chr(i & 0xFF) + chr(i >> 8)
    # Supplementary planes: split the 20-bit offset into a surrogate pair.
    offset = i - 0x10000
    w1 = 0xD800 + (offset >> 10)
    w2 = 0xDC00 + (offset & 0x3FF)
    return (
        chr(w1 & 0xFF) + chr(w1 >> 8)
        + chr(w2 & 0xFF) + chr(w2 >> 8)
    )
13:
def seq_to_utf16le(a):
    """Encode a sequence of code points as one UTF-16LE string.

    Args:
        a: Iterable of integer code points, each passed to ``utf16le``.

    Returns:
        The concatenation of ``utf16le(ch)`` for every ``ch`` in ``a``, in
        order; the empty string when ``a`` is empty.
    """
    # Join once instead of growing the result string piece by piece.
    return ''.join([utf16le(ch) for ch in a])
18:
19:
def parse_utf16le(s, i):
    """Decode one code point from UTF-16LE data starting at offset ``i``.

    Args:
        s: Indexable sequence of byte values. Elements may be ints in
            0..255 (``bytes``, ``bytearray``, a list) or one-character
            strings, which are converted with ``ord``.
        i: Offset in ``s`` of the first byte of the code unit to decode.

    Returns:
        A ``(code_point, consumed)`` tuple. ``consumed`` is 2 when a single
        16-bit unit was read and 4 when a surrogate pair was read.

    Raises:
        IndexError: If ``s`` ends before the unit or pair is complete.
        ValueError: If a lead surrogate (0xD800..0xDBFF) is not followed by
            a trail surrogate (0xDC00..0xDFFF).

    Note:
        A trail surrogate that appears without a preceding lead is returned
        unchanged as a single unit.
    """
    def unit(k):
        # Assemble the little-endian 16-bit code unit stored at offset k.
        lo, hi = s[k], s[k + 1]
        if isinstance(lo, str):
            lo = ord(lo)
        if isinstance(hi, str):
            hi = ord(hi)
        # Parenthesised on purpose: '+' binds tighter than '<<'.
        return (hi << 8) | lo

    w1 = unit(i)
    if not 0xD800 <= w1 <= 0xDBFF:
        return w1, 2

    w2 = unit(i + 2)
    if not 0xDC00 <= w2 <= 0xDFFF:
        raise ValueError(
            "unpaired lead surrogate 0x%04X at offset %d" % (w1, i)
        )
    return 0x10000 + ((w1 - 0xD800) << 10) + (w2 - 0xDC00), 4
27:
def utf16le_to_array(s):
    """Decode UTF-16LE data into an array of code points.

    Args:
        s: Sequence of byte values, in whatever form ``parse_utf16le``
            accepts.

    Returns:
        An ``array.array`` with typecode ``'L'`` holding one entry per
        decoded code point, in input order; empty when ``s`` is empty.

    Raises:
        Whatever ``parse_utf16le`` raises for the given input.
    """
    # Imported here because no module-level import of ``array`` is visible.
    from array import array

    # 'L' is at least 32 bits wide; 'H' would overflow on the code points
    # above 0xFFFF that surrogate pairs decode to.
    codes = array('L')
    i = 0
    n = len(s)
    while i < n:
        # parse_utf16le reports how many bytes it used (2 or 4).
        code, consumed = parse_utf16le(s, i)
        codes.append(code)
        i += consumed
    return codes
39:
def utf16le_to_utf8(s):
    """Re-encode UTF-16LE input by way of an intermediate code point array.

    Decodes ``s`` with ``utf16le_to_array`` and hands the result to
    ``seq_to_utf8``, returning whatever that call returns.

    NOTE(review): ``seq_to_utf8`` is not defined in this part of the file;
    presumably it encodes a sequence of code points as UTF-8 -- confirm.
    """
    code_points = utf16le_to_array(s)
    return seq_to_utf8(code_points)
42:
43:
44: