1: #line 302 "utf8.ipk"
2: import string
3:
def utf16(i):
    """Encode one code point as big-endian UTF-16.

    i is an integer code point in the range 0..0x10FFFF.

    Returns a string of 2 characters (code points below 0x10000) or
    4 characters (a surrogate pair).  Each character holds one byte
    value (0-255), high byte first.  No byte-order mark is emitted.

    Raises ValueError if i is negative or greater than 0x10FFFF.
    """
    # Values above 0x10FFFF cannot be represented: the computed "high"
    # word would spill into the low-surrogate range and yield garbage.
    if i < 0 or i > 0x10FFFF:
        raise ValueError("code point out of range for UTF-16: %r" % (i,))

    if i < 0x10000:
        # Single 16-bit code unit.
        return chr(i >> 8) + chr(i & 0xFF)

    # Supplementary plane: split the 20-bit offset into two 10-bit halves
    # carried by a high (0xD800..) and a low (0xDC00..) surrogate.
    offset = i - 0x10000
    w1 = 0xD800 + (offset >> 10)
    w2 = 0xDC00 + (offset & 0x3FF)
    return (
        chr(w1 >> 8) + chr(w1 & 0xFF) +
        chr(w2 >> 8) + chr(w2 & 0xFF)
    )
13:
def seq_to_utf16(a):
    """Encode an iterable of code points as one UTF-16 string.

    Each element of a is passed to utf16() and the resulting pieces are
    concatenated in order.  An empty iterable yields the empty string.
    """
    # join() builds the result in one pass instead of re-copying the
    # accumulated string for every element.
    return ''.join(utf16(ch) for ch in a)
18:
19:
def parse_utf16(s, i):
    """Decode one code point from big-endian UTF-16 data starting at s[i].

    s is an indexable sequence of byte values.  Elements may be ints
    (bytes, bytearray, array) or one-character strings holding a byte
    value, which is the form utf16() produces.
    i is the offset of the first byte of the code unit to decode.

    Returns a tuple (code_point, width) where width is the number of
    bytes consumed: 2 for a single code unit, 4 for a surrogate pair.

    Raises ValueError on an unpaired surrogate, and IndexError if the
    data ends in the middle of a code unit or surrogate pair.
    """
    def unit(k):
        # Read the 16-bit big-endian code unit stored at s[k], s[k + 1].
        hi, lo = s[k], s[k + 1]
        if not isinstance(hi, int):
            hi = ord(hi)
        if not isinstance(lo, int):
            lo = ord(lo)
        # Parenthesised on purpose: '+' binds tighter than '<<'.
        return (hi << 8) | lo

    w1 = unit(i)
    if w1 < 0xD800 or w1 > 0xDFFF:
        # Ordinary BMP code unit.
        return w1, 2
    if w1 > 0xDBFF:
        raise ValueError(
            "unpaired low surrogate 0x%04X at offset %d" % (w1, i))

    # w1 is a high surrogate; it must be followed by a low surrogate.
    w2 = unit(i + 2)
    if w2 < 0xDC00 or w2 > 0xDFFF:
        raise ValueError(
            "high surrogate 0x%04X at offset %d is not followed by a "
            "low surrogate" % (w1, i))
    return 0x10000 + ((w1 - 0xD800) << 10) + (w2 - 0xDC00), 4
27:
def utf16_to_array(s):
    """Decode UTF-16 data into an array of code points.

    s is handed to parse_utf16() one code point at a time, starting at
    offset 0 and advancing by the byte count parse_utf16() reports, until
    the end of s is reached.

    Returns an array.array with typecode 'L' holding the decoded code
    points in order; empty input yields an empty array.  Any exception
    raised by parse_utf16() propagates to the caller.
    """
    # Imported here so the function does not rely on a module-level
    # import of the array module.
    from array import array

    # 'L' (at least 32 bits) rather than 'H': code points decoded from
    # surrogate pairs exceed 0xFFFF and would not fit in 16 bits.
    codes = array('L')
    n = len(s)
    i = 0
    while i < n:
        code, width = parse_utf16(s, i)
        codes.append(code)
        i += width
    return codes
39:
def utf16_to_utf8(s):
    """Re-encode s by chaining utf16_to_array() and seq_to_utf8().

    s is first passed to utf16_to_array(); whatever that returns is then
    passed to seq_to_utf8() (defined outside this section), and that
    function's result is returned unchanged.
    """
    code_points = utf16_to_array(s)
    return seq_to_utf8(code_points)
42:
43: