#line 76 "big5.ipk"
from array import array

# Layout of the two-byte code space covered by the lookup table.
# A code is (lead byte << 8) | trail byte; trail bytes fall into two
# disjoint ranges that are stored back to back within each lead-byte row.
# NOTE(review): Big5 is commonly described with the first trail range
# starting at 0x40; this table starts at 0x41 -- confirm against the way
# big5.dat was generated before changing it.
lead_first = 0xA1
lead_last = 0xF9
trail_first1 = 0x41
trail_last1 = 0x7E
group1_width = trail_last1 - trail_first1 + 1
trail_first2 = 0xA1
trail_last2 = 0xFE
# Number of table entries per lead byte (both trail ranges together).
width = group1_width + (trail_last2 - trail_first2 + 1)
# Total number of entries expected in the data file.
big5size = (lead_last - lead_first + 1) * width


# Lookup table, loaded once at import time as unsigned 16-bit values.
# array.fromfile reads machine-native values and raises EOFError when the
# file holds fewer than big5size items; the byte order of big5.dat is
# assumed to match the host -- TODO confirm.
tou = array('H')
# Relative path: resolved against the current working directory.
filename = 'interscript/encoding/big5.dat'
# 'with' guarantees the handle is closed even if fromfile() raises.
with open(filename, 'rb') as f:
    tou.fromfile(f, big5size)


def big5_to_unicode(ch):
    """Look up the table entry for the two-byte code *ch*.

    ch is an integer whose high byte (ch >> 8) is the lead byte and whose
    low byte (ch & 0xFF) is the trail byte.

    Returns the value stored in ``tou`` for that code, or 0xFFFF when the
    lead byte or the trail byte lies outside the ranges the table covers.
    """
    hi = ch >> 8
    lo = ch & 0xFF
    if not (lead_first <= hi <= lead_last):
        return 0xFFFF
    row = (hi - lead_first) * width
    if trail_first1 <= lo <= trail_last1:
        return tou[row + lo - trail_first1]
    if trail_first2 <= lo <= trail_last2:
        # The second trail range is stored right after the first one.
        return tou[row + lo - trail_first2 + group1_width]
    return 0xFFFF


def big5_to_utf8(s):
    """Convert a sequence of two-byte codes to a string via ``utf8``.

    s must yield integers when indexed (e.g. a bytes object), because each
    pair s[i], s[i+1] is combined as (s[i] << 8) | s[i+1] before lookup.

    Returns the concatenation of utf8(big5_to_unicode(code)) for every
    pair; an empty input yields ''.

    Raises IndexError when len(s) is odd (the last lead byte has no trail
    byte).

    NOTE(review): ``utf8`` is not defined or imported in the visible part
    of this module -- confirm it is brought into scope elsewhere.
    """
    # Stepping over the length (rather than looping until i == n) makes
    # empty input return '' instead of failing on s[0]; join avoids the
    # quadratic cost of repeated string concatenation.
    return ''.join(
        utf8(big5_to_unicode(s[i] << 8 | s[i + 1]))
        for i in range(0, len(s), 2)
    )