#line 104 "ms_codepages.ipk"
# Lookup tables for Microsoft/vendor code pages.
#
# At import time this module reads one 256-entry table per code page from
# 'interscript/encoding/<name>.dat' (a path relative to the current working
# directory) and exposes helpers that look values up in those tables.
import string
import re
from array import array

# NOTE(review): not referenced anywhere in this file; presumably the local
# mirror directory of the vendor mapping files the .dat tables were built
# from -- confirm before removing.
local_file_head = 'interscript/ftp/ftp.unicode.org/public/unidata/Mappings/Vendors/Micsft/'

# [table name, description] pairs.  The table name is both the key used in
# `tou` and the base name of the .dat file that is loaded; the description
# is not used by the loader below.
files = [
    ['Ebcdic_Cp037', 'Ebcdic'],
    ['Ebcdic_Cp1026', 'Ebcdic'],
    ['Ebcdic_Cp500', 'Ebcdic 500V3'],
    ['Ebcdic_Cp875', 'Ebcdic'],
    ['Mac_Cyrillic', '10007'],
    ['Mac_Greek', '10006'],
    ['Mac_Iceland', '10079'],
    ['Mac_Latin2', '10029'],
    ['Mac_Roman', '10000'],
    ['Mac_Turkish', '10081'],
    ['PC_Cp437', 'Latin US'],
    ['PC_Cp737', 'Greek'],
    ['PC_Cp775', 'Baltic Rim'],
    ['PC_Cp850', 'Latin 1'],
    ['PC_Cp852', 'Latin 2'],
    ['PC_Cp855', 'Cyrillic'],
    ['PC_Cp857', 'Turkish'],
    ['PC_Cp860', 'Portuguese'],
    ['PC_Cp861', 'Icelandic'],
    ['PC_Cp862', 'Hebrew'],
    ['PC_Cp863', 'French Canada'],
    ['PC_Cp864', 'Arabic'],
    ['PC_Cp865', 'Nordic'],
    ['PC_Cp866', 'Cyrillic CIS 1'],
    ['PC_Cp869', 'Greek 2'],
    ['PC_Cp874', 'Thai'],
    ['Windows_Cp1250', 'Central Europe'],
    ['Windows_Cp1251', 'Cyrillic (Slavic)'],
    ['Windows_Cp1252', 'Latin 1 (ANSI)'],
    ['Windows_Cp1253', 'Greek'],
    ['Windows_Cp1254', 'Latin 5 (Turkish)'],
    ['Windows_Cp1255', 'Hebrew'],
    ['Windows_Cp1256', 'Arabic'],
    ['Windows_Cp1257', 'Baltic Rim'],
    ['Windows_Cp1258', '?'],
    ['Windows_Cp874', 'Thai']
]

# tou: table name -> array('H') holding the 256 values read from that
# table's .dat file (index = code page byte value).  The values are
# presumably Unicode code points -- TODO confirm against the .dat generator.
tou = {}
for file, descr in files:
    tou[file] = array('H')
    filename = 'interscript/encoding/' + file + '.dat'
    f = open(filename, 'rb')
    try:
        # array.fromfile raises EOFError when fewer than 256 items are
        # available; the finally clause still releases the file handle.
        tou[file].fromfile(f, 256)
    finally:
        f.close()


def microsoft_to_unicode(x, ch):
    """Return the table entry for byte value `ch` in code page `x`.

    x  -- table name, one of the first elements of the `files` entries.
    ch -- integer index into the 256-entry table.

    Raises KeyError for an unknown table name and IndexError for an index
    outside the table.
    """
    return tou[x][ch]


def _byte_value(ch):
    """Return `ch` as an integer table index.

    Iterating a bytes object yields integers, while iterating a text string
    yields one-character strings; both forms are accepted.
    """
    if isinstance(ch, int):
        return ch
    return ord(ch)


def microsoft_to_utf8(x, s):
    """Translate `s`, encoded in code page `x`, via the table and `utf8`.

    x -- table name, one of the first elements of the `files` entries.
    s -- iterable of code page bytes, either integers or one-character
         strings.

    Each item of `s` is looked up in the table for `x`, the result is passed
    to `utf8`, and the pieces are concatenated.  An empty `s` yields ''.

    Raises KeyError for an unknown table name and IndexError for an item
    whose value lies outside the table.

    NOTE(review): `utf8` is neither defined nor imported in this file; it
    has to be supplied from elsewhere for non-empty input to work -- confirm
    where it is expected to come from.
    """
    d = tou[x]
    # Build all pieces first and join once instead of growing a string
    # step by step.
    return ''.join([utf8(d[_byte_value(ch)]) for ch in s])