First, the module initialisation package contains a special routine to autodetect the ISO-10646 encodings.
#line 63 "encoding.ipk"
# encoding subpackage

# Byte-order-mark signatures for the ISO-10646 encodings, paired with the
# encoding name reported by autodetect().
#
# Order matters: the little endian four byte mark (FF FE 00 00) begins with
# the little endian two byte mark (FF FE), so the four byte signatures must
# be tested before the two byte ones. A UTF-16LE file whose first character
# is U+0000 is indistinguishable from UCS-4LE; it is reported as 'ucs4le'.
iso10646_signatures = (
    (b'\x00\x00\xFE\xFF', 'ucs4'),     # full four byte encoding
    (b'\xFF\xFE\x00\x00', 'ucs4le'),   # little endian four byte encoding
    (b'\xEF\xBB\xBF', 'utf8'),         # utf8
    (b'\xFE\xFF', 'utf16'),            # unicode, also ucs2
    (b'\xFF\xFE', 'utf16le'),          # little endian unicode, also ucs2le
)


def autodetect(filename):
    """Return the name of the ISO-10646 encoding suggested by a file's BOM.

    The first four bytes of *filename* are read in binary mode and compared
    against each entry of ``iso10646_signatures``. The name paired with the
    first matching signature is returned.

    Files that are empty, shorter than a signature, or that carry no
    recognised signature are reported as ``'utf8'``.

    Errors raised by ``open`` (for example a missing file) are not caught.
    """
    # The context manager closes the file even if the read fails.
    with open(filename, 'rb') as f:
        data = f.read(4)
    for signature, name in iso10646_signatures:
        # startswith() copes with files shorter than the signature, so no
        # padding of the data is required.
        if data.startswith(signature):
            return name
    return 'utf8'