#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Character-by-character codec for the GSM 03.38 default alphabet.

Importing this module registers a codec search function for the encoding
name stored in ENCODING_NAME.  Both directions work on text: encode() maps
every character of its input through ``encoding_map`` and decode() maps it
back through ``decoding_map``.
"""

import codecs

try:
    unichr
except NameError:  # Python 3: chr() already returns a text character
    unichr = chr

ENCODING_NAME = "gsm0338"

# GSM 03.38 code -> Unicode character.
#
# NOTE(review): the two-septet escape sequences of the extension table are
# keyed as ONE code point (e.g. unichr(0x1B65) for the euro sign), so
# encode() emits a single character for them rather than ESC followed by a
# second septet.  The self-test at the bottom of this module relies on that
# representation -- confirm that the consumers of the encoded text expect it.
decoding_map = {
    unichr(0x00): unichr(0x0040),  # COMMERCIAL AT
    unichr(0x01): unichr(0x00A3),  # POUND SIGN
    unichr(0x02): unichr(0x0024),  # DOLLAR SIGN
    unichr(0x03): unichr(0x00A5),  # YEN SIGN
    unichr(0x04): unichr(0x00E8),  # LATIN SMALL LETTER E WITH GRAVE
    unichr(0x05): unichr(0x00E9),  # LATIN SMALL LETTER E WITH ACUTE
    unichr(0x06): unichr(0x00F9),  # LATIN SMALL LETTER U WITH GRAVE
    unichr(0x07): unichr(0x00EC),  # LATIN SMALL LETTER I WITH GRAVE
    unichr(0x08): unichr(0x00F2),  # LATIN SMALL LETTER O WITH GRAVE
    unichr(0x09): unichr(0x00E7),  # LATIN SMALL LETTER C WITH CEDILLA
    unichr(0x0A): unichr(0x000A),  # LINE FEED
    unichr(0x0B): unichr(0x00D8),  # LATIN CAPITAL LETTER O WITH STROKE
    unichr(0x0C): unichr(0x00F8),  # LATIN SMALL LETTER O WITH STROKE
    unichr(0x0D): unichr(0x000D),  # CARRIAGE RETURN
    unichr(0x0E): unichr(0x00C5),  # LATIN CAPITAL LETTER A WITH RING ABOVE
    unichr(0x0F): unichr(0x00E5),  # LATIN SMALL LETTER A WITH RING ABOVE
    unichr(0x10): unichr(0x0394),  # GREEK CAPITAL LETTER DELTA
    unichr(0x11): unichr(0x005F),  # LOW LINE
    unichr(0x12): unichr(0x03A6),  # GREEK CAPITAL LETTER PHI
    unichr(0x13): unichr(0x0393),  # GREEK CAPITAL LETTER GAMMA
    unichr(0x14): unichr(0x039B),  # GREEK CAPITAL LETTER LAMDA
    unichr(0x15): unichr(0x03A9),  # GREEK CAPITAL LETTER OMEGA
    unichr(0x16): unichr(0x03A0),  # GREEK CAPITAL LETTER PI
    unichr(0x17): unichr(0x03A8),  # GREEK CAPITAL LETTER PSI
    unichr(0x18): unichr(0x03A3),  # GREEK CAPITAL LETTER SIGMA
    unichr(0x19): unichr(0x0398),  # GREEK CAPITAL LETTER THETA
    unichr(0x1A): unichr(0x039E),  # GREEK CAPITAL LETTER XI
    unichr(0x1B): unichr(0x00A0),  # ESCAPE TO EXTENSION TABLE (represented as NO-BREAK SPACE)
    unichr(0x1B0A): unichr(0x000C),  # FORM FEED
    unichr(0x1B14): unichr(0x005E),  # CIRCUMFLEX ACCENT
    unichr(0x1B28): unichr(0x007B),  # LEFT CURLY BRACKET
    unichr(0x1B29): unichr(0x007D),  # RIGHT CURLY BRACKET
    unichr(0x1B2F): unichr(0x005C),  # REVERSE SOLIDUS
    unichr(0x1B3C): unichr(0x005B),  # LEFT SQUARE BRACKET
    unichr(0x1B3D): unichr(0x007E),  # TILDE
    unichr(0x1B3E): unichr(0x005D),  # RIGHT SQUARE BRACKET
    unichr(0x1B40): unichr(0x007C),  # VERTICAL LINE
    unichr(0x1B65): unichr(0x20AC),  # EURO SIGN
    unichr(0x1C): unichr(0x00C6),  # LATIN CAPITAL LETTER AE
    unichr(0x1D): unichr(0x00E6),  # LATIN SMALL LETTER AE
    unichr(0x1E): unichr(0x00DF),  # LATIN SMALL LETTER SHARP S (German)
    unichr(0x1F): unichr(0x00C9),  # LATIN CAPITAL LETTER E WITH ACUTE
    unichr(0x20): unichr(0x0020),  # SPACE
    unichr(0x21): unichr(0x0021),  # EXCLAMATION MARK
    unichr(0x22): unichr(0x0022),  # QUOTATION MARK
    unichr(0x23): unichr(0x0023),  # NUMBER SIGN
    unichr(0x24): unichr(0x00A4),  # CURRENCY SIGN
    unichr(0x25): unichr(0x0025),  # PERCENT SIGN
    unichr(0x26): unichr(0x0026),  # AMPERSAND
    unichr(0x27): unichr(0x0027),  # APOSTROPHE
    unichr(0x28): unichr(0x0028),  # LEFT PARENTHESIS
    unichr(0x29): unichr(0x0029),  # RIGHT PARENTHESIS
    unichr(0x2A): unichr(0x002A),  # ASTERISK
    unichr(0x2B): unichr(0x002B),  # PLUS SIGN
    unichr(0x2C): unichr(0x002C),  # COMMA
    unichr(0x2D): unichr(0x002D),  # HYPHEN-MINUS
    unichr(0x2E): unichr(0x002E),  # FULL STOP
    unichr(0x2F): unichr(0x002F),  # SOLIDUS
    unichr(0x30): unichr(0x0030),  # DIGIT ZERO
    unichr(0x31): unichr(0x0031),  # DIGIT ONE
    unichr(0x32): unichr(0x0032),  # DIGIT TWO
    unichr(0x33): unichr(0x0033),  # DIGIT THREE
    unichr(0x34): unichr(0x0034),  # DIGIT FOUR
    unichr(0x35): unichr(0x0035),  # DIGIT FIVE
    unichr(0x36): unichr(0x0036),  # DIGIT SIX
    unichr(0x37): unichr(0x0037),  # DIGIT SEVEN
    unichr(0x38): unichr(0x0038),  # DIGIT EIGHT
    unichr(0x39): unichr(0x0039),  # DIGIT NINE
    unichr(0x3A): unichr(0x003A),  # COLON
    unichr(0x3B): unichr(0x003B),  # SEMICOLON
    unichr(0x3C): unichr(0x003C),  # LESS-THAN SIGN
    unichr(0x3D): unichr(0x003D),  # EQUALS SIGN
    unichr(0x3E): unichr(0x003E),  # GREATER-THAN SIGN
    unichr(0x3F): unichr(0x003F),  # QUESTION MARK
    unichr(0x40): unichr(0x00A1),  # INVERTED EXCLAMATION MARK
    unichr(0x41): unichr(0x0041),  # LATIN CAPITAL LETTER A
    unichr(0x42): unichr(0x0042),  # LATIN CAPITAL LETTER B
    unichr(0x43): unichr(0x0043),  # LATIN CAPITAL LETTER C
    unichr(0x44): unichr(0x0044),  # LATIN CAPITAL LETTER D
    unichr(0x45): unichr(0x0045),  # LATIN CAPITAL LETTER E
    unichr(0x46): unichr(0x0046),  # LATIN CAPITAL LETTER F
    unichr(0x47): unichr(0x0047),  # LATIN CAPITAL LETTER G
    unichr(0x48): unichr(0x0048),  # LATIN CAPITAL LETTER H
    unichr(0x49): unichr(0x0049),  # LATIN CAPITAL LETTER I
    unichr(0x4A): unichr(0x004A),  # LATIN CAPITAL LETTER J
    unichr(0x4B): unichr(0x004B),  # LATIN CAPITAL LETTER K
    unichr(0x4C): unichr(0x004C),  # LATIN CAPITAL LETTER L
    unichr(0x4D): unichr(0x004D),  # LATIN CAPITAL LETTER M
    unichr(0x4E): unichr(0x004E),  # LATIN CAPITAL LETTER N
    unichr(0x4F): unichr(0x004F),  # LATIN CAPITAL LETTER O
    unichr(0x50): unichr(0x0050),  # LATIN CAPITAL LETTER P
    unichr(0x51): unichr(0x0051),  # LATIN CAPITAL LETTER Q
    unichr(0x52): unichr(0x0052),  # LATIN CAPITAL LETTER R
    unichr(0x53): unichr(0x0053),  # LATIN CAPITAL LETTER S
    unichr(0x54): unichr(0x0054),  # LATIN CAPITAL LETTER T
    unichr(0x55): unichr(0x0055),  # LATIN CAPITAL LETTER U
    unichr(0x56): unichr(0x0056),  # LATIN CAPITAL LETTER V
    unichr(0x57): unichr(0x0057),  # LATIN CAPITAL LETTER W
    unichr(0x58): unichr(0x0058),  # LATIN CAPITAL LETTER X
    unichr(0x59): unichr(0x0059),  # LATIN CAPITAL LETTER Y
    unichr(0x5A): unichr(0x005A),  # LATIN CAPITAL LETTER Z
    unichr(0x5B): unichr(0x00C4),  # LATIN CAPITAL LETTER A WITH DIAERESIS
    unichr(0x5C): unichr(0x00D6),  # LATIN CAPITAL LETTER O WITH DIAERESIS
    unichr(0x5D): unichr(0x00D1),  # LATIN CAPITAL LETTER N WITH TILDE
    unichr(0x5E): unichr(0x00DC),  # LATIN CAPITAL LETTER U WITH DIAERESIS
    unichr(0x5F): unichr(0x00A7),  # SECTION SIGN
    unichr(0x60): unichr(0x00BF),  # INVERTED QUESTION MARK
    unichr(0x61): unichr(0x0061),  # LATIN SMALL LETTER A
    unichr(0x62): unichr(0x0062),  # LATIN SMALL LETTER B
    unichr(0x63): unichr(0x0063),  # LATIN SMALL LETTER C
    unichr(0x64): unichr(0x0064),  # LATIN SMALL LETTER D
    unichr(0x65): unichr(0x0065),  # LATIN SMALL LETTER E
    unichr(0x66): unichr(0x0066),  # LATIN SMALL LETTER F
    unichr(0x67): unichr(0x0067),  # LATIN SMALL LETTER G
    unichr(0x68): unichr(0x0068),  # LATIN SMALL LETTER H
    unichr(0x69): unichr(0x0069),  # LATIN SMALL LETTER I
    unichr(0x6A): unichr(0x006A),  # LATIN SMALL LETTER J
    unichr(0x6B): unichr(0x006B),  # LATIN SMALL LETTER K
    unichr(0x6C): unichr(0x006C),  # LATIN SMALL LETTER L
    unichr(0x6D): unichr(0x006D),  # LATIN SMALL LETTER M
    unichr(0x6E): unichr(0x006E),  # LATIN SMALL LETTER N
    unichr(0x6F): unichr(0x006F),  # LATIN SMALL LETTER O
    unichr(0x70): unichr(0x0070),  # LATIN SMALL LETTER P
    unichr(0x71): unichr(0x0071),  # LATIN SMALL LETTER Q
    unichr(0x72): unichr(0x0072),  # LATIN SMALL LETTER R
    unichr(0x73): unichr(0x0073),  # LATIN SMALL LETTER S
    unichr(0x74): unichr(0x0074),  # LATIN SMALL LETTER T
    unichr(0x75): unichr(0x0075),  # LATIN SMALL LETTER U
    unichr(0x76): unichr(0x0076),  # LATIN SMALL LETTER V
    unichr(0x77): unichr(0x0077),  # LATIN SMALL LETTER W
    unichr(0x78): unichr(0x0078),  # LATIN SMALL LETTER X
    unichr(0x79): unichr(0x0079),  # LATIN SMALL LETTER Y
    unichr(0x7A): unichr(0x007A),  # LATIN SMALL LETTER Z
    unichr(0x7B): unichr(0x00E4),  # LATIN SMALL LETTER A WITH DIAERESIS
    unichr(0x7C): unichr(0x00F6),  # LATIN SMALL LETTER O WITH DIAERESIS
    unichr(0x7D): unichr(0x00F1),  # LATIN SMALL LETTER N WITH TILDE
    unichr(0x7E): unichr(0x00FC),  # LATIN SMALL LETTER U WITH DIAERESIS
    unichr(0x7F): unichr(0x00E0),  # LATIN SMALL LETTER A WITH GRAVE
}

# Unicode character -> GSM 03.38 code: the exact inverse of decoding_map.
encoding_map = dict((v, k) for (k, v) in decoding_map.items())

# Substitutes used by errors='replace': the GSM code of '?' when encoding,
# U+FFFD REPLACEMENT CHARACTER when decoding.
_ENCODE_REPLACEMENT = encoding_map[unichr(0x003F)]
_DECODE_REPLACEMENT = unichr(0xFFFD)


class _UnmappableError(UnicodeError, KeyError):
    """Raised in strict mode for a character that has no mapping.

    It is a UnicodeError because that is what codec users catch, and also a
    KeyError so that code written against the earlier dict-lookup
    implementation keeps working.
    """

    def __str__(self):
        # KeyError.__str__ would show the repr() of the message.
        return self.args[0]


def _translate(text, table, errors, replacement):
    """Map every character of *text* through *table* and join the results.

    Characters missing from *table* are dropped when *errors* is 'ignore',
    replaced by *replacement* when it is 'replace', and raise
    _UnmappableError for any other value (treated as 'strict').
    """
    pieces = []
    for position, char in enumerate(text):
        mapped = table.get(char)
        if mapped is not None:
            pieces.append(mapped)
        elif errors == 'ignore':
            continue
        elif errors == 'replace':
            pieces.append(replacement)
        else:
            raise _UnmappableError(
                "'%s' codec can't map character %r in position %d"
                % (ENCODING_NAME, char, position))
    return "".join(pieces)


class Codec(codecs.Codec):
    """Stateless codec translating one character at a time."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded_text, number_of_input_characters_consumed)``.

        Raises a UnicodeError (that is also a KeyError) for a character
        missing from encoding_map unless *errors* is 'ignore' or 'replace'.
        """
        encoded = _translate(input, encoding_map, errors, _ENCODE_REPLACEMENT)
        return (encoded, len(input))

    def decode(self, input, errors='strict'):
        """Return ``(decoded_text, number_of_input_characters_consumed)``.

        Raises a UnicodeError (that is also a KeyError) for a code missing
        from decoding_map unless *errors* is 'ignore' or 'replace'.
        """
        decoded = _translate(input, decoding_map, errors, _DECODE_REPLACEMENT)
        return (decoded, len(input))


# The stream classes use the plain codecs bases: the _multibytecodec stream
# types need a C-level ``codec`` class attribute, which this pure-Python
# codec cannot provide.
class StreamWriter(Codec, codecs.StreamWriter):
    pass


class StreamReader(Codec, codecs.StreamReader):
    pass


### encodings module API

# NOTE(review): getregentry(), which gsm_search() returns the result of, is
# not part of the text this module was reviewed from -- it has to be kept
# here, between this marker and gsm_search(), unchanged.


def gsm_search(encoding):
    """Codec search function: answer only for ENCODING_NAME.

    Returns None for every other encoding name so that the codec registry
    goes on to ask the remaining search functions.
    """
    if encoding != ENCODING_NAME:
        return None
    return getregentry()


# Register our codec when we load the module
codecs.register(gsm_search)

if __name__ == "__main__":
    # EURO SIGN, o/a/u WITH DIAERESIS and SHARP S, spelled with escapes so
    # the check does not depend on how this source file is decoded.
    text = u"\u20ac\xf6\xe4\xfc\xdf"
    text2 = codecs.encode(text, ENCODING_NAME)
    assert text2 == u"\u1B65\x7C\x7B\x7E\x1E"
    text = u""
    text2 = codecs.encode(text, ENCODING_NAME)
    assert text == text2