iro/anbieter/gsm0338.py
changeset 16 22a6b6947743
child 17 44a3eda179b6
equal deleted inserted replaced
15:c04a21066aad 16:22a6b6947743
       
     1 #!/usr/bin/python
       
     2 # -*- coding: iso-8859-7 -*-
       
     3 import codecs
       
     4 
       
     5 ENCODING_NAME = "gsm0338"
       
     6 
       
     7 decoding_map = {
       
     8 	0x00:0x0040, #	COMMERCIAL AT
       
     9 	0x01:0x00A3, #	POUND SIGN
       
    10 	0x02:0x0024, #	DOLLAR SIGN
       
    11 	0x03:0x00A5, #	YEN SIGN
       
    12 	0x04:0x00E8, #	LATIN SMALL LETTER E WITH GRAVE
       
    13 	0x05:0x00E9, #	LATIN SMALL LETTER E WITH ACUTE
       
    14 	0x06:0x00F9, #	LATIN SMALL LETTER U WITH GRAVE
       
    15 	0x07:0x00EC, #	LATIN SMALL LETTER I WITH GRAVE
       
    16 	0x08:0x00F2, #	LATIN SMALL LETTER O WITH GRAVE
       
    17 	0x09:0x00E7, #	LATIN SMALL LETTER C WITH CEDILLA
       
    18 	0x0A:0x000A, #	LINE FEED
       
    19 	0x0B:0x00D8, #	LATIN CAPITAL LETTER O WITH STROKE
       
    20 	0x0C:0x00F8, #	LATIN SMALL LETTER O WITH STROKE
       
    21 	0x0D:0x000D, #	CARRIAGE RETURN
       
    22 	0x0E:0x00C5, #	LATIN CAPITAL LETTER A WITH RING ABOVE
       
    23 	0x0F:0x00E5, #	LATIN SMALL LETTER A WITH RING ABOVE
       
    24 	0x10:0x0394, #	GREEK CAPITAL LETTER DELTA
       
    25 	0x11:0x005F, #	LOW LINE
       
    26 	0x12:0x03A6, #	GREEK CAPITAL LETTER PHI
       
    27 	0x13:0x0393, #	GREEK CAPITAL LETTER GAMMA
       
    28 	0x14:0x039B, #	GREEK CAPITAL LETTER LAMDA
       
    29 	0x15:0x03A9, #	GREEK CAPITAL LETTER OMEGA
       
    30 	0x16:0x03A0, #	GREEK CAPITAL LETTER PI
       
    31 	0x17:0x03A8, #	GREEK CAPITAL LETTER PSI
       
    32 	0x18:0x03A3, #	GREEK CAPITAL LETTER SIGMA
       
    33 	0x19:0x0398, #	GREEK CAPITAL LETTER THETA
       
    34 	0x1A:0x039E, #	GREEK CAPITAL LETTER XI
       
    35 	0x1B:0x00A0, #	ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above)
       
    36 	0x1B0A:0x000C, #	FORM FEED
       
    37 	0x1B14:0x005E, #	CIRCUMFLEX ACCENT
       
    38 	0x1B28:0x007B, #	LEFT CURLY BRACKET
       
    39 	0x1B29:0x007D, #	RIGHT CURLY BRACKET
       
    40 	0x1B2F:0x005C, #	REVERSE SOLIDUS
       
    41 	0x1B3C:0x005B, #	LEFT SQUARE BRACKET
       
    42 	0x1B3D:0x007E, #	TILDE
       
    43 	0x1B3E:0x005D, #	RIGHT SQUARE BRACKET
       
    44 	0x1B40:0x007C, #	VERTICAL LINE
       
    45 	0x1B65:0x1E82, #	EURO SIGN
       
    46 	0x1C:0x00C6, #	LATIN CAPITAL LETTER AE
       
    47 	0x1D:0x00E6, #	LATIN SMALL LETTER AE
       
    48 	0x1E:0x00DF, #	LATIN SMALL LETTER SHARP S (German)
       
    49 	0x1F:0x00C9, #	LATIN CAPITAL LETTER E WITH ACUTE
       
    50 	0x20:0x0020, #	SPACE
       
    51 	0x21:0x0021, #	EXCLAMATION MARK
       
    52 	0x22:0x0022, #	QUOTATION MARK
       
    53 	0x23:0x0023, #	NUMBER SIGN
       
    54 	0x24:0x00A4, #	CURRENCY SIGN
       
    55 	0x25:0x0025, #	PERCENT SIGN
       
    56 	0x26:0x0026, #	AMPERSAND
       
    57 	0x27:0x0027, #	APOSTROPHE
       
    58 	0x28:0x0028, #	LEFT PARENTHESIS
       
    59 	0x29:0x0029, #	RIGHT PARENTHESIS
       
    60 	0x2A:0x002A, #	ASTERISK
       
    61 	0x2B:0x002B, #	PLUS SIGN
       
    62 	0x2C:0x002C, #	COMMA
       
    63 	0x2D:0x002D, #	HYPHEN-MINUS
       
    64 	0x2E:0x002E, #	FULL STOP
       
    65 	0x2F:0x002F, #	SOLIDUS
       
    66 	0x30:0x0030, #	DIGIT ZERO
       
    67 	0x31:0x0031, #	DIGIT ONE
       
    68 	0x32:0x0032, #	DIGIT TWO
       
    69 	0x33:0x0033, #	DIGIT THREE
       
    70 	0x34:0x0034, #	DIGIT FOUR
       
    71 	0x35:0x0035, #	DIGIT FIVE
       
    72 	0x36:0x0036, #	DIGIT SIX
       
    73 	0x37:0x0037, #	DIGIT SEVEN
       
    74 	0x38:0x0038, #	DIGIT EIGHT
       
    75 	0x39:0x0039, #	DIGIT NINE
       
    76 	0x3A:0x003A, #	COLON
       
    77 	0x3B:0x003B, #	SEMICOLON
       
    78 	0x3C:0x003C, #	LESS-THAN SIGN
       
    79 	0x3D:0x003D, #	EQUALS SIGN
       
    80 	0x3E:0x003E, #	GREATER-THAN SIGN
       
    81 	0x3F:0x003F, #	QUESTION MARK
       
    82 	0x40:0x00A1, #	INVERTED EXCLAMATION MARK
       
    83 	0x41:0x0041, #	LATIN CAPITAL LETTER A
       
    84 	0x41:0x0391, #	GREEK CAPITAL LETTER ALPHA
       
    85 	0x42:0x0042, #	LATIN CAPITAL LETTER B
       
    86 	0x42:0x0392, #	GREEK CAPITAL LETTER BETA
       
    87 	0x43:0x0043, #	LATIN CAPITAL LETTER C
       
    88 	0x44:0x0044, #	LATIN CAPITAL LETTER D
       
    89 	0x45:0x0045, #	LATIN CAPITAL LETTER E
       
    90 	0x45:0x0395, #	GREEK CAPITAL LETTER EPSILON
       
    91 	0x46:0x0046, #	LATIN CAPITAL LETTER F
       
    92 	0x47:0x0047, #	LATIN CAPITAL LETTER G
       
    93 	0x48:0x0048, #	LATIN CAPITAL LETTER H
       
    94 	0x48:0x0397, #	GREEK CAPITAL LETTER ETA
       
    95 	0x49:0x0049, #	LATIN CAPITAL LETTER I
       
    96 	0x49:0x0399, #	GREEK CAPITAL LETTER IOTA
       
    97 	0x4A:0x004A, #	LATIN CAPITAL LETTER J
       
    98 	0x4B:0x004B, #	LATIN CAPITAL LETTER K
       
    99 	0x4B:0x039A, #	GREEK CAPITAL LETTER KAPPA
       
   100 	0x4C:0x004C, #	LATIN CAPITAL LETTER L
       
   101 	0x4D:0x004D, #	LATIN CAPITAL LETTER M
       
   102 	0x4D:0x039C, #	GREEK CAPITAL LETTER MU
       
   103 	0x4E:0x004E, #	LATIN CAPITAL LETTER N
       
   104 	0x4E:0x039D, #	GREEK CAPITAL LETTER NU
       
   105 	0x4F:0x004F, #	LATIN CAPITAL LETTER O
       
   106 	0x4F:0x039F, #	GREEK CAPITAL LETTER OMICRON
       
   107 	0x50:0x0050, #	LATIN CAPITAL LETTER P
       
   108 	0x50:0x03A1, #	GREEK CAPITAL LETTER RHO
       
   109 	0x51:0x0051, #	LATIN CAPITAL LETTER Q
       
   110 	0x52:0x0052, #	LATIN CAPITAL LETTER R
       
   111 	0x53:0x0053, #	LATIN CAPITAL LETTER S
       
   112 	0x54:0x0054, #	LATIN CAPITAL LETTER T
       
   113 	0x54:0x03A4, #	GREEK CAPITAL LETTER TAU
       
   114 	0x55:0x0055, #	LATIN CAPITAL LETTER U
       
   115 	0x55:0x03A5, #	GREEK CAPITAL LETTER UPSILON
       
   116 	0x56:0x0056, #	LATIN CAPITAL LETTER V
       
   117 	0x57:0x0057, #	LATIN CAPITAL LETTER W
       
   118 	0x58:0x0058, #	LATIN CAPITAL LETTER X
       
   119 	0x58:0x03A7, #	GREEK CAPITAL LETTER CHI
       
   120 	0x59:0x0059, #	LATIN CAPITAL LETTER Y
       
   121 	0x5A:0x005A, #	LATIN CAPITAL LETTER Z
       
   122 	0x5A:0x0396, #	GREEK CAPITAL LETTER ZETA
       
   123 	0x5B:0x00C4, #	LATIN CAPITAL LETTER A WITH DIAERESIS
       
   124 	0x5C:0x00D6, #	LATIN CAPITAL LETTER O WITH DIAERESIS
       
   125 	0x5D:0x00D1, #	LATIN CAPITAL LETTER N WITH TILDE
       
   126 	0x5E:0x00DC, #	LATIN CAPITAL LETTER U WITH DIAERESIS
       
   127 	0x5F:0x00A7, #	SECTION SIGN
       
   128 	0x60:0x00BF, #	INVERTED QUESTION MARK
       
   129 	0x61:0x0061, #	LATIN SMALL LETTER A
       
   130 	0x62:0x0062, #	LATIN SMALL LETTER B
       
   131 	0x63:0x0063, #	LATIN SMALL LETTER C
       
   132 	0x64:0x0064, #	LATIN SMALL LETTER D
       
   133 	0x65:0x0065, #	LATIN SMALL LETTER E
       
   134 	0x66:0x0066, #	LATIN SMALL LETTER F
       
   135 	0x67:0x0067, #	LATIN SMALL LETTER G
       
   136 	0x68:0x0068, #	LATIN SMALL LETTER H
       
   137 	0x69:0x0069, #	LATIN SMALL LETTER I
       
   138 	0x6A:0x006A, #	LATIN SMALL LETTER J
       
   139 	0x6B:0x006B, #	LATIN SMALL LETTER K
       
   140 	0x6C:0x006C, #	LATIN SMALL LETTER L
       
   141 	0x6D:0x006D, #	LATIN SMALL LETTER M
       
   142 	0x6E:0x006E, #	LATIN SMALL LETTER N
       
   143 	0x6F:0x006F, #	LATIN SMALL LETTER O
       
   144 	0x70:0x0070, #	LATIN SMALL LETTER P
       
   145 	0x71:0x0071, #	LATIN SMALL LETTER Q
       
   146 	0x72:0x0072, #	LATIN SMALL LETTER R
       
   147 	0x73:0x0073, #	LATIN SMALL LETTER S
       
   148 	0x74:0x0074, #	LATIN SMALL LETTER T
       
   149 	0x75:0x0075, #	LATIN SMALL LETTER U
       
   150 	0x76:0x0076, #	LATIN SMALL LETTER V
       
   151 	0x77:0x0077, #	LATIN SMALL LETTER W
       
   152 	0x78:0x0078, #	LATIN SMALL LETTER X
       
   153 	0x79:0x0079, #	LATIN SMALL LETTER Y
       
   154 	0x7A:0x007A, #	LATIN SMALL LETTER Z
       
   155 	0x7B:0x00E4, #	LATIN SMALL LETTER A WITH DIAERESIS
       
   156 	0x7C:0x00F6, #	LATIN SMALL LETTER O WITH DIAERESIS
       
   157 	0x7D:0x00F1, #	LATIN SMALL LETTER N WITH TILDE
       
   158 	0x7E:0x00FC, #	LATIN SMALL LETTER U WITH DIAERESIS
       
   159 	0x7F:0x00E0  #	LATIN SMALL LETTER A WITH GRAVE
       
   160 }
       
   161 
       
   162 encoding_map = codecs.make_encoding_map(decoding_map)
       
   163 encoding_map.update({
       
   164 	0x03AC:0x41,	#	GREEK SMALL LETTER ALPHA WITH TONOS
       
   165 	0x03AD:0x45,	#	GREEK SMALL LETTER EPSILON WITH TONOS
       
   166 	0x03AE:0x48,	#	GREEK SMALL LETTER ETA WITH TONOS
       
   167 	0x03AF:0x49,	#	GREEK SMALL LETTER IOTA WITH TONOS
       
   168 	0x03B0:0x59,	#	GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
       
   169 	0x03B1:0x41,	#	GREEK SMALL LETTER ALPHA
       
   170 	0x03B2:0x42,	#	GREEK SMALL LETTER BETA
       
   171 	0x03B3:0x13,	#	GREEK SMALL LETTER GAMMA
       
   172 	0x03B4:0x10,	#	GREEK SMALL LETTER DELTA
       
   173 	0x03B5:0x45,	#	GREEK SMALL LETTER EPSILON
       
   174 	0x03B6:0x5A,	#	GREEK SMALL LETTER ZETA
       
   175 	0x03B7:0x48,	#	GREEK SMALL LETTER ETA
       
   176 	0x03B8:0x19,	#	GREEK SMALL LETTER THETA
       
   177 	0x03B9:0x49,	#	GREEK SMALL LETTER IOTA
       
   178 	0x03BA:0x4B,	#	GREEK SMALL LETTER KAPPA
       
   179 	0x03BB:0x14,	#	GREEK SMALL LETTER LAMDA
       
   180 	0x03BC:0x4D,	#	GREEK SMALL LETTER MU
       
   181 	0x03BD:0x4E,	#	GREEK SMALL LETTER NU
       
   182 	0x03BE:0x1A,	#	GREEK SMALL LETTER XI
       
   183 	0x03BF:0x4F,	#	GREEK SMALL LETTER OMICRON
       
   184 	0x03C0:0x16,	#	GREEK SMALL LETTER PI
       
   185 	0x03C1:0x50,	#	GREEK SMALL LETTER RHO
       
   186 	0x03C2:0x18,	#	GREEK SMALL LETTER FINAL SIGMA
       
   187 	0x03C3:0x18,	#	GREEK SMALL LETTER SIGMA
       
   188 	0x03C4:0x54, #	GREEK SMALL LETTER TAU
       
   189 	0x03C5:0x59, #	GREEK SMALL LETTER UPSILON
       
   190 	0x03C6:0x12, #	GREEK SMALL LETTER PHI
       
   191 	0x03C7:0x58, #	GREEK SMALL LETTER CHI
       
   192 	0x03C8:0x17, #	GREEK SMALL LETTER PSI
       
   193 	0x03C9:0x15, #	GREEK SMALL LETTER OMEGA
       
   194 	0x03CA:0x49, #	GREEK SMALL LETTER IOTA WITH DIALYTIKA
       
   195 	0x03CB:0x49, #	GREEK SMALL LETTER UPSILON WITH DIALYTIKA
       
   196 	0x03CC:0x4F, #	GREEK SMALL LETTER OMICRON WITH TONOS
       
   197 	0x03CD:0x49, #	GREEK SMALL LETTER UPSILON WITH TONOS
       
   198 	0x03CE:0x15	 #	GREEK SMALL LETTER OMEGA WITH TONOS
       
   199 })
       
   200 
       
   201 
       
   202 class Codec(codecs.Codec):
       
   203 	def encode(self,input,errors='strict'):
       
   204 		return codecs.charmap_encode(input,errors,encoding_map)
       
   205 	def decode(self,input,errors='strict'):
       
   206 		return codecs.charmap_decode(input,errors,decoding_map)
       
   207 
       
   208 
       
   209 
       
   210 class StreamWriter(Codec,codecs.StreamWriter):
       
   211     pass
       
   212 
       
   213 class StreamReader(Codec,codecs.StreamReader):
       
   214     pass
       
   215 
       
   216 ### encodings module API
       
   217 
       
   218 def getregentry():
       
   219 	return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
       
   220 
       
   221 
       
   222 def gsm_search(encoding):
       
   223 	if not encoding == ENCODING_NAME:
       
   224 		return
       
   225 	print "Using gsm codec"	
       
   226 	return getregentry()
       
   227 	
       
   228 # Register our codec when we load the module
       
   229 codecs.register(gsm_search)
       
   230 
       
   231 if __name__ == "__main__":
       
   232 	text = u"λολ"
       
   233 	print text.upper()
       
   234 	text2 = text.encode("gsm0338")
       
   235 	for char in text2:
       
   236 		print "%d"%ord(char)
       
   237