|
16
|
1 |
#!/usr/bin/python |
|
|
2 |
# -*- coding: iso-8859-7 -*- |
|
|
3 |
import codecs |
|
|
4 |
|
|
|
5 |
# Name under which the codec is looked up, e.g. u"...".encode("gsm0338").
ENCODING_NAME = "gsm0338"

# GSM 03.38 default alphabet: code value -> Unicode code point.
#
# Every base-table code (0x00-0x7F) appears exactly once. Greek capitals that
# share a glyph with a Latin capital (ALPHA/A, BETA/B, ...) are deliberately
# NOT listed here: a dict literal keeps only the last duplicate key, which
# would make 0x41 decode to GREEK ALPHA and leave LATIN A without any
# encoding. Those look-alikes are encode-only and are added to encoding_map
# below.
#
# Keys of the form 0x1Bxx describe the extension table (ESC followed by xx).
# codecs.charmap_decode maps one byte at a time, so these keys are never
# consulted while decoding; they are kept to derive the two-byte escape
# sequences used by the encoder.
decoding_map = {
    0x00: 0x0040,  # COMMERCIAL AT
    0x01: 0x00A3,  # POUND SIGN
    0x02: 0x0024,  # DOLLAR SIGN
    0x03: 0x00A5,  # YEN SIGN
    0x04: 0x00E8,  # LATIN SMALL LETTER E WITH GRAVE
    0x05: 0x00E9,  # LATIN SMALL LETTER E WITH ACUTE
    0x06: 0x00F9,  # LATIN SMALL LETTER U WITH GRAVE
    0x07: 0x00EC,  # LATIN SMALL LETTER I WITH GRAVE
    0x08: 0x00F2,  # LATIN SMALL LETTER O WITH GRAVE
    0x09: 0x00E7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0A: 0x000A,  # LINE FEED
    0x0B: 0x00D8,  # LATIN CAPITAL LETTER O WITH STROKE
    0x0C: 0x00F8,  # LATIN SMALL LETTER O WITH STROKE
    0x0D: 0x000D,  # CARRIAGE RETURN
    0x0E: 0x00C5,  # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0F: 0x00E5,  # LATIN SMALL LETTER A WITH RING ABOVE
    0x10: 0x0394,  # GREEK CAPITAL LETTER DELTA
    0x11: 0x005F,  # LOW LINE
    0x12: 0x03A6,  # GREEK CAPITAL LETTER PHI
    0x13: 0x0393,  # GREEK CAPITAL LETTER GAMMA
    0x14: 0x039B,  # GREEK CAPITAL LETTER LAMDA
    0x15: 0x03A9,  # GREEK CAPITAL LETTER OMEGA
    0x16: 0x03A0,  # GREEK CAPITAL LETTER PI
    0x17: 0x03A8,  # GREEK CAPITAL LETTER PSI
    0x18: 0x03A3,  # GREEK CAPITAL LETTER SIGMA
    0x19: 0x0398,  # GREEK CAPITAL LETTER THETA
    0x1A: 0x039E,  # GREEK CAPITAL LETTER XI
    0x1B: 0x00A0,  # ESCAPE TO EXTENSION TABLE (a lone ESC decodes as NBSP)
    0x1B0A: 0x000C,  # FORM FEED
    0x1B14: 0x005E,  # CIRCUMFLEX ACCENT
    0x1B28: 0x007B,  # LEFT CURLY BRACKET
    0x1B29: 0x007D,  # RIGHT CURLY BRACKET
    0x1B2F: 0x005C,  # REVERSE SOLIDUS
    0x1B3C: 0x005B,  # LEFT SQUARE BRACKET
    0x1B3D: 0x007E,  # TILDE
    0x1B3E: 0x005D,  # RIGHT SQUARE BRACKET
    0x1B40: 0x007C,  # VERTICAL LINE
    0x1B65: 0x20AC,  # EURO SIGN
    0x1C: 0x00C6,  # LATIN CAPITAL LETTER AE
    0x1D: 0x00E6,  # LATIN SMALL LETTER AE
    0x1E: 0x00DF,  # LATIN SMALL LETTER SHARP S (German)
    0x1F: 0x00C9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x20: 0x0020,  # SPACE
    0x21: 0x0021,  # EXCLAMATION MARK
    0x22: 0x0022,  # QUOTATION MARK
    0x23: 0x0023,  # NUMBER SIGN
    0x24: 0x00A4,  # CURRENCY SIGN
    0x25: 0x0025,  # PERCENT SIGN
    0x26: 0x0026,  # AMPERSAND
    0x27: 0x0027,  # APOSTROPHE
    0x28: 0x0028,  # LEFT PARENTHESIS
    0x29: 0x0029,  # RIGHT PARENTHESIS
    0x2A: 0x002A,  # ASTERISK
    0x2B: 0x002B,  # PLUS SIGN
    0x2C: 0x002C,  # COMMA
    0x2D: 0x002D,  # HYPHEN-MINUS
    0x2E: 0x002E,  # FULL STOP
    0x2F: 0x002F,  # SOLIDUS
    0x30: 0x0030,  # DIGIT ZERO
    0x31: 0x0031,  # DIGIT ONE
    0x32: 0x0032,  # DIGIT TWO
    0x33: 0x0033,  # DIGIT THREE
    0x34: 0x0034,  # DIGIT FOUR
    0x35: 0x0035,  # DIGIT FIVE
    0x36: 0x0036,  # DIGIT SIX
    0x37: 0x0037,  # DIGIT SEVEN
    0x38: 0x0038,  # DIGIT EIGHT
    0x39: 0x0039,  # DIGIT NINE
    0x3A: 0x003A,  # COLON
    0x3B: 0x003B,  # SEMICOLON
    0x3C: 0x003C,  # LESS-THAN SIGN
    0x3D: 0x003D,  # EQUALS SIGN
    0x3E: 0x003E,  # GREATER-THAN SIGN
    0x3F: 0x003F,  # QUESTION MARK
    0x40: 0x00A1,  # INVERTED EXCLAMATION MARK
    0x41: 0x0041,  # LATIN CAPITAL LETTER A
    0x42: 0x0042,  # LATIN CAPITAL LETTER B
    0x43: 0x0043,  # LATIN CAPITAL LETTER C
    0x44: 0x0044,  # LATIN CAPITAL LETTER D
    0x45: 0x0045,  # LATIN CAPITAL LETTER E
    0x46: 0x0046,  # LATIN CAPITAL LETTER F
    0x47: 0x0047,  # LATIN CAPITAL LETTER G
    0x48: 0x0048,  # LATIN CAPITAL LETTER H
    0x49: 0x0049,  # LATIN CAPITAL LETTER I
    0x4A: 0x004A,  # LATIN CAPITAL LETTER J
    0x4B: 0x004B,  # LATIN CAPITAL LETTER K
    0x4C: 0x004C,  # LATIN CAPITAL LETTER L
    0x4D: 0x004D,  # LATIN CAPITAL LETTER M
    0x4E: 0x004E,  # LATIN CAPITAL LETTER N
    0x4F: 0x004F,  # LATIN CAPITAL LETTER O
    0x50: 0x0050,  # LATIN CAPITAL LETTER P
    0x51: 0x0051,  # LATIN CAPITAL LETTER Q
    0x52: 0x0052,  # LATIN CAPITAL LETTER R
    0x53: 0x0053,  # LATIN CAPITAL LETTER S
    0x54: 0x0054,  # LATIN CAPITAL LETTER T
    0x55: 0x0055,  # LATIN CAPITAL LETTER U
    0x56: 0x0056,  # LATIN CAPITAL LETTER V
    0x57: 0x0057,  # LATIN CAPITAL LETTER W
    0x58: 0x0058,  # LATIN CAPITAL LETTER X
    0x59: 0x0059,  # LATIN CAPITAL LETTER Y
    0x5A: 0x005A,  # LATIN CAPITAL LETTER Z
    0x5B: 0x00C4,  # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x5C: 0x00D6,  # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x5D: 0x00D1,  # LATIN CAPITAL LETTER N WITH TILDE
    0x5E: 0x00DC,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x5F: 0x00A7,  # SECTION SIGN
    0x60: 0x00BF,  # INVERTED QUESTION MARK
    0x61: 0x0061,  # LATIN SMALL LETTER A
    0x62: 0x0062,  # LATIN SMALL LETTER B
    0x63: 0x0063,  # LATIN SMALL LETTER C
    0x64: 0x0064,  # LATIN SMALL LETTER D
    0x65: 0x0065,  # LATIN SMALL LETTER E
    0x66: 0x0066,  # LATIN SMALL LETTER F
    0x67: 0x0067,  # LATIN SMALL LETTER G
    0x68: 0x0068,  # LATIN SMALL LETTER H
    0x69: 0x0069,  # LATIN SMALL LETTER I
    0x6A: 0x006A,  # LATIN SMALL LETTER J
    0x6B: 0x006B,  # LATIN SMALL LETTER K
    0x6C: 0x006C,  # LATIN SMALL LETTER L
    0x6D: 0x006D,  # LATIN SMALL LETTER M
    0x6E: 0x006E,  # LATIN SMALL LETTER N
    0x6F: 0x006F,  # LATIN SMALL LETTER O
    0x70: 0x0070,  # LATIN SMALL LETTER P
    0x71: 0x0071,  # LATIN SMALL LETTER Q
    0x72: 0x0072,  # LATIN SMALL LETTER R
    0x73: 0x0073,  # LATIN SMALL LETTER S
    0x74: 0x0074,  # LATIN SMALL LETTER T
    0x75: 0x0075,  # LATIN SMALL LETTER U
    0x76: 0x0076,  # LATIN SMALL LETTER V
    0x77: 0x0077,  # LATIN SMALL LETTER W
    0x78: 0x0078,  # LATIN SMALL LETTER X
    0x79: 0x0079,  # LATIN SMALL LETTER Y
    0x7A: 0x007A,  # LATIN SMALL LETTER Z
    0x7B: 0x00E4,  # LATIN SMALL LETTER A WITH DIAERESIS
    0x7C: 0x00F6,  # LATIN SMALL LETTER O WITH DIAERESIS
    0x7D: 0x00F1,  # LATIN SMALL LETTER N WITH TILDE
    0x7E: 0x00FC,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x7F: 0x00E0,  # LATIN SMALL LETTER A WITH GRAVE
}

# Unicode code point -> GSM code, obtained by inverting the table above.
encoding_map = codecs.make_encoding_map(decoding_map)

# codecs.charmap_encode only accepts integer targets in range(256), so the
# inverted extension entries (0x1Bxx) are replaced by explicit two-byte
# strings: ESC followed by the extension code. bytes(bytearray(...)) yields
# a str on Python 2 (2.6+) and bytes on Python 3.
encoding_map.update(dict(
    (char, bytes(bytearray((code >> 8, code & 0xFF))))
    for code, char in decoding_map.items()
    if code > 0xFF
))

# Encode-only fallbacks: Greek letters that have no code of their own are
# sent as the capital letter (Greek or Latin) with the same or closest glyph.
encoding_map.update({
    0x0391: 0x41,  # GREEK CAPITAL LETTER ALPHA
    0x0392: 0x42,  # GREEK CAPITAL LETTER BETA
    0x0395: 0x45,  # GREEK CAPITAL LETTER EPSILON
    0x0396: 0x5A,  # GREEK CAPITAL LETTER ZETA
    0x0397: 0x48,  # GREEK CAPITAL LETTER ETA
    0x0399: 0x49,  # GREEK CAPITAL LETTER IOTA
    0x039A: 0x4B,  # GREEK CAPITAL LETTER KAPPA
    0x039C: 0x4D,  # GREEK CAPITAL LETTER MU
    0x039D: 0x4E,  # GREEK CAPITAL LETTER NU
    0x039F: 0x4F,  # GREEK CAPITAL LETTER OMICRON
    0x03A1: 0x50,  # GREEK CAPITAL LETTER RHO
    0x03A4: 0x54,  # GREEK CAPITAL LETTER TAU
    0x03A5: 0x59,  # GREEK CAPITAL LETTER UPSILON
    0x03A7: 0x58,  # GREEK CAPITAL LETTER CHI
    0x03AC: 0x41,  # GREEK SMALL LETTER ALPHA WITH TONOS
    0x03AD: 0x45,  # GREEK SMALL LETTER EPSILON WITH TONOS
    0x03AE: 0x48,  # GREEK SMALL LETTER ETA WITH TONOS
    0x03AF: 0x49,  # GREEK SMALL LETTER IOTA WITH TONOS
    0x03B0: 0x59,  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    0x03B1: 0x41,  # GREEK SMALL LETTER ALPHA
    0x03B2: 0x42,  # GREEK SMALL LETTER BETA
    0x03B3: 0x13,  # GREEK SMALL LETTER GAMMA
    0x03B4: 0x10,  # GREEK SMALL LETTER DELTA
    0x03B5: 0x45,  # GREEK SMALL LETTER EPSILON
    0x03B6: 0x5A,  # GREEK SMALL LETTER ZETA
    0x03B7: 0x48,  # GREEK SMALL LETTER ETA
    0x03B8: 0x19,  # GREEK SMALL LETTER THETA
    0x03B9: 0x49,  # GREEK SMALL LETTER IOTA
    0x03BA: 0x4B,  # GREEK SMALL LETTER KAPPA
    0x03BB: 0x14,  # GREEK SMALL LETTER LAMDA
    0x03BC: 0x4D,  # GREEK SMALL LETTER MU
    0x03BD: 0x4E,  # GREEK SMALL LETTER NU
    0x03BE: 0x1A,  # GREEK SMALL LETTER XI
    0x03BF: 0x4F,  # GREEK SMALL LETTER OMICRON
    0x03C0: 0x16,  # GREEK SMALL LETTER PI
    0x03C1: 0x50,  # GREEK SMALL LETTER RHO
    0x03C2: 0x18,  # GREEK SMALL LETTER FINAL SIGMA
    0x03C3: 0x18,  # GREEK SMALL LETTER SIGMA
    0x03C4: 0x54,  # GREEK SMALL LETTER TAU
    0x03C5: 0x59,  # GREEK SMALL LETTER UPSILON
    0x03C6: 0x12,  # GREEK SMALL LETTER PHI
    0x03C7: 0x58,  # GREEK SMALL LETTER CHI
    0x03C8: 0x17,  # GREEK SMALL LETTER PSI
    0x03C9: 0x15,  # GREEK SMALL LETTER OMEGA
    0x03CA: 0x49,  # GREEK SMALL LETTER IOTA WITH DIALYTIKA
    0x03CB: 0x59,  # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
    0x03CC: 0x4F,  # GREEK SMALL LETTER OMICRON WITH TONOS
    0x03CD: 0x59,  # GREEK SMALL LETTER UPSILON WITH TONOS
    0x03CE: 0x15,  # GREEK SMALL LETTER OMEGA WITH TONOS
})
|
|
200 |
|
|
|
201 |
|
|
|
202 |
class Codec(codecs.Codec):
    """Stateless codec that delegates to the charmap helpers in ``codecs``.

    Both directions are driven by the module-level ``encoding_map`` and
    ``decoding_map`` tables.
    """

    def encode(self, input, errors='strict'):
        """Encode ``input`` via ``encoding_map``.

        Returns the result of ``codecs.charmap_encode`` unchanged.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode ``input`` via ``decoding_map``.

        Returns the result of ``codecs.charmap_decode`` unchanged.
        """
        # NOTE(review): charmap_decode presumably maps one byte at a time, so
        # the two-byte 0x1Bxx entries of decoding_map would not be decoded as
        # extension characters -- confirm.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
|
207 |
|
|
|
208 |
|
|
|
209 |
|
|
|
210 |
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining ``Codec`` with ``codecs.StreamWriter``."""
|
|
212 |
|
|
|
213 |
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining ``Codec`` with ``codecs.StreamReader``."""
|
|
215 |
|
|
|
216 |
### encodings module API |
|
|
217 |
|
|
|
218 |
def getregentry():
    """Return the codec registry entry for this codec.

    The entry is a ``codecs.CodecInfo``, which is what the codec registry
    expects a search function to return. ``CodecInfo`` is a tuple subclass
    laid out as ``(encode, decode, streamreader, streamwriter)``, so callers
    that unpack the previous plain 4-tuple keep working.
    """
    # One shared instance is enough: Codec keeps no per-call state.
    codec = Codec()
    return codecs.CodecInfo(
        encode=codec.encode,
        decode=codec.decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        name=ENCODING_NAME,
    )
|
|
220 |
|
|
|
221 |
|
|
|
222 |
def gsm_search(encoding):
    """Codec search function for ``codecs.register``.

    Returns the registry entry from ``getregentry()`` when ``encoding``
    equals ``ENCODING_NAME`` (after printing a notice), and ``None`` for any
    other name.
    """
    if encoding == ENCODING_NAME:
        print("Using gsm codec")
        return getregentry()
    return None
|
|
227 |
|
|
|
228 |
# Importing this module installs the search function in the codec registry.
codecs.register(gsm_search)

if __name__ == "__main__":
    # Manual smoke test: show the upper-cased sample, then print the numeric
    # value of every byte produced by the codec, one per line.
    text = u"���"
    print(text.upper())
    encoded = text.encode("gsm0338")
    for byte in bytearray(encoded):
        print("%d" % byte)
|
|
237 |
|