|
16
|
1 |
#!/usr/bin/python |
|
17
|
2 |
# -*- coding: utf-8 -*- |
|
16
|
3 |
import codecs |
|
17
|
4 |
import _multibytecodec as mbc |
|
16
|
5 |
|
|
|
6 |
ENCODING_NAME = "gsm0338" |
|
|
7 |
|
|
17
|
8 |
# GSM 03.38 value -> Unicode character.
#
# Each pair below is (GSM value, Unicode code point); both sides are turned
# into one-character unicode strings.  Extension-table entries are keyed by
# the single character U+1Bxx (the escape value 0x1B in the high byte and the
# extension value in the low byte), not by a two-character sequence.
#
# NOTE(review): some revisions of the GSM 03.38 mapping list 0x09 as
# U+00C7 (capital C with cedilla); the value below is kept as-is -- confirm.
decoding_map = dict(
    (unichr(gsm_value), unichr(code_point))
    for gsm_value, code_point in (
        (0x00, 0x0040),  # COMMERCIAL AT
        (0x01, 0x00A3),  # POUND SIGN
        (0x02, 0x0024),  # DOLLAR SIGN
        (0x03, 0x00A5),  # YEN SIGN
        (0x04, 0x00E8),  # LATIN SMALL LETTER E WITH GRAVE
        (0x05, 0x00E9),  # LATIN SMALL LETTER E WITH ACUTE
        (0x06, 0x00F9),  # LATIN SMALL LETTER U WITH GRAVE
        (0x07, 0x00EC),  # LATIN SMALL LETTER I WITH GRAVE
        (0x08, 0x00F2),  # LATIN SMALL LETTER O WITH GRAVE
        (0x09, 0x00E7),  # LATIN SMALL LETTER C WITH CEDILLA
        (0x0A, 0x000A),  # LINE FEED
        (0x0B, 0x00D8),  # LATIN CAPITAL LETTER O WITH STROKE
        (0x0C, 0x00F8),  # LATIN SMALL LETTER O WITH STROKE
        (0x0D, 0x000D),  # CARRIAGE RETURN
        (0x0E, 0x00C5),  # LATIN CAPITAL LETTER A WITH RING ABOVE
        (0x0F, 0x00E5),  # LATIN SMALL LETTER A WITH RING ABOVE
        (0x10, 0x0394),  # GREEK CAPITAL LETTER DELTA
        (0x11, 0x005F),  # LOW LINE
        (0x12, 0x03A6),  # GREEK CAPITAL LETTER PHI
        (0x13, 0x0393),  # GREEK CAPITAL LETTER GAMMA
        (0x14, 0x039B),  # GREEK CAPITAL LETTER LAMDA
        (0x15, 0x03A9),  # GREEK CAPITAL LETTER OMEGA
        (0x16, 0x03A0),  # GREEK CAPITAL LETTER PI
        (0x17, 0x03A8),  # GREEK CAPITAL LETTER PSI
        (0x18, 0x03A3),  # GREEK CAPITAL LETTER SIGMA
        (0x19, 0x0398),  # GREEK CAPITAL LETTER THETA
        (0x1A, 0x039E),  # GREEK CAPITAL LETTER XI
        (0x1B, 0x00A0),  # ESCAPE TO EXTENSION TABLE (mapped to NBSP when on its own)
        (0x1B0A, 0x000C),  # FORM FEED
        (0x1B14, 0x005E),  # CIRCUMFLEX ACCENT
        (0x1B28, 0x007B),  # LEFT CURLY BRACKET
        (0x1B29, 0x007D),  # RIGHT CURLY BRACKET
        (0x1B2F, 0x005C),  # REVERSE SOLIDUS
        (0x1B3C, 0x005B),  # LEFT SQUARE BRACKET
        (0x1B3D, 0x007E),  # TILDE
        (0x1B3E, 0x005D),  # RIGHT SQUARE BRACKET
        (0x1B40, 0x007C),  # VERTICAL LINE
        (0x1B65, 0x20AC),  # EURO SIGN
        (0x1C, 0x00C6),  # LATIN CAPITAL LETTER AE
        (0x1D, 0x00E6),  # LATIN SMALL LETTER AE
        (0x1E, 0x00DF),  # LATIN SMALL LETTER SHARP S (German)
        (0x1F, 0x00C9),  # LATIN CAPITAL LETTER E WITH ACUTE
        (0x20, 0x0020),  # SPACE
        (0x21, 0x0021),  # EXCLAMATION MARK
        (0x22, 0x0022),  # QUOTATION MARK
        (0x23, 0x0023),  # NUMBER SIGN
        (0x24, 0x00A4),  # CURRENCY SIGN
        (0x25, 0x0025),  # PERCENT SIGN
        (0x26, 0x0026),  # AMPERSAND
        (0x27, 0x0027),  # APOSTROPHE
        (0x28, 0x0028),  # LEFT PARENTHESIS
        (0x29, 0x0029),  # RIGHT PARENTHESIS
        (0x2A, 0x002A),  # ASTERISK
        (0x2B, 0x002B),  # PLUS SIGN
        (0x2C, 0x002C),  # COMMA
        (0x2D, 0x002D),  # HYPHEN-MINUS
        (0x2E, 0x002E),  # FULL STOP
        (0x2F, 0x002F),  # SOLIDUS
        (0x30, 0x0030),  # DIGIT ZERO
        (0x31, 0x0031),  # DIGIT ONE
        (0x32, 0x0032),  # DIGIT TWO
        (0x33, 0x0033),  # DIGIT THREE
        (0x34, 0x0034),  # DIGIT FOUR
        (0x35, 0x0035),  # DIGIT FIVE
        (0x36, 0x0036),  # DIGIT SIX
        (0x37, 0x0037),  # DIGIT SEVEN
        (0x38, 0x0038),  # DIGIT EIGHT
        (0x39, 0x0039),  # DIGIT NINE
        (0x3A, 0x003A),  # COLON
        (0x3B, 0x003B),  # SEMICOLON
        (0x3C, 0x003C),  # LESS-THAN SIGN
        (0x3D, 0x003D),  # EQUALS SIGN
        (0x3E, 0x003E),  # GREATER-THAN SIGN
        (0x3F, 0x003F),  # QUESTION MARK
        (0x40, 0x00A1),  # INVERTED EXCLAMATION MARK
        (0x41, 0x0041),  # LATIN CAPITAL LETTER A
        (0x42, 0x0042),  # LATIN CAPITAL LETTER B
        (0x43, 0x0043),  # LATIN CAPITAL LETTER C
        (0x44, 0x0044),  # LATIN CAPITAL LETTER D
        (0x45, 0x0045),  # LATIN CAPITAL LETTER E
        (0x46, 0x0046),  # LATIN CAPITAL LETTER F
        (0x47, 0x0047),  # LATIN CAPITAL LETTER G
        (0x48, 0x0048),  # LATIN CAPITAL LETTER H
        (0x49, 0x0049),  # LATIN CAPITAL LETTER I
        (0x4A, 0x004A),  # LATIN CAPITAL LETTER J
        (0x4B, 0x004B),  # LATIN CAPITAL LETTER K
        (0x4C, 0x004C),  # LATIN CAPITAL LETTER L
        (0x4D, 0x004D),  # LATIN CAPITAL LETTER M
        (0x4E, 0x004E),  # LATIN CAPITAL LETTER N
        (0x4F, 0x004F),  # LATIN CAPITAL LETTER O
        (0x50, 0x0050),  # LATIN CAPITAL LETTER P
        (0x51, 0x0051),  # LATIN CAPITAL LETTER Q
        (0x52, 0x0052),  # LATIN CAPITAL LETTER R
        (0x53, 0x0053),  # LATIN CAPITAL LETTER S
        (0x54, 0x0054),  # LATIN CAPITAL LETTER T
        (0x55, 0x0055),  # LATIN CAPITAL LETTER U
        (0x56, 0x0056),  # LATIN CAPITAL LETTER V
        (0x57, 0x0057),  # LATIN CAPITAL LETTER W
        (0x58, 0x0058),  # LATIN CAPITAL LETTER X
        (0x59, 0x0059),  # LATIN CAPITAL LETTER Y
        (0x5A, 0x005A),  # LATIN CAPITAL LETTER Z
        (0x5B, 0x00C4),  # LATIN CAPITAL LETTER A WITH DIAERESIS
        (0x5C, 0x00D6),  # LATIN CAPITAL LETTER O WITH DIAERESIS
        (0x5D, 0x00D1),  # LATIN CAPITAL LETTER N WITH TILDE
        (0x5E, 0x00DC),  # LATIN CAPITAL LETTER U WITH DIAERESIS
        (0x5F, 0x00A7),  # SECTION SIGN
        (0x60, 0x00BF),  # INVERTED QUESTION MARK
        (0x61, 0x0061),  # LATIN SMALL LETTER A
        (0x62, 0x0062),  # LATIN SMALL LETTER B
        (0x63, 0x0063),  # LATIN SMALL LETTER C
        (0x64, 0x0064),  # LATIN SMALL LETTER D
        (0x65, 0x0065),  # LATIN SMALL LETTER E
        (0x66, 0x0066),  # LATIN SMALL LETTER F
        (0x67, 0x0067),  # LATIN SMALL LETTER G
        (0x68, 0x0068),  # LATIN SMALL LETTER H
        (0x69, 0x0069),  # LATIN SMALL LETTER I
        (0x6A, 0x006A),  # LATIN SMALL LETTER J
        (0x6B, 0x006B),  # LATIN SMALL LETTER K
        (0x6C, 0x006C),  # LATIN SMALL LETTER L
        (0x6D, 0x006D),  # LATIN SMALL LETTER M
        (0x6E, 0x006E),  # LATIN SMALL LETTER N
        (0x6F, 0x006F),  # LATIN SMALL LETTER O
        (0x70, 0x0070),  # LATIN SMALL LETTER P
        (0x71, 0x0071),  # LATIN SMALL LETTER Q
        (0x72, 0x0072),  # LATIN SMALL LETTER R
        (0x73, 0x0073),  # LATIN SMALL LETTER S
        (0x74, 0x0074),  # LATIN SMALL LETTER T
        (0x75, 0x0075),  # LATIN SMALL LETTER U
        (0x76, 0x0076),  # LATIN SMALL LETTER V
        (0x77, 0x0077),  # LATIN SMALL LETTER W
        (0x78, 0x0078),  # LATIN SMALL LETTER X
        (0x79, 0x0079),  # LATIN SMALL LETTER Y
        (0x7A, 0x007A),  # LATIN SMALL LETTER Z
        (0x7B, 0x00E4),  # LATIN SMALL LETTER A WITH DIAERESIS
        (0x7C, 0x00F6),  # LATIN SMALL LETTER O WITH DIAERESIS
        (0x7D, 0x00F1),  # LATIN SMALL LETTER N WITH TILDE
        (0x7E, 0x00FC),  # LATIN SMALL LETTER U WITH DIAERESIS
        (0x7F, 0x00E0),  # LATIN SMALL LETTER A WITH GRAVE
    )
)
|
|
148 |
|
|
17
|
149 |
# Unicode character -> GSM 03.38 value: the decoding table with each
# (key, value) pair swapped.
encoding_map = dict(
    (unicode_char, gsm_char)
    for gsm_char, unicode_char in decoding_map.items()
)
|
16
|
150 |
|
|
|
151 |
class Codec(codecs.Codec): |
|
|
152 |
def encode(self,input,errors='strict'): |
|
17
|
153 |
ret="" |
|
|
154 |
for i in input: |
|
|
155 |
ret+=encoding_map[i] |
|
|
156 |
return (ret,len(ret)) |
|
16
|
157 |
def decode(self,input,errors='strict'): |
|
17
|
158 |
ret="" |
|
|
159 |
for i in input: |
|
|
160 |
ret+=decoding_map[i] |
|
|
161 |
return (ret,len(ret)) |
|
16
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
17
|
165 |
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
    """Stream writer built from this module's Codec plus the multibyte and
    standard stream-writer bases; it adds no behaviour of its own."""
|
|
167 |
|
|
17
|
168 |
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
    """Stream reader built from this module's Codec plus the multibyte and
    standard stream-reader bases; it adds no behaviour of its own."""
|
|
170 |
|
|
|
171 |
### encodings module API |
|
|
172 |
|
|
|
173 |
def getregentry():
    """Return the registry entry for this codec.

    The result is a ``codecs.CodecInfo``.  CodecInfo is a tuple subclass, so
    it still unpacks as ``(encode, decode, StreamReader, StreamWriter)`` for
    callers that treated the old return value as a plain 4-tuple, while also
    exposing ``.name`` and the other attributes ``codecs.lookup`` results
    normally carry.
    """
    codec = Codec()  # one stateless instance serves both directions
    return codecs.CodecInfo(
        encode=codec.encode,
        decode=codec.decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        name=ENCODING_NAME,
    )
|
|
175 |
|
|
|
176 |
|
|
|
177 |
def gsm_search(encoding):
    """Codec search function handed to ``codecs.register``.

    Returns the registry entry from ``getregentry()`` when *encoding* equals
    ``ENCODING_NAME``; returns None for every other name.
    """
    if encoding == ENCODING_NAME:
        return getregentry()
    return None
|
|
181 |
|
|
|
182 |
# Register our codec when we load the module |
|
|
183 |
codecs.register(gsm_search) |
|
|
184 |
|
|
|
185 |
if __name__ == "__main__":
    # Self-check, run only when the module is executed as a script.

    # Euro sign, umlauts and sharp s must encode to the expected values.
    sample = unicode("€öäüß", "utf-8")
    encoded_sample = sample.encode("gsm0338")
    assert encoded_sample == u"\u1B65\x7C\x7B\x7E\x1E"

    # An empty string must encode to an empty result.
    empty = ""
    encoded_empty = unicode(empty, "utf-8").encode("gsm0338")
    assert empty == encoded_empty
|
16
|
192 |
|