iro/anbieter/gsm0338.py
branch devel
changeset 240 3406d3bf05d4
parent 239 4cf5e664c847
child 241 546316b0b09c
equal deleted inserted replaced
239:4cf5e664c847 240:3406d3bf05d4
     1 #!/usr/bin/python
       
     2 # -*- coding: utf-8 -*-
       
     3 import codecs
       
     4 import _multibytecodec as mbc 
       
     5 
       
     6 ENCODING_NAME = "gsm0338"
       
     7 
       
     8 decoding_map= {
       
     9 unichr(0x00):unichr(0x0040), # COMMERCIAL AT
       
    10 unichr(0x01):unichr(0x00A3), # POUND SIGN
       
    11 unichr(0x02):unichr(0x0024), # DOLLAR SIGN
       
    12 unichr(0x03):unichr(0x00A5), # YEN SIGN
       
    13 unichr(0x04):unichr(0x00E8), # LATIN SMALL LETTER E WITH GRAVE
       
    14 unichr(0x05):unichr(0x00E9), # LATIN SMALL LETTER E WITH ACUTE
       
    15 unichr(0x06):unichr(0x00F9), # LATIN SMALL LETTER U WITH GRAVE
       
    16 unichr(0x07):unichr(0x00EC), # LATIN SMALL LETTER I WITH GRAVE
       
    17 unichr(0x08):unichr(0x00F2), # LATIN SMALL LETTER O WITH GRAVE
       
    18 unichr(0x09):unichr(0x00E7), # LATIN SMALL LETTER C WITH CEDILLA
       
    19 unichr(0x0A):unichr(0x000A), # LINE FEED
       
    20 unichr(0x0B):unichr(0x00D8), # LATIN CAPITAL LETTER O WITH STROKE
       
    21 unichr(0x0C):unichr(0x00F8), # LATIN SMALL LETTER O WITH STROKE
       
    22 unichr(0x0D):unichr(0x000D), # CARRIAGE RETURN
       
    23 unichr(0x0E):unichr(0x00C5), # LATIN CAPITAL LETTER A WITH RING ABOVE
       
    24 unichr(0x0F):unichr(0x00E5), # LATIN SMALL LETTER A WITH RING ABOVE
       
    25 unichr(0x10):unichr(0x0394), # GREEK CAPITAL LETTER DELTA
       
    26 unichr(0x11):unichr(0x005F), # LOW LINE
       
    27 unichr(0x12):unichr(0x03A6), # GREEK CAPITAL LETTER PHI
       
    28 unichr(0x13):unichr(0x0393), # GREEK CAPITAL LETTER GAMMA
       
    29 unichr(0x14):unichr(0x039B), # GREEK CAPITAL LETTER LAMDA
       
    30 unichr(0x15):unichr(0x03A9), # GREEK CAPITAL LETTER OMEGA
       
    31 unichr(0x16):unichr(0x03A0), # GREEK CAPITAL LETTER PI
       
    32 unichr(0x17):unichr(0x03A8), # GREEK CAPITAL LETTER PSI
       
    33 unichr(0x18):unichr(0x03A3), # GREEK CAPITAL LETTER SIGMA
       
    34 unichr(0x19):unichr(0x0398), # GREEK CAPITAL LETTER THETA
       
    35 unichr(0x1A):unichr(0x039E), # GREEK CAPITAL LETTER XI
       
    36 unichr(0x1B):unichr(0x00A0), # ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above)
       
    37 unichr(0x1B0A):unichr(0x000C), # FORM FEED
       
    38 unichr(0x1B14):unichr(0x005E), # CIRCUMFLEX ACCENT
       
    39 unichr(0x1B28):unichr(0x007B), # LEFT CURLY BRACKET
       
    40 unichr(0x1B29):unichr(0x007D), # RIGHT CURLY BRACKET
       
    41 unichr(0x1B2F):unichr(0x005C), # REVERSE SOLIDUS
       
    42 unichr(0x1B3C):unichr(0x005B), # LEFT SQUARE BRACKET
       
    43 unichr(0x1B3D):unichr(0x007E), # TILDE
       
    44 unichr(0x1B3E):unichr(0x005D), # RIGHT SQUARE BRACKET
       
    45 unichr(0x1B40):unichr(0x007C), # VERTICAL LINE
       
    46 unichr(0x1B65):unichr(0x20AC), # EURO SIGN
       
    47 unichr(0x1C):unichr(0x00C6), # LATIN CAPITAL LETTER AE
       
    48 unichr(0x1D):unichr(0x00E6), # LATIN SMALL LETTER AE
       
    49 unichr(0x1E):unichr(0x00DF), # LATIN SMALL LETTER SHARP S (German)
       
    50 unichr(0x1F):unichr(0x00C9), # LATIN CAPITAL LETTER E WITH ACUTE
       
    51 unichr(0x20):unichr(0x0020), # SPACE
       
    52 unichr(0x21):unichr(0x0021), # EXCLAMATION MARK
       
    53 unichr(0x22):unichr(0x0022), # QUOTATION MARK
       
    54 unichr(0x23):unichr(0x0023), # NUMBER SIGN
       
    55 unichr(0x24):unichr(0x00A4), # CURRENCY SIGN
       
    56 unichr(0x25):unichr(0x0025), # PERCENT SIGN
       
    57 unichr(0x26):unichr(0x0026), # AMPERSAND
       
    58 unichr(0x27):unichr(0x0027), # APOSTROPHE
       
    59 unichr(0x28):unichr(0x0028), # LEFT PARENTHESIS
       
    60 unichr(0x29):unichr(0x0029), # RIGHT PARENTHESIS
       
    61 unichr(0x2A):unichr(0x002A), # ASTERISK
       
    62 unichr(0x2B):unichr(0x002B), # PLUS SIGN
       
    63 unichr(0x2C):unichr(0x002C), # COMMA
       
    64 unichr(0x2D):unichr(0x002D), # HYPHEN-MINUS
       
    65 unichr(0x2E):unichr(0x002E), # FULL STOP
       
    66 unichr(0x2F):unichr(0x002F), # SOLIDUS
       
    67 unichr(0x30):unichr(0x0030), # DIGIT ZERO
       
    68 unichr(0x31):unichr(0x0031), # DIGIT ONE
       
    69 unichr(0x32):unichr(0x0032), # DIGIT TWO
       
    70 unichr(0x33):unichr(0x0033), # DIGIT THREE
       
    71 unichr(0x34):unichr(0x0034), # DIGIT FOUR
       
    72 unichr(0x35):unichr(0x0035), # DIGIT FIVE
       
    73 unichr(0x36):unichr(0x0036), # DIGIT SIX
       
    74 unichr(0x37):unichr(0x0037), # DIGIT SEVEN
       
    75 unichr(0x38):unichr(0x0038), # DIGIT EIGHT
       
    76 unichr(0x39):unichr(0x0039), # DIGIT NINE
       
    77 unichr(0x3A):unichr(0x003A), # COLON
       
    78 unichr(0x3B):unichr(0x003B), # SEMICOLON
       
    79 unichr(0x3C):unichr(0x003C), # LESS-THAN SIGN
       
    80 unichr(0x3D):unichr(0x003D), # EQUALS SIGN
       
    81 unichr(0x3E):unichr(0x003E), # GREATER-THAN SIGN
       
    82 unichr(0x3F):unichr(0x003F), # QUESTION MARK
       
    83 unichr(0x40):unichr(0x00A1), # INVERTED EXCLAMATION MARK
       
    84 unichr(0x41):unichr(0x0041), # LATIN CAPITAL LETTER A
       
    85 unichr(0x42):unichr(0x0042), # LATIN CAPITAL LETTER B
       
    86 unichr(0x43):unichr(0x0043), # LATIN CAPITAL LETTER C
       
    87 unichr(0x44):unichr(0x0044), # LATIN CAPITAL LETTER D
       
    88 unichr(0x45):unichr(0x0045), # LATIN CAPITAL LETTER E
       
    89 unichr(0x46):unichr(0x0046), # LATIN CAPITAL LETTER F
       
    90 unichr(0x47):unichr(0x0047), # LATIN CAPITAL LETTER G
       
    91 unichr(0x48):unichr(0x0048), # LATIN CAPITAL LETTER H
       
    92 unichr(0x49):unichr(0x0049), # LATIN CAPITAL LETTER I
       
    93 unichr(0x4A):unichr(0x004A), # LATIN CAPITAL LETTER J
       
    94 unichr(0x4B):unichr(0x004B), # LATIN CAPITAL LETTER K
       
    95 unichr(0x4C):unichr(0x004C), # LATIN CAPITAL LETTER L
       
    96 unichr(0x4D):unichr(0x004D), # LATIN CAPITAL LETTER M
       
    97 unichr(0x4E):unichr(0x004E), # LATIN CAPITAL LETTER N
       
    98 unichr(0x4F):unichr(0x004F), # LATIN CAPITAL LETTER O
       
    99 unichr(0x50):unichr(0x0050), # LATIN CAPITAL LETTER P
       
   100 unichr(0x51):unichr(0x0051), # LATIN CAPITAL LETTER Q
       
   101 unichr(0x52):unichr(0x0052), # LATIN CAPITAL LETTER R
       
   102 unichr(0x53):unichr(0x0053), # LATIN CAPITAL LETTER S
       
   103 unichr(0x54):unichr(0x0054), # LATIN CAPITAL LETTER T
       
   104 unichr(0x55):unichr(0x0055), # LATIN CAPITAL LETTER U
       
   105 unichr(0x56):unichr(0x0056), # LATIN CAPITAL LETTER V
       
   106 unichr(0x57):unichr(0x0057), # LATIN CAPITAL LETTER W
       
   107 unichr(0x58):unichr(0x0058), # LATIN CAPITAL LETTER X
       
   108 unichr(0x59):unichr(0x0059), # LATIN CAPITAL LETTER Y
       
   109 unichr(0x5A):unichr(0x005A), # LATIN CAPITAL LETTER Z
       
   110 unichr(0x5B):unichr(0x00C4), # LATIN CAPITAL LETTER A WITH DIAERESIS
       
   111 unichr(0x5C):unichr(0x00D6), # LATIN CAPITAL LETTER O WITH DIAERESIS
       
   112 unichr(0x5D):unichr(0x00D1), # LATIN CAPITAL LETTER N WITH TILDE
       
   113 unichr(0x5E):unichr(0x00DC), # LATIN CAPITAL LETTER U WITH DIAERESIS
       
   114 unichr(0x5F):unichr(0x00A7), # SECTION SIGN
       
   115 unichr(0x60):unichr(0x00BF), # INVERTED QUESTION MARK
       
   116 unichr(0x61):unichr(0x0061), # LATIN SMALL LETTER A
       
   117 unichr(0x62):unichr(0x0062), # LATIN SMALL LETTER B
       
   118 unichr(0x63):unichr(0x0063), # LATIN SMALL LETTER C
       
   119 unichr(0x64):unichr(0x0064), # LATIN SMALL LETTER D
       
   120 unichr(0x65):unichr(0x0065), # LATIN SMALL LETTER E
       
   121 unichr(0x66):unichr(0x0066), # LATIN SMALL LETTER F
       
   122 unichr(0x67):unichr(0x0067), # LATIN SMALL LETTER G
       
   123 unichr(0x68):unichr(0x0068), # LATIN SMALL LETTER H
       
   124 unichr(0x69):unichr(0x0069), # LATIN SMALL LETTER I
       
   125 unichr(0x6A):unichr(0x006A), # LATIN SMALL LETTER J
       
   126 unichr(0x6B):unichr(0x006B), # LATIN SMALL LETTER K
       
   127 unichr(0x6C):unichr(0x006C), # LATIN SMALL LETTER L
       
   128 unichr(0x6D):unichr(0x006D), # LATIN SMALL LETTER M
       
   129 unichr(0x6E):unichr(0x006E), # LATIN SMALL LETTER N
       
   130 unichr(0x6F):unichr(0x006F), # LATIN SMALL LETTER O
       
   131 unichr(0x70):unichr(0x0070), # LATIN SMALL LETTER P
       
   132 unichr(0x71):unichr(0x0071), # LATIN SMALL LETTER Q
       
   133 unichr(0x72):unichr(0x0072), # LATIN SMALL LETTER R
       
   134 unichr(0x73):unichr(0x0073), # LATIN SMALL LETTER S
       
   135 unichr(0x74):unichr(0x0074), # LATIN SMALL LETTER T
       
   136 unichr(0x75):unichr(0x0075), # LATIN SMALL LETTER U
       
   137 unichr(0x76):unichr(0x0076), # LATIN SMALL LETTER V
       
   138 unichr(0x77):unichr(0x0077), # LATIN SMALL LETTER W
       
   139 unichr(0x78):unichr(0x0078), # LATIN SMALL LETTER X
       
   140 unichr(0x79):unichr(0x0079), # LATIN SMALL LETTER Y
       
   141 unichr(0x7A):unichr(0x007A), # LATIN SMALL LETTER Z
       
   142 unichr(0x7B):unichr(0x00E4), # LATIN SMALL LETTER A WITH DIAERESIS
       
   143 unichr(0x7C):unichr(0x00F6), # LATIN SMALL LETTER O WITH DIAERESIS
       
   144 unichr(0x7D):unichr(0x00F1), # LATIN SMALL LETTER N WITH TILDE
       
   145 unichr(0x7E):unichr(0x00FC), # LATIN SMALL LETTER U WITH DIAERESIS
       
   146 unichr(0x7F):unichr(0x00E0), # LATIN SMALL LETTER A WITH GRAVE
       
   147 }
       
   148 
       
   149 encoding_map=dict([(v,k) for (k,v) in decoding_map.items()])
       
   150 
       
   151 class Codec(codecs.Codec):
       
   152 	def encode(self,input,errors='strict'):
       
   153 		ret=""
       
   154 		for i in input:
       
   155 		  ret+=encoding_map[i]
       
   156 		return (ret,len(ret))
       
   157 	def decode(self,input,errors='strict'):
       
   158 		ret=""
       
   159 		for i in input:
       
   160 		  ret+=decoding_map[i]
       
   161 		return (ret,len(ret))
       
   162 
       
   163 
       
   164 
       
   165 class StreamWriter(Codec,mbc.MultibyteStreamWriter,codecs.StreamWriter):
       
   166     pass
       
   167 
       
   168 class StreamReader(Codec,mbc.MultibyteStreamReader,codecs.StreamReader):
       
   169     pass
       
   170 
       
   171 ### encodings module API
       
   172 
       
   173 def getregentry():
       
   174 	return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
       
   175 
       
   176 
       
   177 def gsm_search(encoding):
       
   178 	if not encoding == ENCODING_NAME:
       
   179 		return
       
   180 	return getregentry()
       
   181 	
       
   182 # Register our codec when we load the module
       
   183 codecs.register(gsm_search)
       
   184 
       
   185 if __name__ == "__main__":
       
   186 	text = "€öäüß"
       
   187 	text2 = unicode(text,"utf-8").encode("gsm0338")
       
   188 	assert(text2==u"\u1B65\x7C\x7B\x7E\x1E")
       
   189 	text=""
       
   190 	text2 = unicode(text,"utf-8").encode("gsm0338")
       
   191 	assert(text==text2)
       
   192