iro/anbieter/gsm0338.py
author Sandro Knauß <knauss@netzguerilla.net>
Mon, 13 Feb 2012 17:11:44 +0100
branch devel
changeset 145 b58ce5bb96cc
parent 17 44a3eda179b6
permissions -rw-r--r--
now offers are empty from start

#!/usr/bin/python
# -*- coding: utf-8 -*-
import codecs
import _multibytecodec as mbc 

# Name under which this codec is looked up; gsm_search() compares the
# requested encoding against exactly this string.
ENCODING_NAME = "gsm0338"

# Decoding table: GSM 03.38 code -> Unicode character.
#
# Both keys and values are one-character unicode strings built with unichr().
# Entries of the extension table (codes 0x1Bxx) are keyed by the SINGLE code
# point unichr(0x1Bxx), not by the two-character sequence 0x1B followed by
# 0xxx.  The codec therefore emits and expects one character such as U+1B65
# for the euro sign.
# NOTE(review): on the wire GSM 03.38 sends the escape and the following code
# as two separate septets -- confirm that consumers of this codec expect the
# combined code point before relying on the output as transport data.
decoding_map= {
unichr(0x00):unichr(0x0040), # COMMERCIAL AT
unichr(0x01):unichr(0x00A3), # POUND SIGN
unichr(0x02):unichr(0x0024), # DOLLAR SIGN
unichr(0x03):unichr(0x00A5), # YEN SIGN
unichr(0x04):unichr(0x00E8), # LATIN SMALL LETTER E WITH GRAVE
unichr(0x05):unichr(0x00E9), # LATIN SMALL LETTER E WITH ACUTE
unichr(0x06):unichr(0x00F9), # LATIN SMALL LETTER U WITH GRAVE
unichr(0x07):unichr(0x00EC), # LATIN SMALL LETTER I WITH GRAVE
unichr(0x08):unichr(0x00F2), # LATIN SMALL LETTER O WITH GRAVE
unichr(0x09):unichr(0x00E7), # LATIN SMALL LETTER C WITH CEDILLA
unichr(0x0A):unichr(0x000A), # LINE FEED
unichr(0x0B):unichr(0x00D8), # LATIN CAPITAL LETTER O WITH STROKE
unichr(0x0C):unichr(0x00F8), # LATIN SMALL LETTER O WITH STROKE
unichr(0x0D):unichr(0x000D), # CARRIAGE RETURN
unichr(0x0E):unichr(0x00C5), # LATIN CAPITAL LETTER A WITH RING ABOVE
unichr(0x0F):unichr(0x00E5), # LATIN SMALL LETTER A WITH RING ABOVE
unichr(0x10):unichr(0x0394), # GREEK CAPITAL LETTER DELTA
unichr(0x11):unichr(0x005F), # LOW LINE
unichr(0x12):unichr(0x03A6), # GREEK CAPITAL LETTER PHI
unichr(0x13):unichr(0x0393), # GREEK CAPITAL LETTER GAMMA
unichr(0x14):unichr(0x039B), # GREEK CAPITAL LETTER LAMDA
unichr(0x15):unichr(0x03A9), # GREEK CAPITAL LETTER OMEGA
unichr(0x16):unichr(0x03A0), # GREEK CAPITAL LETTER PI
unichr(0x17):unichr(0x03A8), # GREEK CAPITAL LETTER PSI
unichr(0x18):unichr(0x03A3), # GREEK CAPITAL LETTER SIGMA
unichr(0x19):unichr(0x0398), # GREEK CAPITAL LETTER THETA
unichr(0x1A):unichr(0x039E), # GREEK CAPITAL LETTER XI
unichr(0x1B):unichr(0x00A0), # ESCAPE TO EXTENSION TABLE (mapped to NO-BREAK SPACE when it stands alone)
unichr(0x1B0A):unichr(0x000C), # FORM FEED
unichr(0x1B14):unichr(0x005E), # CIRCUMFLEX ACCENT
unichr(0x1B28):unichr(0x007B), # LEFT CURLY BRACKET
unichr(0x1B29):unichr(0x007D), # RIGHT CURLY BRACKET
unichr(0x1B2F):unichr(0x005C), # REVERSE SOLIDUS
unichr(0x1B3C):unichr(0x005B), # LEFT SQUARE BRACKET
unichr(0x1B3D):unichr(0x007E), # TILDE
unichr(0x1B3E):unichr(0x005D), # RIGHT SQUARE BRACKET
unichr(0x1B40):unichr(0x007C), # VERTICAL LINE
unichr(0x1B65):unichr(0x20AC), # EURO SIGN
unichr(0x1C):unichr(0x00C6), # LATIN CAPITAL LETTER AE
unichr(0x1D):unichr(0x00E6), # LATIN SMALL LETTER AE
unichr(0x1E):unichr(0x00DF), # LATIN SMALL LETTER SHARP S (German)
unichr(0x1F):unichr(0x00C9), # LATIN CAPITAL LETTER E WITH ACUTE
unichr(0x20):unichr(0x0020), # SPACE
unichr(0x21):unichr(0x0021), # EXCLAMATION MARK
unichr(0x22):unichr(0x0022), # QUOTATION MARK
unichr(0x23):unichr(0x0023), # NUMBER SIGN
unichr(0x24):unichr(0x00A4), # CURRENCY SIGN
unichr(0x25):unichr(0x0025), # PERCENT SIGN
unichr(0x26):unichr(0x0026), # AMPERSAND
unichr(0x27):unichr(0x0027), # APOSTROPHE
unichr(0x28):unichr(0x0028), # LEFT PARENTHESIS
unichr(0x29):unichr(0x0029), # RIGHT PARENTHESIS
unichr(0x2A):unichr(0x002A), # ASTERISK
unichr(0x2B):unichr(0x002B), # PLUS SIGN
unichr(0x2C):unichr(0x002C), # COMMA
unichr(0x2D):unichr(0x002D), # HYPHEN-MINUS
unichr(0x2E):unichr(0x002E), # FULL STOP
unichr(0x2F):unichr(0x002F), # SOLIDUS
unichr(0x30):unichr(0x0030), # DIGIT ZERO
unichr(0x31):unichr(0x0031), # DIGIT ONE
unichr(0x32):unichr(0x0032), # DIGIT TWO
unichr(0x33):unichr(0x0033), # DIGIT THREE
unichr(0x34):unichr(0x0034), # DIGIT FOUR
unichr(0x35):unichr(0x0035), # DIGIT FIVE
unichr(0x36):unichr(0x0036), # DIGIT SIX
unichr(0x37):unichr(0x0037), # DIGIT SEVEN
unichr(0x38):unichr(0x0038), # DIGIT EIGHT
unichr(0x39):unichr(0x0039), # DIGIT NINE
unichr(0x3A):unichr(0x003A), # COLON
unichr(0x3B):unichr(0x003B), # SEMICOLON
unichr(0x3C):unichr(0x003C), # LESS-THAN SIGN
unichr(0x3D):unichr(0x003D), # EQUALS SIGN
unichr(0x3E):unichr(0x003E), # GREATER-THAN SIGN
unichr(0x3F):unichr(0x003F), # QUESTION MARK
unichr(0x40):unichr(0x00A1), # INVERTED EXCLAMATION MARK
unichr(0x41):unichr(0x0041), # LATIN CAPITAL LETTER A
unichr(0x42):unichr(0x0042), # LATIN CAPITAL LETTER B
unichr(0x43):unichr(0x0043), # LATIN CAPITAL LETTER C
unichr(0x44):unichr(0x0044), # LATIN CAPITAL LETTER D
unichr(0x45):unichr(0x0045), # LATIN CAPITAL LETTER E
unichr(0x46):unichr(0x0046), # LATIN CAPITAL LETTER F
unichr(0x47):unichr(0x0047), # LATIN CAPITAL LETTER G
unichr(0x48):unichr(0x0048), # LATIN CAPITAL LETTER H
unichr(0x49):unichr(0x0049), # LATIN CAPITAL LETTER I
unichr(0x4A):unichr(0x004A), # LATIN CAPITAL LETTER J
unichr(0x4B):unichr(0x004B), # LATIN CAPITAL LETTER K
unichr(0x4C):unichr(0x004C), # LATIN CAPITAL LETTER L
unichr(0x4D):unichr(0x004D), # LATIN CAPITAL LETTER M
unichr(0x4E):unichr(0x004E), # LATIN CAPITAL LETTER N
unichr(0x4F):unichr(0x004F), # LATIN CAPITAL LETTER O
unichr(0x50):unichr(0x0050), # LATIN CAPITAL LETTER P
unichr(0x51):unichr(0x0051), # LATIN CAPITAL LETTER Q
unichr(0x52):unichr(0x0052), # LATIN CAPITAL LETTER R
unichr(0x53):unichr(0x0053), # LATIN CAPITAL LETTER S
unichr(0x54):unichr(0x0054), # LATIN CAPITAL LETTER T
unichr(0x55):unichr(0x0055), # LATIN CAPITAL LETTER U
unichr(0x56):unichr(0x0056), # LATIN CAPITAL LETTER V
unichr(0x57):unichr(0x0057), # LATIN CAPITAL LETTER W
unichr(0x58):unichr(0x0058), # LATIN CAPITAL LETTER X
unichr(0x59):unichr(0x0059), # LATIN CAPITAL LETTER Y
unichr(0x5A):unichr(0x005A), # LATIN CAPITAL LETTER Z
unichr(0x5B):unichr(0x00C4), # LATIN CAPITAL LETTER A WITH DIAERESIS
unichr(0x5C):unichr(0x00D6), # LATIN CAPITAL LETTER O WITH DIAERESIS
unichr(0x5D):unichr(0x00D1), # LATIN CAPITAL LETTER N WITH TILDE
unichr(0x5E):unichr(0x00DC), # LATIN CAPITAL LETTER U WITH DIAERESIS
unichr(0x5F):unichr(0x00A7), # SECTION SIGN
unichr(0x60):unichr(0x00BF), # INVERTED QUESTION MARK
unichr(0x61):unichr(0x0061), # LATIN SMALL LETTER A
unichr(0x62):unichr(0x0062), # LATIN SMALL LETTER B
unichr(0x63):unichr(0x0063), # LATIN SMALL LETTER C
unichr(0x64):unichr(0x0064), # LATIN SMALL LETTER D
unichr(0x65):unichr(0x0065), # LATIN SMALL LETTER E
unichr(0x66):unichr(0x0066), # LATIN SMALL LETTER F
unichr(0x67):unichr(0x0067), # LATIN SMALL LETTER G
unichr(0x68):unichr(0x0068), # LATIN SMALL LETTER H
unichr(0x69):unichr(0x0069), # LATIN SMALL LETTER I
unichr(0x6A):unichr(0x006A), # LATIN SMALL LETTER J
unichr(0x6B):unichr(0x006B), # LATIN SMALL LETTER K
unichr(0x6C):unichr(0x006C), # LATIN SMALL LETTER L
unichr(0x6D):unichr(0x006D), # LATIN SMALL LETTER M
unichr(0x6E):unichr(0x006E), # LATIN SMALL LETTER N
unichr(0x6F):unichr(0x006F), # LATIN SMALL LETTER O
unichr(0x70):unichr(0x0070), # LATIN SMALL LETTER P
unichr(0x71):unichr(0x0071), # LATIN SMALL LETTER Q
unichr(0x72):unichr(0x0072), # LATIN SMALL LETTER R
unichr(0x73):unichr(0x0073), # LATIN SMALL LETTER S
unichr(0x74):unichr(0x0074), # LATIN SMALL LETTER T
unichr(0x75):unichr(0x0075), # LATIN SMALL LETTER U
unichr(0x76):unichr(0x0076), # LATIN SMALL LETTER V
unichr(0x77):unichr(0x0077), # LATIN SMALL LETTER W
unichr(0x78):unichr(0x0078), # LATIN SMALL LETTER X
unichr(0x79):unichr(0x0079), # LATIN SMALL LETTER Y
unichr(0x7A):unichr(0x007A), # LATIN SMALL LETTER Z
unichr(0x7B):unichr(0x00E4), # LATIN SMALL LETTER A WITH DIAERESIS
unichr(0x7C):unichr(0x00F6), # LATIN SMALL LETTER O WITH DIAERESIS
unichr(0x7D):unichr(0x00F1), # LATIN SMALL LETTER N WITH TILDE
unichr(0x7E):unichr(0x00FC), # LATIN SMALL LETTER U WITH DIAERESIS
unichr(0x7F):unichr(0x00E0), # LATIN SMALL LETTER A WITH GRAVE
}

# Encoding table: the decoding table turned around, so that each Unicode
# character leads back to the code character it was decoded from.
encoding_map = dict((char, code) for (code, char) in decoding_map.items())

class _UnmappedCharacterError(UnicodeError, KeyError):
    """A character without an entry in the translation table was met.

    UnicodeError is the family of exceptions a codec is expected to raise;
    KeyError is what the bare table lookup raised before error handling was
    implemented here.  Deriving from both keeps handlers written against
    either one working.
    """


class Codec(codecs.Codec):
    """Stateless codec translating text one character at a time.

    encode() looks every character up in the module-level encoding_map,
    decode() in the module-level decoding_map.
    """

    @staticmethod
    def _translate(text, table, errors, replacement):
        """Map every character of *text* through *table*.

        *errors* selects what happens to a character missing from *table*:
        'ignore' drops it, 'replace' emits *replacement* in its place, and
        any other value (including the default 'strict') raises
        _UnmappedCharacterError.

        Returns the tuple (translated text, number of input characters
        consumed).
        """
        pieces = []
        consumed = 0
        for char in text:
            consumed += 1
            mapped = table.get(char)
            if mapped is not None:
                pieces.append(mapped)
            elif errors == 'ignore':
                continue
            elif errors == 'replace':
                pieces.append(replacement)
            else:
                raise _UnmappedCharacterError(
                    "character %r at position %d has no mapping"
                    % (char, consumed - 1))
        # Joining once avoids rebuilding the result string per character.
        return ("".join(pieces), consumed)

    def encode(self,input,errors='strict'):
        """Translate Unicode text into its GSM 03.38 code characters.

        input  -- the text to encode.
        errors -- 'strict' (default), 'ignore' or 'replace'; with 'replace'
                  an unmapped character becomes a question mark, which the
                  tables map to itself.

        Returns (encoded text, number of input characters consumed).
        Raises _UnmappedCharacterError (a UnicodeError and a KeyError) for
        an unmapped character unless errors is 'ignore' or 'replace'.
        """
        return self._translate(input, encoding_map, errors, u"?")

    def decode(self,input,errors='strict'):
        """Translate GSM 03.38 code characters back into Unicode text.

        input  -- the code characters to decode.
        errors -- 'strict' (default), 'ignore' or 'replace'; with 'replace'
                  an unknown code becomes U+FFFD REPLACEMENT CHARACTER.

        Returns (decoded text, number of input characters consumed).
        Raises _UnmappedCharacterError (a UnicodeError and a KeyError) for
        an unknown code unless errors is 'ignore' or 'replace'.
        """
        return self._translate(input, decoding_map, errors, u"\ufffd")



class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that runs everything written through Codec.encode().

    mbc.MultibyteStreamWriter is deliberately not a base class: that C type
    can only be instantiated when the class carries a ``codec`` attribute
    holding a native multibyte codec object, which this pure-Python codec
    does not have, so creating a writer always failed.  codecs.StreamWriter
    alone only needs the encode() method inherited from Codec.
    """

class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that runs everything read through Codec.decode().

    mbc.MultibyteStreamReader is deliberately not a base class: that C type
    can only be instantiated when the class carries a ``codec`` attribute
    holding a native multibyte codec object, which this pure-Python codec
    does not have, so creating a reader always failed.  codecs.StreamReader
    alone only needs the decode() method inherited from Codec.
    """

### encodings module API

def getregentry():
    """Build the codec registry entry for this encoding.

    Returns a codecs.CodecInfo, the object the codec registry documents as
    the result of a search function.  CodecInfo is a tuple subclass whose
    four items are (encode, decode, StreamReader, StreamWriter) in that
    order, so callers that index or unpack the result keep working; the
    entry additionally exposes the codec name as an attribute.
    """
    # One stateless Codec instance can serve both directions.
    codec = Codec()
    return codecs.CodecInfo(
        name=ENCODING_NAME,
        encode=codec.encode,
        decode=codec.decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter
    )


def gsm_search(encoding):
    """Search function handed to codecs.register().

    Returns the entry built by getregentry() when *encoding* equals
    ENCODING_NAME, and None for every other name.
    """
    if encoding == ENCODING_NAME:
        return getregentry()
    return None
	
# Register our codec when we load the module: this is an import-time side
# effect, so importing the module is all a caller has to do before using the
# encoding by name.
codecs.register(gsm_search)

if __name__ == "__main__":
    # Self-check, executed only when the module is run as a script.
    # The euro sign comes out as the single code point U+1B65 because the
    # tables key extension characters by unichr(0x1Bxx).
    sample = "€öäüß"
    encoded = unicode(sample, "utf-8").encode("gsm0338")
    assert encoded == u"\u1B65\x7C\x7B\x7E\x1E"

    # An empty message has to pass through the codec unchanged.
    empty = ""
    assert empty == unicode(empty, "utf-8").encode("gsm0338")