89 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			89 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Go
		
	
	
	
| package mahonia
 | |
| 
 | |
| import (
 | |
| 	"sync"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// kutenTable is the mapping data for one double-byte character set. Entries
// are addressed by ku (区, zone) and ten (点, position); the concrete encoding
// schemes derive their byte values from those two coordinates.
type kutenTable struct {
	// Data maps [ku][ten] to the Unicode value of the character stored at
	// that zone and position.
	Data [94][94]uint16

	// FromUnicode maps a Unicode code point to its {ku, ten} pair.
	// It is not usable until Reverse() has been called.
	FromUnicode [][2]byte

	// once makes sure FromUnicode is generated only a single time.
	once sync.Once
}
 | |
| 
 | |
| // Reverse generates FromUnicode.
 | |
| func (t *kutenTable) Reverse() {
 | |
| 	t.once.Do(func() {
 | |
| 		t.FromUnicode = make([][2]byte, 65536)
 | |
| 		for ku := range t.Data {
 | |
| 			for ten, unicode := range t.Data[ku] {
 | |
| 				t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)}
 | |
| 			}
 | |
| 		}
 | |
| 	})
 | |
| }
 | |
| 
 | |
| // DecodeLow decodes a character from an encoding that does not have the high
 | |
| // bit set.
 | |
| func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) {
 | |
| 	if len(p) < 2 {
 | |
| 		return 0, 0, NO_ROOM
 | |
| 	}
 | |
| 	ku := p[0] - 0x21
 | |
| 	ten := p[1] - 0x21
 | |
| 	if ku > 93 || ten > 93 {
 | |
| 		return utf8.RuneError, 1, INVALID_CHAR
 | |
| 	}
 | |
| 	u := t.Data[ku][ten]
 | |
| 	if u == 0 {
 | |
| 		return utf8.RuneError, 1, INVALID_CHAR
 | |
| 	}
 | |
| 	return rune(u), 2, SUCCESS
 | |
| }
 | |
| 
 | |
| // DecodeHigh decodes a character from an encoding that has the high bit set.
 | |
| func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) {
 | |
| 	if len(p) < 2 {
 | |
| 		return 0, 0, NO_ROOM
 | |
| 	}
 | |
| 	ku := p[0] - 0xa1
 | |
| 	ten := p[1] - 0xa1
 | |
| 	if ku > 93 || ten > 93 {
 | |
| 		return utf8.RuneError, 1, INVALID_CHAR
 | |
| 	}
 | |
| 	u := t.Data[ku][ten]
 | |
| 	if u == 0 {
 | |
| 		return utf8.RuneError, 1, INVALID_CHAR
 | |
| 	}
 | |
| 	return rune(u), 2, SUCCESS
 | |
| }
 | |
| 
 | |
| // EncodeHigh encodes a character in an encoding that has the high bit set.
 | |
| func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) {
 | |
| 	if len(p) < 2 {
 | |
| 		return 0, NO_ROOM
 | |
| 	}
 | |
| 	if c > 0xffff {
 | |
| 		p[0] = '?'
 | |
| 		return 1, INVALID_CHAR
 | |
| 	}
 | |
| 	kuten := t.FromUnicode[c]
 | |
| 	if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) {
 | |
| 		p[0] = '?'
 | |
| 		return 1, INVALID_CHAR
 | |
| 	}
 | |
| 	p[0] = kuten[0] + 0xa1
 | |
| 	p[1] = kuten[1] + 0xa1
 | |
| 	return 2, SUCCESS
 | |
| }
 |