Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
2.1 KiB
88 lines
2.1 KiB
package mahonia |
|
|
|
import ( |
|
"sync" |
|
"unicode/utf8" |
|
) |
|
|
|
// kutenTable stores the mapping data of a double-byte character set. Entries
// are addressed by ku (区, zone) and ten (点, position), both zero-based, so
// that the same table can back several concrete byte encodings.
type kutenTable struct {
	// Data maps a (ku, ten) pair to its Unicode code point: Data[ku][ten].
	// The decode methods treat a zero entry as "no character assigned".
	Data [94][94]uint16

	// FromUnicode is the inverse lookup: indexed by Unicode code point, each
	// element is the {ku, ten} pair for that code point. It stays nil until
	// Reverse() has been called.
	FromUnicode [][2]byte

	// once guarantees that FromUnicode is built only a single time, even if
	// Reverse() is called repeatedly.
	once sync.Once
}
|
|
|
// Reverse generates FromUnicode. |
|
func (t *kutenTable) Reverse() { |
|
t.once.Do(func() { |
|
t.FromUnicode = make([][2]byte, 65536) |
|
for ku := range t.Data { |
|
for ten, unicode := range t.Data[ku] { |
|
t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)} |
|
} |
|
} |
|
}) |
|
} |
|
|
|
// DecodeLow decodes a character from an encoding that does not have the high |
|
// bit set. |
|
func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) { |
|
if len(p) < 2 { |
|
return 0, 0, NO_ROOM |
|
} |
|
ku := p[0] - 0x21 |
|
ten := p[1] - 0x21 |
|
if ku > 93 || ten > 93 { |
|
return utf8.RuneError, 1, INVALID_CHAR |
|
} |
|
u := t.Data[ku][ten] |
|
if u == 0 { |
|
return utf8.RuneError, 1, INVALID_CHAR |
|
} |
|
return rune(u), 2, SUCCESS |
|
} |
|
|
|
// DecodeHigh decodes a character from an encoding that has the high bit set. |
|
func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) { |
|
if len(p) < 2 { |
|
return 0, 0, NO_ROOM |
|
} |
|
ku := p[0] - 0xa1 |
|
ten := p[1] - 0xa1 |
|
if ku > 93 || ten > 93 { |
|
return utf8.RuneError, 1, INVALID_CHAR |
|
} |
|
u := t.Data[ku][ten] |
|
if u == 0 { |
|
return utf8.RuneError, 1, INVALID_CHAR |
|
} |
|
return rune(u), 2, SUCCESS |
|
} |
|
|
|
// EncodeHigh encodes a character in an encoding that has the high bit set. |
|
func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) { |
|
if len(p) < 2 { |
|
return 0, NO_ROOM |
|
} |
|
if c > 0xffff { |
|
p[0] = '?' |
|
return 1, INVALID_CHAR |
|
} |
|
kuten := t.FromUnicode[c] |
|
if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) { |
|
p[0] = '?' |
|
return 1, INVALID_CHAR |
|
} |
|
p[0] = kuten[0] + 0xa1 |
|
p[1] = kuten[1] + 0xa1 |
|
return 2, SUCCESS |
|
}
|
|
|