Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
156 lines
2.8 KiB
156 lines
2.8 KiB
package mahonia |
|
|
|
import ( |
|
"sync" |
|
) |
|
|
|
// Converters for GB18030 encoding. |
|
|
|
func init() { |
|
RegisterCharset(&Charset{ |
|
Name: "GB18030", |
|
NewDecoder: func() Decoder { |
|
gb18030Once.Do(buildGB18030Tables) |
|
return decodeGB18030Rune |
|
}, |
|
NewEncoder: func() Encoder { |
|
gb18030Once.Do(buildGB18030Tables) |
|
return encodeGB18030Rune |
|
}, |
|
}) |
|
} |
|
|
|
func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { |
|
if len(p) == 0 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
b := p[0] |
|
if b < 128 { |
|
return rune(b), 1, SUCCESS |
|
} |
|
|
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
if p[0] < 0x81 || p[0] > 0xfe { |
|
return 0xfffd, 1, INVALID_CHAR |
|
} |
|
|
|
if p[1] >= 0x40 { |
|
// 2-byte character |
|
c := uint16(p[0])<<8 + uint16(p[1]) |
|
r = rune(gbkToUnicode[c]) |
|
if r == 0 { |
|
r = gbkToUnicodeExtra[c] |
|
} |
|
|
|
if r != 0 { |
|
return r, 2, SUCCESS |
|
} |
|
} else if p[1] >= 0x30 { |
|
// 4-byte character |
|
if len(p) < 4 { |
|
return 0, 0, NO_ROOM |
|
} |
|
if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { |
|
return 0xfffd, 1, INVALID_CHAR |
|
} |
|
|
|
code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) |
|
lin := gb18030Linear(code) |
|
|
|
if lin <= maxGB18030Linear { |
|
r = rune(gb18030LinearToUnicode[lin]) |
|
if r != 0 { |
|
return r, 4, SUCCESS |
|
} |
|
} |
|
|
|
for _, rng := range gb18030Ranges { |
|
if lin >= rng.firstGB && lin <= rng.lastGB { |
|
return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS |
|
} |
|
} |
|
} |
|
|
|
return 0xfffd, 1, INVALID_CHAR |
|
} |
|
|
|
func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { |
|
if len(p) == 0 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
if r < 128 { |
|
p[0] = byte(r) |
|
return 1, SUCCESS |
|
} |
|
|
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
var c uint16 |
|
if r < 0x10000 { |
|
c = unicodeToGBK[r] |
|
} else { |
|
c = unicodeToGBKExtra[r] |
|
} |
|
|
|
if c != 0 { |
|
p[0] = byte(c >> 8) |
|
p[1] = byte(c) |
|
return 2, SUCCESS |
|
} |
|
|
|
if len(p) < 4 { |
|
return 0, NO_ROOM |
|
} |
|
|
|
if r < 0x10000 { |
|
f := unicodeToGB18030[r] |
|
if f != 0 { |
|
p[0] = byte(f >> 24) |
|
p[1] = byte(f >> 16) |
|
p[2] = byte(f >> 8) |
|
p[3] = byte(f) |
|
return 4, SUCCESS |
|
} |
|
} |
|
|
|
for _, rng := range gb18030Ranges { |
|
if r >= rng.firstRune && r <= rng.lastRune { |
|
lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) |
|
p[0] = byte(lin/(10*126*10)) + 0x81 |
|
p[1] = byte(lin/(126*10)%10) + 0x30 |
|
p[2] = byte(lin/10%126) + 0x81 |
|
p[3] = byte(lin%10) + 0x30 |
|
return 4, SUCCESS |
|
} |
|
} |
|
|
|
p[0] = 0x1a |
|
return 1, INVALID_CHAR |
|
} |
|
|
|
var (
	// gb18030Once ensures buildGB18030Tables runs at most once.
	gb18030Once sync.Once

	// gb18030LinearToUnicode maps gb18030Linear values to Unicode.
	gb18030LinearToUnicode []uint16

	// unicodeToGB18030 maps Unicode code points (indices 0..65535) to
	// four-byte GB18030 codes; a zero entry means no mapping is recorded.
	unicodeToGB18030 []uint32
)
|
|
|
func buildGB18030Tables() { |
|
gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) |
|
unicodeToGB18030 = make([]uint32, 65536) |
|
for _, data := range gb18030Data { |
|
gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode |
|
unicodeToGB18030[data.unicode] = data.gb18030 |
|
} |
|
}
|
|
|