Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
2.9 KiB
170 lines
2.9 KiB
package mahonia |
|
|
|
import ( |
|
"unicode/utf16" |
|
) |
|
|
|
func init() { |
|
for i := 0; i < len(utf16Charsets); i++ { |
|
RegisterCharset(&utf16Charsets[i]) |
|
} |
|
} |
|
|
|
var utf16Charsets = []Charset{ |
|
{ |
|
Name: "UTF-16", |
|
NewDecoder: func() Decoder { |
|
var decodeRune Decoder |
|
return func(p []byte) (c rune, size int, status Status) { |
|
if decodeRune == nil { |
|
// haven't read the BOM yet |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
switch { |
|
case p[0] == 0xfe && p[1] == 0xff: |
|
decodeRune = decodeUTF16beRune |
|
return 0, 2, STATE_ONLY |
|
case p[0] == 0xff && p[1] == 0xfe: |
|
decodeRune = decodeUTF16leRune |
|
return 0, 2, STATE_ONLY |
|
default: |
|
decodeRune = decodeUTF16beRune |
|
} |
|
} |
|
|
|
return decodeRune(p) |
|
} |
|
}, |
|
NewEncoder: func() Encoder { |
|
wroteBOM := false |
|
return func(p []byte, c rune) (size int, status Status) { |
|
if !wroteBOM { |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
p[0] = 0xfe |
|
p[1] = 0xff |
|
wroteBOM = true |
|
return 2, STATE_ONLY |
|
} |
|
|
|
return encodeUTF16beRune(p, c) |
|
} |
|
}, |
|
}, |
|
{ |
|
Name: "UTF-16BE", |
|
NewDecoder: func() Decoder { return decodeUTF16beRune }, |
|
NewEncoder: func() Encoder { return encodeUTF16beRune }, |
|
}, |
|
{ |
|
Name: "UTF-16LE", |
|
NewDecoder: func() Decoder { return decodeUTF16leRune }, |
|
NewEncoder: func() Encoder { return encodeUTF16leRune }, |
|
}, |
|
} |
|
|
|
func decodeUTF16beRune(p []byte) (r rune, size int, status Status) { |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
c := rune(p[0])<<8 + rune(p[1]) |
|
|
|
if utf16.IsSurrogate(c) { |
|
if len(p) < 4 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
c2 := rune(p[2])<<8 + rune(p[3]) |
|
c = utf16.DecodeRune(c, c2) |
|
|
|
if c == 0xfffd { |
|
return c, 2, INVALID_CHAR |
|
} else { |
|
return c, 4, SUCCESS |
|
} |
|
} |
|
|
|
return c, 2, SUCCESS |
|
} |
|
|
|
func encodeUTF16beRune(p []byte, c rune) (size int, status Status) { |
|
if c < 0x10000 { |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
p[0] = byte(c >> 8) |
|
p[1] = byte(c) |
|
return 2, SUCCESS |
|
} |
|
|
|
if len(p) < 4 { |
|
status = NO_ROOM |
|
return |
|
} |
|
s1, s2 := utf16.EncodeRune(c) |
|
p[0] = byte(s1 >> 8) |
|
p[1] = byte(s1) |
|
p[2] = byte(s2 >> 8) |
|
p[3] = byte(s2) |
|
return 4, SUCCESS |
|
} |
|
|
|
func decodeUTF16leRune(p []byte) (r rune, size int, status Status) { |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
c := rune(p[1])<<8 + rune(p[0]) |
|
|
|
if utf16.IsSurrogate(c) { |
|
if len(p) < 4 { |
|
status = NO_ROOM |
|
return |
|
} |
|
|
|
c2 := rune(p[3])<<8 + rune(p[2]) |
|
c = utf16.DecodeRune(c, c2) |
|
|
|
if c == 0xfffd { |
|
return c, 2, INVALID_CHAR |
|
} else { |
|
return c, 4, SUCCESS |
|
} |
|
} |
|
|
|
return c, 2, SUCCESS |
|
} |
|
|
|
func encodeUTF16leRune(p []byte, c rune) (size int, status Status) { |
|
if c < 0x10000 { |
|
if len(p) < 2 { |
|
status = NO_ROOM |
|
return |
|
} |
|
p[1] = byte(c >> 8) |
|
p[0] = byte(c) |
|
return 2, SUCCESS |
|
} |
|
|
|
if len(p) < 4 { |
|
status = NO_ROOM |
|
return |
|
} |
|
s1, s2 := utf16.EncodeRune(c) |
|
p[1] = byte(s1 >> 8) |
|
p[0] = byte(s1) |
|
p[3] = byte(s2 >> 8) |
|
p[2] = byte(s2) |
|
return 4, SUCCESS |
|
}
|
|
|