Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
721 lines
16 KiB
721 lines
16 KiB
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
package xz |
|
|
|
import ( |
|
"bytes" |
|
"crypto/sha256" |
|
"errors" |
|
"fmt" |
|
"hash" |
|
"hash/crc32" |
|
"io" |
|
|
|
"github.com/ulikunitz/xz/lzma" |
|
) |
|
|
|
// allZeros reports whether every byte of p is zero. A nil or empty
// slice yields true.
func allZeros(p []byte) bool {
	for i := 0; i < len(p); i++ {
		if p[i] != 0 {
			return false
		}
	}
	return true
}
|
|
|
// padLen returns how many padding bytes are needed to extend a length
// of n bytes to the next multiple of four. For non-negative n the
// result is in the range 0..3; n is expected to be non-negative.
func padLen(n int64) int {
	rem := int(n % 4)
	if rem <= 0 {
		// Already aligned (or a non-positive remainder, which is passed
		// through unchanged).
		return rem
	}
	return 4 - rem
}
|
|
|
/*** Header ***/ |
|
|
|
// headerMagic holds the six magic bytes that open an xz file header.
var headerMagic = []byte("\xfd7zXZ\x00")

// HeaderLen is the length of the xz file header in bytes.
const HeaderLen = 12

// Identifiers of the checksum methods supported by xz. They are the
// values accepted by verifyFlags.
const (
	None   byte = 0x00
	CRC32  byte = 0x01
	CRC64  byte = 0x04
	SHA256 byte = 0x0a
)

// errInvalidFlags reports that a flags value is not valid.
var errInvalidFlags = errors.New("xz: invalid flags")
|
|
|
// verifyFlags returns the error errInvalidFlags if the value is |
|
// invalid. |
|
func verifyFlags(flags byte) error { |
|
switch flags { |
|
case None, CRC32, CRC64, SHA256: |
|
return nil |
|
default: |
|
return errInvalidFlags |
|
} |
|
} |
|
|
|
// flagstrings maps flag values to strings. |
|
var flagstrings = map[byte]string{ |
|
None: "None", |
|
CRC32: "CRC-32", |
|
CRC64: "CRC-64", |
|
SHA256: "SHA-256", |
|
} |
|
|
|
// flagString returns the string representation for the given flags. |
|
func flagString(flags byte) string { |
|
s, ok := flagstrings[flags] |
|
if !ok { |
|
return "invalid" |
|
} |
|
return s |
|
} |
|
|
|
// newHashFunc returns a function that creates hash instances for the |
|
// hash method encoded in flags. |
|
func newHashFunc(flags byte) (newHash func() hash.Hash, err error) { |
|
switch flags { |
|
case None: |
|
newHash = newNoneHash |
|
case CRC32: |
|
newHash = newCRC32 |
|
case CRC64: |
|
newHash = newCRC64 |
|
case SHA256: |
|
newHash = sha256.New |
|
default: |
|
err = errInvalidFlags |
|
} |
|
return |
|
} |
|
|
|
// header is the decoded content of the xz file header. The only
// information carried by the header is the flags byte.
type header struct {
	// flags is the checksum method identifier of the stream.
	flags byte
}

// errHeaderMagic is returned by header.UnmarshalBinary when the data
// does not start with the xz header magic bytes.
var errHeaderMagic = errors.New("xz: invalid header magic bytes")
|
|
|
// ValidHeader checks whether data is a correct xz file header. The |
|
// length of data must be HeaderLen. |
|
func ValidHeader(data []byte) bool { |
|
var h header |
|
err := h.UnmarshalBinary(data) |
|
return err == nil |
|
} |
|
|
|
// String returns a string representation of the flags. |
|
func (h header) String() string { |
|
return flagString(h.flags) |
|
} |
|
|
|
// UnmarshalBinary reads header from the provided data slice. |
|
func (h *header) UnmarshalBinary(data []byte) error { |
|
// header length |
|
if len(data) != HeaderLen { |
|
return errors.New("xz: wrong file header length") |
|
} |
|
|
|
// magic header |
|
if !bytes.Equal(headerMagic, data[:6]) { |
|
return errHeaderMagic |
|
} |
|
|
|
// checksum |
|
crc := crc32.NewIEEE() |
|
crc.Write(data[6:8]) |
|
if uint32LE(data[8:]) != crc.Sum32() { |
|
return errors.New("xz: invalid checksum for file header") |
|
} |
|
|
|
// stream flags |
|
if data[6] != 0 { |
|
return errInvalidFlags |
|
} |
|
flags := data[7] |
|
if err := verifyFlags(flags); err != nil { |
|
return err |
|
} |
|
|
|
h.flags = flags |
|
return nil |
|
} |
|
|
|
// MarshalBinary generates the xz file header. |
|
func (h *header) MarshalBinary() (data []byte, err error) { |
|
if err = verifyFlags(h.flags); err != nil { |
|
return nil, err |
|
} |
|
|
|
data = make([]byte, 12) |
|
copy(data, headerMagic) |
|
data[7] = h.flags |
|
|
|
crc := crc32.NewIEEE() |
|
crc.Write(data[6:8]) |
|
putUint32LE(data[8:], crc.Sum32()) |
|
|
|
return data, nil |
|
} |
|
|
|
/*** Footer ***/ |
|
|
|
// footerLen is the length of the xz file footer in bytes.
const footerLen = 12

// footerMagic holds the two magic bytes that end the footer.
var footerMagic = []byte("YZ")

// footer is the decoded content of the xz file footer.
type footer struct {
	// indexSize is the size of the index in bytes (encoded in the
	// footer as the "backward size").
	indexSize int64
	// flags is the checksum method identifier of the stream.
	flags byte
}
|
|
|
// String prints a string representation of the footer structure. |
|
func (f footer) String() string { |
|
return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize) |
|
} |
|
|
|
// Bounds for the index size (backward size) accepted by
// footer.MarshalBinary, in bytes.
const (
	minIndexSize = 4
	maxIndexSize = 4 << 32
)
|
|
|
// MarshalBinary converts footer values into an xz file footer. Note |
|
// that the footer value is checked for correctness. |
|
func (f *footer) MarshalBinary() (data []byte, err error) { |
|
if err = verifyFlags(f.flags); err != nil { |
|
return nil, err |
|
} |
|
if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) { |
|
return nil, errors.New("xz: index size out of range") |
|
} |
|
if f.indexSize%4 != 0 { |
|
return nil, errors.New( |
|
"xz: index size not aligned to four bytes") |
|
} |
|
|
|
data = make([]byte, footerLen) |
|
|
|
// backward size (index size) |
|
s := (f.indexSize / 4) - 1 |
|
putUint32LE(data[4:], uint32(s)) |
|
// flags |
|
data[9] = f.flags |
|
// footer magic |
|
copy(data[10:], footerMagic) |
|
|
|
// CRC-32 |
|
crc := crc32.NewIEEE() |
|
crc.Write(data[4:10]) |
|
putUint32LE(data, crc.Sum32()) |
|
|
|
return data, nil |
|
} |
|
|
|
// UnmarshalBinary sets the footer value by unmarshalling an xz file |
|
// footer. |
|
func (f *footer) UnmarshalBinary(data []byte) error { |
|
if len(data) != footerLen { |
|
return errors.New("xz: wrong footer length") |
|
} |
|
|
|
// magic bytes |
|
if !bytes.Equal(data[10:], footerMagic) { |
|
return errors.New("xz: footer magic invalid") |
|
} |
|
|
|
// CRC-32 |
|
crc := crc32.NewIEEE() |
|
crc.Write(data[4:10]) |
|
if uint32LE(data) != crc.Sum32() { |
|
return errors.New("xz: footer checksum error") |
|
} |
|
|
|
var g footer |
|
// backward size (index size) |
|
g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4 |
|
|
|
// flags |
|
if data[8] != 0 { |
|
return errInvalidFlags |
|
} |
|
g.flags = data[9] |
|
if err := verifyFlags(g.flags); err != nil { |
|
return err |
|
} |
|
|
|
*f = g |
|
return nil |
|
} |
|
|
|
/*** Block Header ***/ |
|
|
|
// blockHeader represents the content of an xz block header. |
|
type blockHeader struct { |
|
compressedSize int64 |
|
uncompressedSize int64 |
|
filters []filter |
|
} |
|
|
|
// String converts the block header into a string. |
|
func (h blockHeader) String() string { |
|
var buf bytes.Buffer |
|
first := true |
|
if h.compressedSize >= 0 { |
|
fmt.Fprintf(&buf, "compressed size %d", h.compressedSize) |
|
first = false |
|
} |
|
if h.uncompressedSize >= 0 { |
|
if !first { |
|
buf.WriteString(" ") |
|
} |
|
fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize) |
|
first = false |
|
} |
|
for _, f := range h.filters { |
|
if !first { |
|
buf.WriteString(" ") |
|
} |
|
fmt.Fprintf(&buf, "filter %s", f) |
|
first = false |
|
} |
|
return buf.String() |
|
} |
|
|
|
// Bit masks for the block flags byte (the second byte of a block
// header).
const (
	filterCountMask         = 0x03 // filter count minus one
	reservedBlockFlags      = 0x3c // must be zero
	compressedSizePresent   = 0x40 // compressed size field follows
	uncompressedSizePresent = 0x80 // uncompressed size field follows
)

// errIndexIndicator signals that the index indicator (0x00) was found
// where a block header was expected.
var errIndexIndicator = errors.New("xz: found index indicator")
|
|
|
// readBlockHeader reads the block header. |
|
func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) { |
|
var buf bytes.Buffer |
|
buf.Grow(20) |
|
|
|
// block header size |
|
z, err := io.CopyN(&buf, r, 1) |
|
n = int(z) |
|
if err != nil { |
|
return nil, n, err |
|
} |
|
s := buf.Bytes()[0] |
|
if s == 0 { |
|
return nil, n, errIndexIndicator |
|
} |
|
|
|
// read complete header |
|
headerLen := (int(s) + 1) * 4 |
|
buf.Grow(headerLen - 1) |
|
z, err = io.CopyN(&buf, r, int64(headerLen-1)) |
|
n += int(z) |
|
if err != nil { |
|
return nil, n, err |
|
} |
|
|
|
// unmarshal block header |
|
h = new(blockHeader) |
|
if err = h.UnmarshalBinary(buf.Bytes()); err != nil { |
|
return nil, n, err |
|
} |
|
|
|
return h, n, nil |
|
} |
|
|
|
// readSizeInBlockHeader reads the uncompressed or compressed size |
|
// fields in the block header. The present value informs the function |
|
// whether the respective field is actually present in the header. |
|
func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) { |
|
if !present { |
|
return -1, nil |
|
} |
|
x, _, err := readUvarint(r) |
|
if err != nil { |
|
return 0, err |
|
} |
|
if x >= 1<<63 { |
|
return 0, errors.New("xz: size overflow in block header") |
|
} |
|
return int64(x), nil |
|
} |
|
|
|
// UnmarshalBinary unmarshals the block header. |
|
func (h *blockHeader) UnmarshalBinary(data []byte) error { |
|
// Check header length |
|
s := data[0] |
|
if data[0] == 0 { |
|
return errIndexIndicator |
|
} |
|
headerLen := (int(s) + 1) * 4 |
|
if len(data) != headerLen { |
|
return fmt.Errorf("xz: data length %d; want %d", len(data), |
|
headerLen) |
|
} |
|
n := headerLen - 4 |
|
|
|
// Check CRC-32 |
|
crc := crc32.NewIEEE() |
|
crc.Write(data[:n]) |
|
if crc.Sum32() != uint32LE(data[n:]) { |
|
return errors.New("xz: checksum error for block header") |
|
} |
|
|
|
// Block header flags |
|
flags := data[1] |
|
if flags&reservedBlockFlags != 0 { |
|
return errors.New("xz: reserved block header flags set") |
|
} |
|
|
|
r := bytes.NewReader(data[2:n]) |
|
|
|
// Compressed size |
|
var err error |
|
h.compressedSize, err = readSizeInBlockHeader( |
|
r, flags&compressedSizePresent != 0) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
// Uncompressed size |
|
h.uncompressedSize, err = readSizeInBlockHeader( |
|
r, flags&uncompressedSizePresent != 0) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
h.filters, err = readFilters(r, int(flags&filterCountMask)+1) |
|
if err != nil { |
|
return err |
|
} |
|
|
|
// Check padding |
|
// Since headerLen is a multiple of 4 we don't need to check |
|
// alignment. |
|
k := r.Len() |
|
// The standard spec says that the padding should have not more |
|
// than 3 bytes. However we found paddings of 4 or 5 in the |
|
// wild. See https://github.com/ulikunitz/xz/pull/11 and |
|
// https://github.com/ulikunitz/xz/issues/15 |
|
// |
|
// The only reasonable approach seems to be to ignore the |
|
// padding size. We still check that all padding bytes are zero. |
|
if !allZeros(data[n-k : n]) { |
|
return errPadding |
|
} |
|
return nil |
|
} |
|
|
|
// MarshalBinary marshals the binary header. |
|
func (h *blockHeader) MarshalBinary() (data []byte, err error) { |
|
if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) { |
|
return nil, errors.New("xz: filter count wrong") |
|
} |
|
for i, f := range h.filters { |
|
if i < len(h.filters)-1 { |
|
if f.id() == lzmaFilterID { |
|
return nil, errors.New( |
|
"xz: LZMA2 filter is not the last") |
|
} |
|
} else { |
|
// last filter |
|
if f.id() != lzmaFilterID { |
|
return nil, errors.New("xz: " + |
|
"last filter must be the LZMA2 filter") |
|
} |
|
} |
|
} |
|
|
|
var buf bytes.Buffer |
|
// header size must set at the end |
|
buf.WriteByte(0) |
|
|
|
// flags |
|
flags := byte(len(h.filters) - 1) |
|
if h.compressedSize >= 0 { |
|
flags |= compressedSizePresent |
|
} |
|
if h.uncompressedSize >= 0 { |
|
flags |= uncompressedSizePresent |
|
} |
|
buf.WriteByte(flags) |
|
|
|
p := make([]byte, 10) |
|
if h.compressedSize >= 0 { |
|
k := putUvarint(p, uint64(h.compressedSize)) |
|
buf.Write(p[:k]) |
|
} |
|
if h.uncompressedSize >= 0 { |
|
k := putUvarint(p, uint64(h.uncompressedSize)) |
|
buf.Write(p[:k]) |
|
} |
|
|
|
for _, f := range h.filters { |
|
fp, err := f.MarshalBinary() |
|
if err != nil { |
|
return nil, err |
|
} |
|
buf.Write(fp) |
|
} |
|
|
|
// padding |
|
for i := padLen(int64(buf.Len())); i > 0; i-- { |
|
buf.WriteByte(0) |
|
} |
|
|
|
// crc place holder |
|
buf.Write(p[:4]) |
|
|
|
data = buf.Bytes() |
|
if len(data)%4 != 0 { |
|
panic("data length not aligned") |
|
} |
|
s := len(data)/4 - 1 |
|
if !(1 < s && s <= 255) { |
|
panic("wrong block header size") |
|
} |
|
data[0] = byte(s) |
|
|
|
crc := crc32.NewIEEE() |
|
crc.Write(data[:len(data)-4]) |
|
putUint32LE(data[len(data)-4:], crc.Sum32()) |
|
|
|
return data, nil |
|
} |
|
|
|
// Limits used when marshalling and unmarshalling the filters of an xz
// block header.
const (
	// minFilters and maxFilters bound the number of filters per block.
	minFilters = 1
	maxFilters = 4
	// minReservedID is the smallest filter id treated as reserved.
	minReservedID = 0x4000000000000000
)
|
|
|
// filter represents a filter in the block header. |
|
type filter interface { |
|
id() uint64 |
|
UnmarshalBinary(data []byte) error |
|
MarshalBinary() (data []byte, err error) |
|
reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error) |
|
writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error) |
|
// filter must be last filter |
|
last() bool |
|
} |
|
|
|
// readFilter reads a block filter from the block header. At this point |
|
// in time only the LZMA2 filter is supported. |
|
func readFilter(r io.Reader) (f filter, err error) { |
|
br := lzma.ByteReader(r) |
|
|
|
// index |
|
id, _, err := readUvarint(br) |
|
if err != nil { |
|
return nil, err |
|
} |
|
|
|
var data []byte |
|
switch id { |
|
case lzmaFilterID: |
|
data = make([]byte, lzmaFilterLen) |
|
data[0] = lzmaFilterID |
|
if _, err = io.ReadFull(r, data[1:]); err != nil { |
|
return nil, err |
|
} |
|
f = new(lzmaFilter) |
|
default: |
|
if id >= minReservedID { |
|
return nil, errors.New( |
|
"xz: reserved filter id in block stream header") |
|
} |
|
return nil, errors.New("xz: invalid filter id") |
|
} |
|
if err = f.UnmarshalBinary(data); err != nil { |
|
return nil, err |
|
} |
|
return f, err |
|
} |
|
|
|
// readFilters reads count filters. At this point in time only the count |
|
// 1 is supported. |
|
func readFilters(r io.Reader, count int) (filters []filter, err error) { |
|
if count != 1 { |
|
return nil, errors.New("xz: unsupported filter count") |
|
} |
|
f, err := readFilter(r) |
|
if err != nil { |
|
return nil, err |
|
} |
|
return []filter{f}, err |
|
} |
|
|
|
/*** Index ***/ |
|
|
|
// record is one entry of the xz file index; it describes one block.
type record struct {
	// unpaddedSize is the unpadded size of the block.
	unpaddedSize int64
	// uncompressedSize is the uncompressed size of the block.
	uncompressedSize int64
}
|
|
|
// readRecord reads an index record. |
|
func readRecord(r io.ByteReader) (rec record, n int, err error) { |
|
u, k, err := readUvarint(r) |
|
n += k |
|
if err != nil { |
|
return rec, n, err |
|
} |
|
rec.unpaddedSize = int64(u) |
|
if rec.unpaddedSize < 0 { |
|
return rec, n, errors.New("xz: unpadded size negative") |
|
} |
|
|
|
u, k, err = readUvarint(r) |
|
n += k |
|
if err != nil { |
|
return rec, n, err |
|
} |
|
rec.uncompressedSize = int64(u) |
|
if rec.uncompressedSize < 0 { |
|
return rec, n, errors.New("xz: uncompressed size negative") |
|
} |
|
|
|
return rec, n, nil |
|
} |
|
|
|
// MarshalBinary converts an index record in its binary encoding. |
|
func (rec *record) MarshalBinary() (data []byte, err error) { |
|
// maximum length of a uvarint is 10 |
|
p := make([]byte, 20) |
|
n := putUvarint(p, uint64(rec.unpaddedSize)) |
|
n += putUvarint(p[n:], uint64(rec.uncompressedSize)) |
|
return p[:n], nil |
|
} |
|
|
|
// writeIndex writes the index, a sequence of records. |
|
func writeIndex(w io.Writer, index []record) (n int64, err error) { |
|
crc := crc32.NewIEEE() |
|
mw := io.MultiWriter(w, crc) |
|
|
|
// index indicator |
|
k, err := mw.Write([]byte{0}) |
|
n += int64(k) |
|
if err != nil { |
|
return n, err |
|
} |
|
|
|
// number of records |
|
p := make([]byte, 10) |
|
k = putUvarint(p, uint64(len(index))) |
|
k, err = mw.Write(p[:k]) |
|
n += int64(k) |
|
if err != nil { |
|
return n, err |
|
} |
|
|
|
// list of records |
|
for _, rec := range index { |
|
p, err := rec.MarshalBinary() |
|
if err != nil { |
|
return n, err |
|
} |
|
k, err = mw.Write(p) |
|
n += int64(k) |
|
if err != nil { |
|
return n, err |
|
} |
|
} |
|
|
|
// index padding |
|
k, err = mw.Write(make([]byte, padLen(int64(n)))) |
|
n += int64(k) |
|
if err != nil { |
|
return n, err |
|
} |
|
|
|
// crc32 checksum |
|
putUint32LE(p, crc.Sum32()) |
|
k, err = w.Write(p[:4]) |
|
n += int64(k) |
|
|
|
return n, err |
|
} |
|
|
|
// readIndexBody reads the index from the reader. It assumes that the |
|
// index indicator has already been read. |
|
func readIndexBody(r io.Reader, expectedRecordLen int) (records []record, n int64, err error) { |
|
crc := crc32.NewIEEE() |
|
// index indicator |
|
crc.Write([]byte{0}) |
|
|
|
br := lzma.ByteReader(io.TeeReader(r, crc)) |
|
|
|
// number of records |
|
u, k, err := readUvarint(br) |
|
n += int64(k) |
|
if err != nil { |
|
return nil, n, err |
|
} |
|
recLen := int(u) |
|
if recLen < 0 || uint64(recLen) != u { |
|
return nil, n, errors.New("xz: record number overflow") |
|
} |
|
if recLen != expectedRecordLen { |
|
return nil, n, fmt.Errorf( |
|
"xz: index length is %d; want %d", |
|
recLen, expectedRecordLen) |
|
} |
|
|
|
// list of records |
|
records = make([]record, recLen) |
|
for i := range records { |
|
records[i], k, err = readRecord(br) |
|
n += int64(k) |
|
if err != nil { |
|
return nil, n, err |
|
} |
|
} |
|
|
|
p := make([]byte, padLen(int64(n+1)), 4) |
|
k, err = io.ReadFull(br.(io.Reader), p) |
|
n += int64(k) |
|
if err != nil { |
|
return nil, n, err |
|
} |
|
if !allZeros(p) { |
|
return nil, n, errors.New("xz: non-zero byte in index padding") |
|
} |
|
|
|
// crc32 |
|
s := crc.Sum32() |
|
p = p[:4] |
|
k, err = io.ReadFull(br.(io.Reader), p) |
|
n += int64(k) |
|
if err != nil { |
|
return records, n, err |
|
} |
|
if uint32LE(p) != s { |
|
return nil, n, errors.New("xz: wrong checksum for index") |
|
} |
|
|
|
return records, n, nil |
|
}
|
|
|