Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
500 lines
20 KiB
500 lines
20 KiB
//+build !noasm,!appengine |
|
|
|
/* |
|
* Minio Cloud Storage, (C) 2017 Minio, Inc. |
|
* |
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
* you may not use this file except in compliance with the License. |
|
* You may obtain a copy of the License at |
|
* |
|
* http://www.apache.org/licenses/LICENSE-2.0 |
|
* |
|
* Unless required by applicable law or agreed to in writing, software |
|
* distributed under the License is distributed on an "AS IS" BASIS, |
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
* See the License for the specific language governing permissions and |
|
* limitations under the License. |
|
*/ |
|
|
|
package sha256 |
|
|
|
import ( |
|
"encoding/binary" |
|
"errors" |
|
"hash" |
|
"sort" |
|
"sync/atomic" |
|
"time" |
|
) |
|
|
|
// sha256X16Avx512 is declared without a Go body; its implementation is
// provided outside this source (blockAvx512 refers to it as assembly code).
// It is handed the 512-byte digest state, a 512-byte scratch buffer, the
// expanded constant table, one mask word per round and 16 input slices.
// NOTE(review): the exact memory layout expected for digests/scratch is
// defined by the external implementation — confirm there before changing callers.
//
//go:noescape
func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
|
|
|
// Avx512ServerUID - Do not start at 0 but at the next multiple of 16, so that
// a uid can be told apart from the default initialization value of 0.
const Avx512ServerUID = 16
|
|
|
// uidCounter is incremented atomically by NewAvx512 to hand every digest its
// own uid.
var uidCounter uint64
|
|
|
// NewAvx512 - initialize sha256 Avx512 implementation. |
|
func NewAvx512(a512srv *Avx512Server) hash.Hash { |
|
uid := atomic.AddUint64(&uidCounter, 1) |
|
return &Avx512Digest{uid: uid, a512srv: a512srv} |
|
} |
|
|
|
// Avx512Digest - Type for computing SHA256 using Avx512
type Avx512Digest struct {
	uid     uint64        // id sent along with every message to the server
	a512srv *Avx512Server // server that receives this digest's blocks
	x       [chunk]byte   // bytes written so far that do not yet fill a chunk
	nx      int           // number of valid bytes in x
	len     uint64        // total number of bytes written
	final   bool          // set once Sum has obtained the result; Write then fails until Reset
	result  [Size]byte    // digest cached by Sum
}
|
|
|
// Size - Return size of checksum |
|
func (d *Avx512Digest) Size() int { return Size } |
|
|
|
// BlockSize - Return blocksize of checksum |
|
func (d Avx512Digest) BlockSize() int { return BlockSize } |
|
|
|
// Reset - reset sha digest to its initial values |
|
func (d *Avx512Digest) Reset() { |
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true} |
|
d.nx = 0 |
|
d.len = 0 |
|
d.final = false |
|
} |
|
|
|
// Write to digest |
|
func (d *Avx512Digest) Write(p []byte) (nn int, err error) { |
|
|
|
if d.final { |
|
return 0, errors.New("Avx512Digest already finalized. Reset first before writing again") |
|
} |
|
|
|
nn = len(p) |
|
d.len += uint64(nn) |
|
if d.nx > 0 { |
|
n := copy(d.x[d.nx:], p) |
|
d.nx += n |
|
if d.nx == chunk { |
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]} |
|
d.nx = 0 |
|
} |
|
p = p[n:] |
|
} |
|
if len(p) >= chunk { |
|
n := len(p) &^ (chunk - 1) |
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]} |
|
p = p[n:] |
|
} |
|
if len(p) > 0 { |
|
d.nx = copy(d.x[:], p) |
|
} |
|
return |
|
} |
|
|
|
// Sum - Return sha256 sum in bytes |
|
func (d *Avx512Digest) Sum(in []byte) (result []byte) { |
|
|
|
if d.final { |
|
return append(in, d.result[:]...) |
|
} |
|
|
|
trail := make([]byte, 0, 128) |
|
trail = append(trail, d.x[:d.nx]...) |
|
|
|
len := d.len |
|
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. |
|
var tmp [64]byte |
|
tmp[0] = 0x80 |
|
if len%64 < 56 { |
|
trail = append(trail, tmp[0:56-len%64]...) |
|
} else { |
|
trail = append(trail, tmp[0:64+56-len%64]...) |
|
} |
|
d.nx = 0 |
|
|
|
// Length in bits. |
|
len <<= 3 |
|
for i := uint(0); i < 8; i++ { |
|
tmp[i] = byte(len >> (56 - 8*i)) |
|
} |
|
trail = append(trail, tmp[0:8]...) |
|
|
|
sumCh := make(chan [Size]byte) |
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh} |
|
d.result = <-sumCh |
|
d.final = true |
|
return append(in, d.result[:]...) |
|
} |
|
|
|
// table holds the 64 SHA-256 round constants in expanded form: every 32-bit
// constant is duplicated into both halves of a uint64 and that word is stored
// 8 times in a row, giving 16 adjacent 32-bit copies per constant.
var table = func() (expanded [512]uint64) {
	roundConstants := [64]uint32{
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
	}
	for i, k := range roundConstants {
		wide := uint64(k)<<32 | uint64(k)
		for rep := 0; rep < 8; rep++ {
			expanded[i*8+rep] = wide
		}
	}
	return expanded
}()
|
|
|
// Interface function to assembly ode |
|
func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte { |
|
|
|
scratch := [512]byte{} |
|
sha256X16Avx512(digests, &scratch, &table, mask, input) |
|
|
|
output := [16][Size]byte{} |
|
for i := 0; i < 16; i++ { |
|
output[i] = getDigest(i, digests[:]) |
|
} |
|
|
|
return output |
|
} |
|
|
|
func getDigest(index int, state []byte) (sum [Size]byte) { |
|
for j := 0; j < 16; j += 2 { |
|
for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size { |
|
binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4])) |
|
} |
|
} |
|
return |
|
} |
|
|
|
// Message to send across input channel
type blockInput struct {
	uid   uint64          // id of the digest this message belongs to
	msg   []byte          // data to be hashed
	reset bool            // when set, the server only discards its state for uid
	final bool            // marks the last message; the server then replies on sumCh
	sumCh chan [Size]byte // channel on which the resulting digest is sent back
}
|
|
|
// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
type Avx512Server struct {
	blocksCh chan blockInput       // Input channel
	totalIn  int                   // Total number of inputs waiting to be processed
	lanes    [16]Avx512LaneInfo    // Array with info per lane (out of 16)
	digests  map[uint64][Size]byte // Map of uids to (interim) digest results
}
|
|
|
// Avx512LaneInfo - Info for each lane
type Avx512LaneInfo struct {
	uid      uint64          // unique identification for this SHA processing
	block    []byte          // input block to be processed
	outputCh chan [Size]byte // channel for output result (only set for a final message)
}
|
|
|
// NewAvx512Server - Create new object for parallel processing handling |
|
func NewAvx512Server() *Avx512Server { |
|
a512srv := &Avx512Server{} |
|
a512srv.digests = make(map[uint64][Size]byte) |
|
a512srv.blocksCh = make(chan blockInput) |
|
|
|
// Start a single thread for reading from the input channel |
|
go a512srv.Process() |
|
return a512srv |
|
} |
|
|
|
// Process - Sole handler for reading from the input channel |
|
func (a512srv *Avx512Server) Process() { |
|
for { |
|
select { |
|
case block := <-a512srv.blocksCh: |
|
if block.reset { |
|
a512srv.reset(block.uid) |
|
continue |
|
} |
|
index := block.uid & 0xf |
|
// fmt.Println("Adding message:", block.uid, index) |
|
|
|
if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs |
|
//fmt.Println("Invoking Blocks()") |
|
a512srv.blocks() |
|
} |
|
a512srv.totalIn++ |
|
a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg} |
|
if block.final { |
|
a512srv.lanes[index].outputCh = block.sumCh |
|
} |
|
if a512srv.totalIn == len(a512srv.lanes) { |
|
// fmt.Println("Invoking Blocks() while FULL: ") |
|
a512srv.blocks() |
|
} |
|
|
|
// TODO: test with larger timeout |
|
case <-time.After(1 * time.Microsecond): |
|
for _, lane := range a512srv.lanes { |
|
if lane.block != nil { // check if there is any input to process |
|
// fmt.Println("Invoking Blocks() on TIMEOUT: ") |
|
a512srv.blocks() |
|
break // we are done |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
// Do a reset for this calculation |
|
func (a512srv *Avx512Server) reset(uid uint64) { |
|
|
|
// Check if there is a message still waiting to be processed (and remove if so) |
|
for i, lane := range a512srv.lanes { |
|
if lane.uid == uid { |
|
if lane.block != nil { |
|
a512srv.lanes[i] = Avx512LaneInfo{} // clear message |
|
a512srv.totalIn-- |
|
} |
|
} |
|
} |
|
|
|
// Delete entry from hash map |
|
delete(a512srv.digests, uid) |
|
} |
|
|
|
// Invoke assembly and send results back |
|
func (a512srv *Avx512Server) blocks() { |
|
|
|
inputs := [16][]byte{} |
|
for i := range inputs { |
|
inputs[i] = a512srv.lanes[i].block |
|
} |
|
|
|
mask := expandMask(genMask(inputs)) |
|
outputs := blockAvx512(a512srv.getDigests(), inputs, mask) |
|
|
|
a512srv.totalIn = 0 |
|
for i := 0; i < len(outputs); i++ { |
|
uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh |
|
a512srv.digests[uid] = outputs[i] |
|
a512srv.lanes[i] = Avx512LaneInfo{} |
|
|
|
if outputCh != nil { |
|
// Send back result |
|
outputCh <- outputs[i] |
|
delete(a512srv.digests, uid) // Delete entry from hashmap |
|
} |
|
} |
|
} |
|
|
|
func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) { |
|
a512srv.blocksCh <- blockInput{uid: uid, msg: p} |
|
return len(p), nil |
|
} |
|
|
|
// Sum - return sha256 sum in bytes for a given sum id. |
|
func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte { |
|
sumCh := make(chan [32]byte) |
|
a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh} |
|
return <-sumCh |
|
} |
|
|
|
func (a512srv *Avx512Server) getDigests() *[512]byte { |
|
digests := [512]byte{} |
|
for i, lane := range a512srv.lanes { |
|
a, ok := a512srv.digests[lane.uid] |
|
if ok { |
|
binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4])) |
|
binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8])) |
|
binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12])) |
|
binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16])) |
|
binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20])) |
|
binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24])) |
|
binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28])) |
|
binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32])) |
|
} else { |
|
binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0) |
|
binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1) |
|
binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2) |
|
binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3) |
|
binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4) |
|
binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5) |
|
binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6) |
|
binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7) |
|
} |
|
} |
|
return &digests |
|
} |
|
|
|
// lane pairs the length of one input with the position of its lane, so that
// lanes can be ordered by input length.
type lane struct {
	len uint // length of the input
	pos uint // position of the lane
}

// lanes implements sort.Interface, ordering by ascending input length.
type lanes []lane

// Len reports the number of lanes.
func (lns lanes) Len() int {
	return len(lns)
}

// Swap exchanges the lanes at i and j.
func (lns lanes) Swap(i, j int) {
	tmp := lns[i]
	lns[i] = lns[j]
	lns[j] = tmp
}

// Less reports whether lane i has a shorter input than lane j.
func (lns lanes) Less(i, j int) bool {
	a, b := lns[i], lns[j]
	return a.len < b.len
}
|
|
|
// Helper struct for pairing a lane mask with the number of rounds for which
// that mask is used (filled in by genMask, flattened by expandMask)
type maskRounds struct {
	mask   uint64 // one bit per lane position
	rounds uint64 // number of consecutive rounds using mask
}
|
|
|
func genMask(input [16][]byte) [16]maskRounds { |
|
|
|
// Sort on blocks length small to large |
|
var sorted [16]lane |
|
for c, inpt := range input { |
|
sorted[c] = lane{uint(len(inpt)), uint(c)} |
|
} |
|
sort.Sort(lanes(sorted[:])) |
|
|
|
// Create mask array including 'rounds' between masks |
|
m, round, index := uint64(0xffff), uint64(0), 0 |
|
var mr [16]maskRounds |
|
for _, s := range sorted { |
|
if s.len > 0 { |
|
if uint64(s.len)>>6 > round { |
|
mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round} |
|
index++ |
|
} |
|
round = uint64(s.len) >> 6 |
|
} |
|
m = m & ^(1 << uint(s.pos)) |
|
} |
|
|
|
return mr |
|
} |
|
|
|
// TODO: remove function |
|
func expandMask(mr [16]maskRounds) []uint64 { |
|
size := uint64(0) |
|
for _, r := range mr { |
|
size += r.rounds |
|
} |
|
result, index := make([]uint64, size), 0 |
|
for _, r := range mr { |
|
for j := uint64(0); j < r.rounds; j++ { |
|
result[index] = r.mask |
|
index++ |
|
} |
|
} |
|
return result |
|
}
|
|
|