diff --git a/vendor-log b/vendor-log index e98bc91..69382aa 100644 --- a/vendor-log +++ b/vendor-log @@ -27,3 +27,7 @@ f6b343c37ca80bfa8ea539da67a0b621f84fab1d golang.org/x/crypto/acme/autocert e6ac2fc51e89a3249e82157fa0bb7a18ef9dd5bb github.com/kr/pretty bb797dc4fb8320488f47bf11de07a733d7233e1f github.com/kr/text 3b8db5e93c4c02efbc313e17b2e796b0914a01fb go.uber.org/atomic +84e9865bb03ad38c464043bf5382ce8c68ca5f0c github.com/brandur/simplebox +f6b343c37ca80bfa8ea539da67a0b621f84fab1d golang.org/x/crypto/nacl/secretbox +f6b343c37ca80bfa8ea539da67a0b621f84fab1d golang.org/x/crypto/poly1305 +f6b343c37ca80bfa8ea539da67a0b621f84fab1d golang.org/x/crypto/salsa20/salsa diff --git a/vendor/github.com/brandur/simplebox/doc.go b/vendor/github.com/brandur/simplebox/doc.go new file mode 100644 index 0000000..32f17a5 --- /dev/null +++ b/vendor/github.com/brandur/simplebox/doc.go @@ -0,0 +1,23 @@ +/* +Package simplebox provides a simple, easy-to-use cryptographic API where all of +the hard decisions have been made for you in advance. The backing cryptography +is XSalsa20 and Poly1305, which are known to be secure and fast. + +This package uses NaCl's secretbox under the hood, but also includes a simple +yet secure nonce generation strategy. A 24-byte random nonce is generated from +a secure source, used to encrypt a message, and prepended to the resulting +ciphertex. When it's time for decryption, the message is split back into nonce +and ciphertext, and the message is decrypted. + +Thanks to the size of the nonce, the chance of a collision is negligible. For +example, after encrypting 2^64 messages, the odds of there having been a +repeated nonce is approximately 2^-64. + +Note that although this strategy assures the confidentiality of your messages, +it doesn't provide any protection against messages being reordered and replayed +by an active adversary. + +This idea is entirely based on the SimpleBox implementation included with +RbNaCl: https://github.com/cryptosphere/rbnacl/wiki/SimpleBox +*/ +package simplebox diff --git a/vendor/github.com/brandur/simplebox/simplebox.go b/vendor/github.com/brandur/simplebox/simplebox.go new file mode 100644 index 0000000..97e8a1a --- /dev/null +++ b/vendor/github.com/brandur/simplebox/simplebox.go @@ -0,0 +1,60 @@ +package simplebox + +import ( + "crypto/rand" + "fmt" + + "golang.org/x/crypto/nacl/secretbox" +) + +const ( + // Length in bytes of a secret key used for encryption and decryption. + KeySize = 32 + + // Length in bytes of a nonce value (which must be unique and may be + // random) used for encryption and decryption. + NonceSize = 24 +) + +// SimpleBox provides a simple wrapper around NaCl's secretbox with a +// self-contained random nonce strategy. +type SimpleBox struct { + secretKey *[KeySize]byte +} + +// Creates a SimpleBox from a secret key. +func NewFromSecretKey(secretKey *[KeySize]byte) *SimpleBox { + return &SimpleBox{secretKey: secretKey} +} + +// Decrypts the given ciphertext and returns plaintext. An appropriate error is +// included if decryption failed. +func (b *SimpleBox) Decrypt(cipher []byte) ([]byte, error) { + if len(cipher) <= NonceSize { + return nil, fmt.Errorf("Ciphertext is of invalid length.") + } + + var nonce [NonceSize]byte + copy(nonce[:], cipher[0:NonceSize]) + payload := cipher[NonceSize:] + + var opened []byte + opened, ok := secretbox.Open(opened[:0], payload, &nonce, b.secretKey) + + if !ok { + return nil, fmt.Errorf("Ciphertext could not be decrypted.") + } + + return opened, nil +} + +// Encrypts the given plaintext and returns ciphertext. +func (b *SimpleBox) Encrypt(plain []byte) []byte { + var nonce [NonceSize]byte + rand.Reader.Read(nonce[:]) + + var box []byte + box = secretbox.Seal(box[:0], plain, &nonce, b.secretKey) + + return append(nonce[:], box...) +} diff --git a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go new file mode 100644 index 0000000..1e1dff5 --- /dev/null +++ b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go @@ -0,0 +1,149 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package secretbox encrypts and authenticates small messages. + +Secretbox uses XSalsa20 and Poly1305 to encrypt and authenticate messages with +secret-key cryptography. The length of messages is not hidden. + +It is the caller's responsibility to ensure the uniqueness of nonces—for +example, by using nonce 1 for the first message, nonce 2 for the second +message, etc. Nonces are long enough that randomly generated nonces have +negligible risk of collision. + +This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html. +*/ +package secretbox // import "golang.org/x/crypto/nacl/secretbox" + +import ( + "golang.org/x/crypto/poly1305" + "golang.org/x/crypto/salsa20/salsa" +) + +// Overhead is the number of bytes of overhead when boxing a message. +const Overhead = poly1305.TagSize + +// setup produces a sub-key and Salsa20 counter given a nonce and key. +func setup(subKey *[32]byte, counter *[16]byte, nonce *[24]byte, key *[32]byte) { + // We use XSalsa20 for encryption so first we need to generate a + // key and nonce with HSalsa20. + var hNonce [16]byte + copy(hNonce[:], nonce[:]) + salsa.HSalsa20(subKey, &hNonce, key, &salsa.Sigma) + + // The final 8 bytes of the original nonce form the new nonce. + copy(counter[:], nonce[16:]) +} + +// sliceForAppend takes a slice and a requested number of bytes. It returns a +// slice with the contents of the given slice followed by that many bytes and a +// second slice that aliases into it and contains only the extra bytes. If the +// original slice has sufficient capacity then no allocation is performed. +func sliceForAppend(in []byte, n int) (head, tail []byte) { + if total := len(in) + n; cap(in) >= total { + head = in[:total] + } else { + head = make([]byte, total) + copy(head, in) + } + tail = head[len(in):] + return +} + +// Seal appends an encrypted and authenticated copy of message to out, which +// must not overlap message. The key and nonce pair must be unique for each +// distinct message and the output will be Overhead bytes longer than message. +func Seal(out, message []byte, nonce *[24]byte, key *[32]byte) []byte { + var subKey [32]byte + var counter [16]byte + setup(&subKey, &counter, nonce, key) + + // The Poly1305 key is generated by encrypting 32 bytes of zeros. Since + // Salsa20 works with 64-byte blocks, we also generate 32 bytes of + // keystream as a side effect. + var firstBlock [64]byte + salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) + + var poly1305Key [32]byte + copy(poly1305Key[:], firstBlock[:]) + + ret, out := sliceForAppend(out, len(message)+poly1305.TagSize) + + // We XOR up to 32 bytes of message with the keystream generated from + // the first block. + firstMessageBlock := message + if len(firstMessageBlock) > 32 { + firstMessageBlock = firstMessageBlock[:32] + } + + tagOut := out + out = out[poly1305.TagSize:] + for i, x := range firstMessageBlock { + out[i] = firstBlock[32+i] ^ x + } + message = message[len(firstMessageBlock):] + ciphertext := out + out = out[len(firstMessageBlock):] + + // Now encrypt the rest. + counter[8] = 1 + salsa.XORKeyStream(out, message, &counter, &subKey) + + var tag [poly1305.TagSize]byte + poly1305.Sum(&tag, ciphertext, &poly1305Key) + copy(tagOut, tag[:]) + + return ret +} + +// Open authenticates and decrypts a box produced by Seal and appends the +// message to out, which must not overlap box. The output will be Overhead +// bytes smaller than box. +func Open(out []byte, box []byte, nonce *[24]byte, key *[32]byte) ([]byte, bool) { + if len(box) < Overhead { + return nil, false + } + + var subKey [32]byte + var counter [16]byte + setup(&subKey, &counter, nonce, key) + + // The Poly1305 key is generated by encrypting 32 bytes of zeros. Since + // Salsa20 works with 64-byte blocks, we also generate 32 bytes of + // keystream as a side effect. + var firstBlock [64]byte + salsa.XORKeyStream(firstBlock[:], firstBlock[:], &counter, &subKey) + + var poly1305Key [32]byte + copy(poly1305Key[:], firstBlock[:]) + var tag [poly1305.TagSize]byte + copy(tag[:], box) + + if !poly1305.Verify(&tag, box[poly1305.TagSize:], &poly1305Key) { + return nil, false + } + + ret, out := sliceForAppend(out, len(box)-Overhead) + + // We XOR up to 32 bytes of box with the keystream generated from + // the first block. + box = box[Overhead:] + firstMessageBlock := box + if len(firstMessageBlock) > 32 { + firstMessageBlock = firstMessageBlock[:32] + } + for i, x := range firstMessageBlock { + out[i] = firstBlock[32+i] ^ x + } + + box = box[len(firstMessageBlock):] + out = out[len(firstMessageBlock):] + + // Now decrypt the rest. + counter[8] = 1 + salsa.XORKeyStream(out, box, &counter, &subKey) + + return ret, true +} diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305.go b/vendor/golang.org/x/crypto/poly1305/poly1305.go new file mode 100644 index 0000000..4a5f826 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf. + +Poly1305 is a fast, one-time authentication function. It is infeasible for an +attacker to generate an authenticator for a message without the key. However, a +key must only be used for a single message. Authenticating two different +messages with the same key allows an attacker to forge authenticators for other +messages with the same key. + +Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was +used with a fixed key in order to generate one-time keys from an nonce. +However, in this package AES isn't used and the one-time key is specified +directly. +*/ +package poly1305 // import "golang.org/x/crypto/poly1305" + +import "crypto/subtle" + +// TagSize is the size, in bytes, of a poly1305 authenticator. +const TagSize = 16 + +// Verify returns true if mac is a valid authenticator for m with the given +// key. +func Verify(mac *[16]byte, m []byte, key *[32]byte) bool { + var tmp [16]byte + Sum(&tmp, m, key) + return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go new file mode 100644 index 0000000..4dd72fe --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go @@ -0,0 +1,22 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +package poly1305 + +// This function is implemented in sum_amd64.s +//go:noescape +func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305(out, mPtr, uint64(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s new file mode 100644 index 0000000..bc75c61 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s @@ -0,0 +1,125 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +#include "textflag.h" + +#define POLY1305_ADD(msg, h0, h1, h2) \ + ADDQ 0(msg), h0; \ + ADCQ 8(msg), h1; \ + ADCQ $1, h2; \ + LEAQ 16(msg), msg + +#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ + MOVQ r0, AX; \ + MULQ h0; \ + MOVQ AX, t0; \ + MOVQ DX, t1; \ + MOVQ r0, AX; \ + MULQ h1; \ + ADDQ AX, t1; \ + ADCQ $0, DX; \ + MOVQ r0, t2; \ + IMULQ h2, t2; \ + ADDQ DX, t2; \ + \ + MOVQ r1, AX; \ + MULQ h0; \ + ADDQ AX, t1; \ + ADCQ $0, DX; \ + MOVQ DX, h0; \ + MOVQ r1, t3; \ + IMULQ h2, t3; \ + MOVQ r1, AX; \ + MULQ h1; \ + ADDQ AX, t2; \ + ADCQ DX, t3; \ + ADDQ h0, t2; \ + ADCQ $0, t3; \ + \ + MOVQ t0, h0; \ + MOVQ t1, h1; \ + MOVQ t2, h2; \ + ANDQ $3, h2; \ + MOVQ t2, t0; \ + ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ + ADDQ t0, h0; \ + ADCQ t3, h1; \ + ADCQ $0, h2; \ + SHRQ $2, t3, t2; \ + SHRQ $2, t3; \ + ADDQ t2, h0; \ + ADCQ t3, h1; \ + ADCQ $0, h2 + +DATA poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF +DATA poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC +GLOBL poly1305Mask<>(SB), RODATA, $16 + +// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key) +TEXT ·poly1305(SB), $0-32 + MOVQ out+0(FP), DI + MOVQ m+8(FP), SI + MOVQ mlen+16(FP), R15 + MOVQ key+24(FP), AX + + MOVQ 0(AX), R11 + MOVQ 8(AX), R12 + ANDQ poly1305Mask<>(SB), R11 // r0 + ANDQ poly1305Mask<>+8(SB), R12 // r1 + XORQ R8, R8 // h0 + XORQ R9, R9 // h1 + XORQ R10, R10 // h2 + + CMPQ R15, $16 + JB bytes_between_0_and_15 + +loop: + POLY1305_ADD(SI, R8, R9, R10) + +multiply: + POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) + SUBQ $16, R15 + CMPQ R15, $16 + JAE loop + +bytes_between_0_and_15: + TESTQ R15, R15 + JZ done + MOVQ $1, BX + XORQ CX, CX + XORQ R13, R13 + ADDQ R15, SI + +flush_buffer: + SHLQ $8, BX, CX + SHLQ $8, BX + MOVB -1(SI), R13 + XORQ R13, BX + DECQ SI + DECQ R15 + JNZ flush_buffer + + ADDQ BX, R8 + ADCQ CX, R9 + ADCQ $0, R10 + MOVQ $16, R15 + JMP multiply + +done: + MOVQ R8, AX + MOVQ R9, BX + SUBQ $0xFFFFFFFFFFFFFFFB, AX + SBBQ $0xFFFFFFFFFFFFFFFF, BX + SBBQ $3, R10 + CMOVQCS R8, AX + CMOVQCS R9, BX + MOVQ key+24(FP), R8 + ADDQ 16(R8), AX + ADCQ 24(R8), BX + + MOVQ AX, 0(DI) + MOVQ BX, 8(DI) + RET diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.go b/vendor/golang.org/x/crypto/poly1305/sum_arm.go new file mode 100644 index 0000000..5dc321c --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go @@ -0,0 +1,22 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build arm,!gccgo,!appengine,!nacl + +package poly1305 + +// This function is implemented in sum_arm.s +//go:noescape +func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + var mPtr *byte + if len(m) > 0 { + mPtr = &m[0] + } + poly1305_auth_armv6(out, mPtr, uint32(len(m)), key) +} diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ref.go b/vendor/golang.org/x/crypto/poly1305/sum_ref.go new file mode 100644 index 0000000..dbe50e7 --- /dev/null +++ b/vendor/golang.org/x/crypto/poly1305/sum_ref.go @@ -0,0 +1,1531 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm gccgo appengine nacl + +package poly1305 + +// Based on original, public domain implementation from NaCl by D. J. +// Bernstein. + +import "math" + +const ( + alpham80 = 0.00000000558793544769287109375 + alpham48 = 24.0 + alpham16 = 103079215104.0 + alpha0 = 6755399441055744.0 + alpha18 = 1770887431076116955136.0 + alpha32 = 29014219670751100192948224.0 + alpha50 = 7605903601369376408980219232256.0 + alpha64 = 124615124604835863084731911901282304.0 + alpha82 = 32667107224410092492483962313449748299776.0 + alpha96 = 535217884764734955396857238543560676143529984.0 + alpha112 = 35076039295941670036888435985190792471742381031424.0 + alpha130 = 9194973245195333150150082162901855101712434733101613056.0 + scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 + offset0 = 6755408030990331.0 + offset1 = 29014256564239239022116864.0 + offset2 = 124615283061160854719918951570079744.0 + offset3 = 535219245894202480694386063513315216128475136.0 +) + +// Sum generates an authenticator for m using a one-time key and puts the +// 16-byte result into out. Authenticating two different messages with the same +// key allows an attacker to forge messages at will. +func Sum(out *[16]byte, m []byte, key *[32]byte) { + r := key + s := key[16:] + var ( + y7 float64 + y6 float64 + y1 float64 + y0 float64 + y5 float64 + y4 float64 + x7 float64 + x6 float64 + x1 float64 + x0 float64 + y3 float64 + y2 float64 + x5 float64 + r3lowx0 float64 + x4 float64 + r0lowx6 float64 + x3 float64 + r3highx0 float64 + x2 float64 + r0highx6 float64 + r0lowx0 float64 + sr1lowx6 float64 + r0highx0 float64 + sr1highx6 float64 + sr3low float64 + r1lowx0 float64 + sr2lowx6 float64 + r1highx0 float64 + sr2highx6 float64 + r2lowx0 float64 + sr3lowx6 float64 + r2highx0 float64 + sr3highx6 float64 + r1highx4 float64 + r1lowx4 float64 + r0highx4 float64 + r0lowx4 float64 + sr3highx4 float64 + sr3lowx4 float64 + sr2highx4 float64 + sr2lowx4 float64 + r0lowx2 float64 + r0highx2 float64 + r1lowx2 float64 + r1highx2 float64 + r2lowx2 float64 + r2highx2 float64 + sr3lowx2 float64 + sr3highx2 float64 + z0 float64 + z1 float64 + z2 float64 + z3 float64 + m0 int64 + m1 int64 + m2 int64 + m3 int64 + m00 uint32 + m01 uint32 + m02 uint32 + m03 uint32 + m10 uint32 + m11 uint32 + m12 uint32 + m13 uint32 + m20 uint32 + m21 uint32 + m22 uint32 + m23 uint32 + m30 uint32 + m31 uint32 + m32 uint32 + m33 uint64 + lbelow2 int32 + lbelow3 int32 + lbelow4 int32 + lbelow5 int32 + lbelow6 int32 + lbelow7 int32 + lbelow8 int32 + lbelow9 int32 + lbelow10 int32 + lbelow11 int32 + lbelow12 int32 + lbelow13 int32 + lbelow14 int32 + lbelow15 int32 + s00 uint32 + s01 uint32 + s02 uint32 + s03 uint32 + s10 uint32 + s11 uint32 + s12 uint32 + s13 uint32 + s20 uint32 + s21 uint32 + s22 uint32 + s23 uint32 + s30 uint32 + s31 uint32 + s32 uint32 + s33 uint32 + bits32 uint64 + f uint64 + f0 uint64 + f1 uint64 + f2 uint64 + f3 uint64 + f4 uint64 + g uint64 + g0 uint64 + g1 uint64 + g2 uint64 + g3 uint64 + g4 uint64 + ) + + var p int32 + + l := int32(len(m)) + + r00 := uint32(r[0]) + + r01 := uint32(r[1]) + + r02 := uint32(r[2]) + r0 := int64(2151) + + r03 := uint32(r[3]) + r03 &= 15 + r0 <<= 51 + + r10 := uint32(r[4]) + r10 &= 252 + r01 <<= 8 + r0 += int64(r00) + + r11 := uint32(r[5]) + r02 <<= 16 + r0 += int64(r01) + + r12 := uint32(r[6]) + r03 <<= 24 + r0 += int64(r02) + + r13 := uint32(r[7]) + r13 &= 15 + r1 := int64(2215) + r0 += int64(r03) + + d0 := r0 + r1 <<= 51 + r2 := int64(2279) + + r20 := uint32(r[8]) + r20 &= 252 + r11 <<= 8 + r1 += int64(r10) + + r21 := uint32(r[9]) + r12 <<= 16 + r1 += int64(r11) + + r22 := uint32(r[10]) + r13 <<= 24 + r1 += int64(r12) + + r23 := uint32(r[11]) + r23 &= 15 + r2 <<= 51 + r1 += int64(r13) + + d1 := r1 + r21 <<= 8 + r2 += int64(r20) + + r30 := uint32(r[12]) + r30 &= 252 + r22 <<= 16 + r2 += int64(r21) + + r31 := uint32(r[13]) + r23 <<= 24 + r2 += int64(r22) + + r32 := uint32(r[14]) + r2 += int64(r23) + r3 := int64(2343) + + d2 := r2 + r3 <<= 51 + + r33 := uint32(r[15]) + r33 &= 15 + r31 <<= 8 + r3 += int64(r30) + + r32 <<= 16 + r3 += int64(r31) + + r33 <<= 24 + r3 += int64(r32) + + r3 += int64(r33) + h0 := alpha32 - alpha32 + + d3 := r3 + h1 := alpha32 - alpha32 + + h2 := alpha32 - alpha32 + + h3 := alpha32 - alpha32 + + h4 := alpha32 - alpha32 + + r0low := math.Float64frombits(uint64(d0)) + h5 := alpha32 - alpha32 + + r1low := math.Float64frombits(uint64(d1)) + h6 := alpha32 - alpha32 + + r2low := math.Float64frombits(uint64(d2)) + h7 := alpha32 - alpha32 + + r0low -= alpha0 + + r1low -= alpha32 + + r2low -= alpha64 + + r0high := r0low + alpha18 + + r3low := math.Float64frombits(uint64(d3)) + + r1high := r1low + alpha50 + sr1low := scale * r1low + + r2high := r2low + alpha82 + sr2low := scale * r2low + + r0high -= alpha18 + r0high_stack := r0high + + r3low -= alpha96 + + r1high -= alpha50 + r1high_stack := r1high + + sr1high := sr1low + alpham80 + + r0low -= r0high + + r2high -= alpha82 + sr3low = scale * r3low + + sr2high := sr2low + alpham48 + + r1low -= r1high + r1low_stack := r1low + + sr1high -= alpham80 + sr1high_stack := sr1high + + r2low -= r2high + r2low_stack := r2low + + sr2high -= alpham48 + sr2high_stack := sr2high + + r3high := r3low + alpha112 + r0low_stack := r0low + + sr1low -= sr1high + sr1low_stack := sr1low + + sr3high := sr3low + alpham16 + r2high_stack := r2high + + sr2low -= sr2high + sr2low_stack := sr2low + + r3high -= alpha112 + r3high_stack := r3high + + sr3high -= alpham16 + sr3high_stack := sr3high + + r3low -= r3high + r3low_stack := r3low + + sr3low -= sr3high + sr3low_stack := sr3low + + if l < 16 { + goto addatmost15bytes + } + + m00 = uint32(m[p+0]) + m0 = 2151 + + m0 <<= 51 + m1 = 2215 + m01 = uint32(m[p+1]) + + m1 <<= 51 + m2 = 2279 + m02 = uint32(m[p+2]) + + m2 <<= 51 + m3 = 2343 + m03 = uint32(m[p+3]) + + m10 = uint32(m[p+4]) + m01 <<= 8 + m0 += int64(m00) + + m11 = uint32(m[p+5]) + m02 <<= 16 + m0 += int64(m01) + + m12 = uint32(m[p+6]) + m03 <<= 24 + m0 += int64(m02) + + m13 = uint32(m[p+7]) + m3 <<= 51 + m0 += int64(m03) + + m20 = uint32(m[p+8]) + m11 <<= 8 + m1 += int64(m10) + + m21 = uint32(m[p+9]) + m12 <<= 16 + m1 += int64(m11) + + m22 = uint32(m[p+10]) + m13 <<= 24 + m1 += int64(m12) + + m23 = uint32(m[p+11]) + m1 += int64(m13) + + m30 = uint32(m[p+12]) + m21 <<= 8 + m2 += int64(m20) + + m31 = uint32(m[p+13]) + m22 <<= 16 + m2 += int64(m21) + + m32 = uint32(m[p+14]) + m23 <<= 24 + m2 += int64(m22) + + m33 = uint64(m[p+15]) + m2 += int64(m23) + + d0 = m0 + m31 <<= 8 + m3 += int64(m30) + + d1 = m1 + m32 <<= 16 + m3 += int64(m31) + + d2 = m2 + m33 += 256 + + m33 <<= 24 + m3 += int64(m32) + + m3 += int64(m33) + d3 = m3 + + p += 16 + l -= 16 + + z0 = math.Float64frombits(uint64(d0)) + + z1 = math.Float64frombits(uint64(d1)) + + z2 = math.Float64frombits(uint64(d2)) + + z3 = math.Float64frombits(uint64(d3)) + + z0 -= alpha0 + + z1 -= alpha32 + + z2 -= alpha64 + + z3 -= alpha96 + + h0 += z0 + + h1 += z1 + + h3 += z2 + + h5 += z3 + + if l < 16 { + goto multiplyaddatmost15bytes + } + +multiplyaddatleast16bytes: + + m2 = 2279 + m20 = uint32(m[p+8]) + y7 = h7 + alpha130 + + m2 <<= 51 + m3 = 2343 + m21 = uint32(m[p+9]) + y6 = h6 + alpha130 + + m3 <<= 51 + m0 = 2151 + m22 = uint32(m[p+10]) + y1 = h1 + alpha32 + + m0 <<= 51 + m1 = 2215 + m23 = uint32(m[p+11]) + y0 = h0 + alpha32 + + m1 <<= 51 + m30 = uint32(m[p+12]) + y7 -= alpha130 + + m21 <<= 8 + m2 += int64(m20) + m31 = uint32(m[p+13]) + y6 -= alpha130 + + m22 <<= 16 + m2 += int64(m21) + m32 = uint32(m[p+14]) + y1 -= alpha32 + + m23 <<= 24 + m2 += int64(m22) + m33 = uint64(m[p+15]) + y0 -= alpha32 + + m2 += int64(m23) + m00 = uint32(m[p+0]) + y5 = h5 + alpha96 + + m31 <<= 8 + m3 += int64(m30) + m01 = uint32(m[p+1]) + y4 = h4 + alpha96 + + m32 <<= 16 + m02 = uint32(m[p+2]) + x7 = h7 - y7 + y7 *= scale + + m33 += 256 + m03 = uint32(m[p+3]) + x6 = h6 - y6 + y6 *= scale + + m33 <<= 24 + m3 += int64(m31) + m10 = uint32(m[p+4]) + x1 = h1 - y1 + + m01 <<= 8 + m3 += int64(m32) + m11 = uint32(m[p+5]) + x0 = h0 - y0 + + m3 += int64(m33) + m0 += int64(m00) + m12 = uint32(m[p+6]) + y5 -= alpha96 + + m02 <<= 16 + m0 += int64(m01) + m13 = uint32(m[p+7]) + y4 -= alpha96 + + m03 <<= 24 + m0 += int64(m02) + d2 = m2 + x1 += y7 + + m0 += int64(m03) + d3 = m3 + x0 += y6 + + m11 <<= 8 + m1 += int64(m10) + d0 = m0 + x7 += y5 + + m12 <<= 16 + m1 += int64(m11) + x6 += y4 + + m13 <<= 24 + m1 += int64(m12) + y3 = h3 + alpha64 + + m1 += int64(m13) + d1 = m1 + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + z2 = math.Float64frombits(uint64(d2)) + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + z3 = math.Float64frombits(uint64(d3)) + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + z2 -= alpha64 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + z3 -= alpha96 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + p += 16 + l -= 16 + h6 += r2lowx2 + + h7 += r2highx2 + + z1 = math.Float64frombits(uint64(d1)) + h0 += sr3lowx2 + + z0 = math.Float64frombits(uint64(d0)) + h1 += sr3highx2 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + if l >= 16 { + goto multiplyaddatleast16bytes + } + +multiplyaddatmost15bytes: + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +addatmost15bytes: + + if l == 0 { + goto nomorebytes + } + + lbelow2 = l - 2 + + lbelow3 = l - 3 + + lbelow2 >>= 31 + lbelow4 = l - 4 + + m00 = uint32(m[p+0]) + lbelow3 >>= 31 + p += lbelow2 + + m01 = uint32(m[p+1]) + lbelow4 >>= 31 + p += lbelow3 + + m02 = uint32(m[p+2]) + p += lbelow4 + m0 = 2151 + + m03 = uint32(m[p+3]) + m0 <<= 51 + m1 = 2215 + + m0 += int64(m00) + m01 &^= uint32(lbelow2) + + m02 &^= uint32(lbelow3) + m01 -= uint32(lbelow2) + + m01 <<= 8 + m03 &^= uint32(lbelow4) + + m0 += int64(m01) + lbelow2 -= lbelow3 + + m02 += uint32(lbelow2) + lbelow3 -= lbelow4 + + m02 <<= 16 + m03 += uint32(lbelow3) + + m03 <<= 24 + m0 += int64(m02) + + m0 += int64(m03) + lbelow5 = l - 5 + + lbelow6 = l - 6 + lbelow7 = l - 7 + + lbelow5 >>= 31 + lbelow8 = l - 8 + + lbelow6 >>= 31 + p += lbelow5 + + m10 = uint32(m[p+4]) + lbelow7 >>= 31 + p += lbelow6 + + m11 = uint32(m[p+5]) + lbelow8 >>= 31 + p += lbelow7 + + m12 = uint32(m[p+6]) + m1 <<= 51 + p += lbelow8 + + m13 = uint32(m[p+7]) + m10 &^= uint32(lbelow5) + lbelow4 -= lbelow5 + + m10 += uint32(lbelow4) + lbelow5 -= lbelow6 + + m11 &^= uint32(lbelow6) + m11 += uint32(lbelow5) + + m11 <<= 8 + m1 += int64(m10) + + m1 += int64(m11) + m12 &^= uint32(lbelow7) + + lbelow6 -= lbelow7 + m13 &^= uint32(lbelow8) + + m12 += uint32(lbelow6) + lbelow7 -= lbelow8 + + m12 <<= 16 + m13 += uint32(lbelow7) + + m13 <<= 24 + m1 += int64(m12) + + m1 += int64(m13) + m2 = 2279 + + lbelow9 = l - 9 + m3 = 2343 + + lbelow10 = l - 10 + lbelow11 = l - 11 + + lbelow9 >>= 31 + lbelow12 = l - 12 + + lbelow10 >>= 31 + p += lbelow9 + + m20 = uint32(m[p+8]) + lbelow11 >>= 31 + p += lbelow10 + + m21 = uint32(m[p+9]) + lbelow12 >>= 31 + p += lbelow11 + + m22 = uint32(m[p+10]) + m2 <<= 51 + p += lbelow12 + + m23 = uint32(m[p+11]) + m20 &^= uint32(lbelow9) + lbelow8 -= lbelow9 + + m20 += uint32(lbelow8) + lbelow9 -= lbelow10 + + m21 &^= uint32(lbelow10) + m21 += uint32(lbelow9) + + m21 <<= 8 + m2 += int64(m20) + + m2 += int64(m21) + m22 &^= uint32(lbelow11) + + lbelow10 -= lbelow11 + m23 &^= uint32(lbelow12) + + m22 += uint32(lbelow10) + lbelow11 -= lbelow12 + + m22 <<= 16 + m23 += uint32(lbelow11) + + m23 <<= 24 + m2 += int64(m22) + + m3 <<= 51 + lbelow13 = l - 13 + + lbelow13 >>= 31 + lbelow14 = l - 14 + + lbelow14 >>= 31 + p += lbelow13 + lbelow15 = l - 15 + + m30 = uint32(m[p+12]) + lbelow15 >>= 31 + p += lbelow14 + + m31 = uint32(m[p+13]) + p += lbelow15 + m2 += int64(m23) + + m32 = uint32(m[p+14]) + m30 &^= uint32(lbelow13) + lbelow12 -= lbelow13 + + m30 += uint32(lbelow12) + lbelow13 -= lbelow14 + + m3 += int64(m30) + m31 &^= uint32(lbelow14) + + m31 += uint32(lbelow13) + m32 &^= uint32(lbelow15) + + m31 <<= 8 + lbelow14 -= lbelow15 + + m3 += int64(m31) + m32 += uint32(lbelow14) + d0 = m0 + + m32 <<= 16 + m33 = uint64(lbelow15 + 1) + d1 = m1 + + m33 <<= 24 + m3 += int64(m32) + d2 = m2 + + m3 += int64(m33) + d3 = m3 + + z3 = math.Float64frombits(uint64(d3)) + + z2 = math.Float64frombits(uint64(d2)) + + z1 = math.Float64frombits(uint64(d1)) + + z0 = math.Float64frombits(uint64(d0)) + + z3 -= alpha96 + + z2 -= alpha64 + + z1 -= alpha32 + + z0 -= alpha0 + + h5 += z3 + + h3 += z2 + + h1 += z1 + + h0 += z0 + + y7 = h7 + alpha130 + + y6 = h6 + alpha130 + + y1 = h1 + alpha32 + + y0 = h0 + alpha32 + + y7 -= alpha130 + + y6 -= alpha130 + + y1 -= alpha32 + + y0 -= alpha32 + + y5 = h5 + alpha96 + + y4 = h4 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + x6 = h6 - y6 + y6 *= scale + + x1 = h1 - y1 + + x0 = h0 - y0 + + y5 -= alpha96 + + y4 -= alpha96 + + x1 += y7 + + x0 += y6 + + x7 += y5 + + x6 += y4 + + y3 = h3 + alpha64 + + y2 = h2 + alpha64 + + x0 += x1 + + x6 += x7 + + y3 -= alpha64 + r3low = r3low_stack + + y2 -= alpha64 + r0low = r0low_stack + + x5 = h5 - y5 + r3lowx0 = r3low * x0 + r3high = r3high_stack + + x4 = h4 - y4 + r0lowx6 = r0low * x6 + r0high = r0high_stack + + x3 = h3 - y3 + r3highx0 = r3high * x0 + sr1low = sr1low_stack + + x2 = h2 - y2 + r0highx6 = r0high * x6 + sr1high = sr1high_stack + + x5 += y3 + r0lowx0 = r0low * x0 + r1low = r1low_stack + + h6 = r3lowx0 + r0lowx6 + sr1lowx6 = sr1low * x6 + r1high = r1high_stack + + x4 += y2 + r0highx0 = r0high * x0 + sr2low = sr2low_stack + + h7 = r3highx0 + r0highx6 + sr1highx6 = sr1high * x6 + sr2high = sr2high_stack + + x3 += y1 + r1lowx0 = r1low * x0 + r2low = r2low_stack + + h0 = r0lowx0 + sr1lowx6 + sr2lowx6 = sr2low * x6 + r2high = r2high_stack + + x2 += y0 + r1highx0 = r1high * x0 + sr3low = sr3low_stack + + h1 = r0highx0 + sr1highx6 + sr2highx6 = sr2high * x6 + sr3high = sr3high_stack + + x4 += x5 + r2lowx0 = r2low * x0 + + h2 = r1lowx0 + sr2lowx6 + sr3lowx6 = sr3low * x6 + + x2 += x3 + r2highx0 = r2high * x0 + + h3 = r1highx0 + sr2highx6 + sr3highx6 = sr3high * x6 + + r1highx4 = r1high * x4 + + h4 = r2lowx0 + sr3lowx6 + r1lowx4 = r1low * x4 + + r0highx4 = r0high * x4 + + h5 = r2highx0 + sr3highx6 + r0lowx4 = r0low * x4 + + h7 += r1highx4 + sr3highx4 = sr3high * x4 + + h6 += r1lowx4 + sr3lowx4 = sr3low * x4 + + h5 += r0highx4 + sr2highx4 = sr2high * x4 + + h4 += r0lowx4 + sr2lowx4 = sr2low * x4 + + h3 += sr3highx4 + r0lowx2 = r0low * x2 + + h2 += sr3lowx4 + r0highx2 = r0high * x2 + + h1 += sr2highx4 + r1lowx2 = r1low * x2 + + h0 += sr2lowx4 + r1highx2 = r1high * x2 + + h2 += r0lowx2 + r2lowx2 = r2low * x2 + + h3 += r0highx2 + r2highx2 = r2high * x2 + + h4 += r1lowx2 + sr3lowx2 = sr3low * x2 + + h5 += r1highx2 + sr3highx2 = sr3high * x2 + + h6 += r2lowx2 + + h7 += r2highx2 + + h0 += sr3lowx2 + + h1 += sr3highx2 + +nomorebytes: + + y7 = h7 + alpha130 + + y0 = h0 + alpha32 + + y1 = h1 + alpha32 + + y2 = h2 + alpha64 + + y7 -= alpha130 + + y3 = h3 + alpha64 + + y4 = h4 + alpha96 + + y5 = h5 + alpha96 + + x7 = h7 - y7 + y7 *= scale + + y0 -= alpha32 + + y1 -= alpha32 + + y2 -= alpha64 + + h6 += x7 + + y3 -= alpha64 + + y4 -= alpha96 + + y5 -= alpha96 + + y6 = h6 + alpha130 + + x0 = h0 - y0 + + x1 = h1 - y1 + + x2 = h2 - y2 + + y6 -= alpha130 + + x0 += y7 + + x3 = h3 - y3 + + x4 = h4 - y4 + + x5 = h5 - y5 + + x6 = h6 - y6 + + y6 *= scale + + x2 += y0 + + x3 += y1 + + x4 += y2 + + x0 += y6 + + x5 += y3 + + x6 += y4 + + x2 += x3 + + x0 += x1 + + x4 += x5 + + x6 += y5 + + x2 += offset1 + d1 = int64(math.Float64bits(x2)) + + x0 += offset0 + d0 = int64(math.Float64bits(x0)) + + x4 += offset2 + d2 = int64(math.Float64bits(x4)) + + x6 += offset3 + d3 = int64(math.Float64bits(x6)) + + f0 = uint64(d0) + + f1 = uint64(d1) + bits32 = math.MaxUint64 + + f2 = uint64(d2) + bits32 >>= 32 + + f3 = uint64(d3) + f = f0 >> 32 + + f0 &= bits32 + f &= 255 + + f1 += f + g0 = f0 + 5 + + g = g0 >> 32 + g0 &= bits32 + + f = f1 >> 32 + f1 &= bits32 + + f &= 255 + g1 = f1 + g + + g = g1 >> 32 + f2 += f + + f = f2 >> 32 + g1 &= bits32 + + f2 &= bits32 + f &= 255 + + f3 += f + g2 = f2 + g + + g = g2 >> 32 + g2 &= bits32 + + f4 = f3 >> 32 + f3 &= bits32 + + f4 &= 255 + g3 = f3 + g + + g = g3 >> 32 + g3 &= bits32 + + g4 = f4 + g + + g4 = g4 - 4 + s00 = uint32(s[0]) + + f = uint64(int64(g4) >> 63) + s01 = uint32(s[1]) + + f0 &= f + g0 &^= f + s02 = uint32(s[2]) + + f1 &= f + f0 |= g0 + s03 = uint32(s[3]) + + g1 &^= f + f2 &= f + s10 = uint32(s[4]) + + f3 &= f + g2 &^= f + s11 = uint32(s[5]) + + g3 &^= f + f1 |= g1 + s12 = uint32(s[6]) + + f2 |= g2 + f3 |= g3 + s13 = uint32(s[7]) + + s01 <<= 8 + f0 += uint64(s00) + s20 = uint32(s[8]) + + s02 <<= 16 + f0 += uint64(s01) + s21 = uint32(s[9]) + + s03 <<= 24 + f0 += uint64(s02) + s22 = uint32(s[10]) + + s11 <<= 8 + f1 += uint64(s10) + s23 = uint32(s[11]) + + s12 <<= 16 + f1 += uint64(s11) + s30 = uint32(s[12]) + + s13 <<= 24 + f1 += uint64(s12) + s31 = uint32(s[13]) + + f0 += uint64(s03) + f1 += uint64(s13) + s32 = uint32(s[14]) + + s21 <<= 8 + f2 += uint64(s20) + s33 = uint32(s[15]) + + s22 <<= 16 + f2 += uint64(s21) + + s23 <<= 24 + f2 += uint64(s22) + + s31 <<= 8 + f3 += uint64(s30) + + s32 <<= 16 + f3 += uint64(s31) + + s33 <<= 24 + f3 += uint64(s32) + + f2 += uint64(s23) + f3 += uint64(s33) + + out[0] = byte(f0) + f0 >>= 8 + out[1] = byte(f0) + f0 >>= 8 + out[2] = byte(f0) + f0 >>= 8 + out[3] = byte(f0) + f0 >>= 8 + f1 += f0 + + out[4] = byte(f1) + f1 >>= 8 + out[5] = byte(f1) + f1 >>= 8 + out[6] = byte(f1) + f1 >>= 8 + out[7] = byte(f1) + f1 >>= 8 + f2 += f1 + + out[8] = byte(f2) + f2 >>= 8 + out[9] = byte(f2) + f2 >>= 8 + out[10] = byte(f2) + f2 >>= 8 + out[11] = byte(f2) + f2 >>= 8 + f3 += f2 + + out[12] = byte(f3) + f3 >>= 8 + out[13] = byte(f3) + f3 >>= 8 + out[14] = byte(f3) + f3 >>= 8 + out[15] = byte(f3) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go new file mode 100644 index 0000000..4c96147 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go @@ -0,0 +1,144 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package salsa provides low-level access to functions in the Salsa family. +package salsa // import "golang.org/x/crypto/salsa20/salsa" + +// Sigma is the Salsa20 constant for 256-bit keys. +var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'} + +// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte +// key k, and 16-byte constant c, and puts the result into the 32-byte array +// out. +func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + for i := 0; i < 20; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x5) + out[5] = byte(x5 >> 8) + out[6] = byte(x5 >> 16) + out[7] = byte(x5 >> 24) + + out[8] = byte(x10) + out[9] = byte(x10 >> 8) + out[10] = byte(x10 >> 16) + out[11] = byte(x10 >> 24) + + out[12] = byte(x15) + out[13] = byte(x15 >> 8) + out[14] = byte(x15 >> 16) + out[15] = byte(x15 >> 24) + + out[16] = byte(x6) + out[17] = byte(x6 >> 8) + out[18] = byte(x6 >> 16) + out[19] = byte(x6 >> 24) + + out[20] = byte(x7) + out[21] = byte(x7 >> 8) + out[22] = byte(x7 >> 16) + out[23] = byte(x7 >> 24) + + out[24] = byte(x8) + out[25] = byte(x8 >> 8) + out[26] = byte(x8 >> 16) + out[27] = byte(x8 >> 24) + + out[28] = byte(x9) + out[29] = byte(x9 >> 8) + out[30] = byte(x9 >> 16) + out[31] = byte(x9 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s new file mode 100644 index 0000000..083fe38 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s @@ -0,0 +1,889 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) +// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size. +TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment + MOVQ out+0(FP),DI + MOVQ in+8(FP),SI + MOVQ n+16(FP),DX + MOVQ nonce+24(FP),CX + MOVQ key+32(FP),R8 + + MOVQ SP,R12 + MOVQ SP,R9 + ADDQ $31, R9 + ANDQ $~31, R9 + MOVQ R9, SP + + MOVQ DX,R9 + MOVQ CX,DX + MOVQ R8,R10 + CMPQ R9,$0 + JBE DONE + START: + MOVL 20(R10),CX + MOVL 0(R10),R8 + MOVL 0(DX),AX + MOVL 16(R10),R11 + MOVL CX,0(SP) + MOVL R8, 4 (SP) + MOVL AX, 8 (SP) + MOVL R11, 12 (SP) + MOVL 8(DX),CX + MOVL 24(R10),R8 + MOVL 4(R10),AX + MOVL 4(DX),R11 + MOVL CX,16(SP) + MOVL R8, 20 (SP) + MOVL AX, 24 (SP) + MOVL R11, 28 (SP) + MOVL 12(DX),CX + MOVL 12(R10),DX + MOVL 28(R10),R8 + MOVL 8(R10),AX + MOVL DX,32(SP) + MOVL CX, 36 (SP) + MOVL R8, 40 (SP) + MOVL AX, 44 (SP) + MOVQ $1634760805,DX + MOVQ $857760878,CX + MOVQ $2036477234,R8 + MOVQ $1797285236,AX + MOVL DX,48(SP) + MOVL CX, 52 (SP) + MOVL R8, 56 (SP) + MOVL AX, 60 (SP) + CMPQ R9,$256 + JB BYTESBETWEEN1AND255 + MOVOA 48(SP),X0 + PSHUFL $0X55,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X3 + PSHUFL $0X00,X0,X0 + MOVOA X1,64(SP) + MOVOA X2,80(SP) + MOVOA X3,96(SP) + MOVOA X0,112(SP) + MOVOA 0(SP),X0 + PSHUFL $0XAA,X0,X1 + PSHUFL $0XFF,X0,X2 + PSHUFL $0X00,X0,X3 + PSHUFL $0X55,X0,X0 + MOVOA X1,128(SP) + MOVOA X2,144(SP) + MOVOA X3,160(SP) + MOVOA X0,176(SP) + MOVOA 16(SP),X0 + PSHUFL $0XFF,X0,X1 + PSHUFL $0X55,X0,X2 + PSHUFL $0XAA,X0,X0 + MOVOA X1,192(SP) + MOVOA X2,208(SP) + MOVOA X0,224(SP) + MOVOA 32(SP),X0 + PSHUFL $0X00,X0,X1 + PSHUFL $0XAA,X0,X2 + PSHUFL $0XFF,X0,X0 + MOVOA X1,240(SP) + MOVOA X2,256(SP) + MOVOA X0,272(SP) + BYTESATLEAST256: + MOVL 16(SP),DX + MOVL 36 (SP),CX + MOVL DX,288(SP) + MOVL CX,304(SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 292 (SP) + MOVL CX, 308 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 296 (SP) + MOVL CX, 312 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX, 300 (SP) + MOVL CX, 316 (SP) + ADDQ $1,DX + SHLQ $32,CX + ADDQ CX,DX + MOVQ DX,CX + SHRQ $32,CX + MOVL DX,16(SP) + MOVL CX, 36 (SP) + MOVQ R9,352(SP) + MOVQ $20,DX + MOVOA 64(SP),X0 + MOVOA 80(SP),X1 + MOVOA 96(SP),X2 + MOVOA 256(SP),X3 + MOVOA 272(SP),X4 + MOVOA 128(SP),X5 + MOVOA 144(SP),X6 + MOVOA 176(SP),X7 + MOVOA 192(SP),X8 + MOVOA 208(SP),X9 + MOVOA 224(SP),X10 + MOVOA 304(SP),X11 + MOVOA 112(SP),X12 + MOVOA 160(SP),X13 + MOVOA 240(SP),X14 + MOVOA 288(SP),X15 + MAINLOOP1: + MOVOA X1,320(SP) + MOVOA X2,336(SP) + MOVOA X13,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X14 + PSRLL $25,X2 + PXOR X2,X14 + MOVOA X7,X1 + PADDL X0,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X11 + PSRLL $25,X2 + PXOR X2,X11 + MOVOA X12,X1 + PADDL X14,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X15 + PSRLL $23,X2 + PXOR X2,X15 + MOVOA X0,X1 + PADDL X11,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X9 + PSRLL $23,X2 + PXOR X2,X9 + MOVOA X14,X1 + PADDL X15,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X13 + PSRLL $19,X2 + PXOR X2,X13 + MOVOA X11,X1 + PADDL X9,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X7 + PSRLL $19,X2 + PXOR X2,X7 + MOVOA X15,X1 + PADDL X13,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA 320(SP),X1 + MOVOA X12,320(SP) + MOVOA X9,X2 + PADDL X7,X2 + MOVOA X2,X12 + PSLLL $18,X2 + PXOR X2,X0 + PSRLL $14,X12 + PXOR X12,X0 + MOVOA X5,X2 + PADDL X1,X2 + MOVOA X2,X12 + PSLLL $7,X2 + PXOR X2,X3 + PSRLL $25,X12 + PXOR X12,X3 + MOVOA 336(SP),X2 + MOVOA X0,336(SP) + MOVOA X6,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X4 + PSRLL $25,X12 + PXOR X12,X4 + MOVOA X1,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X10 + PSRLL $23,X12 + PXOR X12,X10 + MOVOA X2,X0 + PADDL X4,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X8 + PSRLL $23,X12 + PXOR X12,X8 + MOVOA X3,X0 + PADDL X10,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X5 + PSRLL $19,X12 + PXOR X12,X5 + MOVOA X4,X0 + PADDL X8,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X6 + PSRLL $19,X12 + PXOR X12,X6 + MOVOA X10,X0 + PADDL X5,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA 320(SP),X0 + MOVOA X1,320(SP) + MOVOA X4,X1 + PADDL X0,X1 + MOVOA X1,X12 + PSLLL $7,X1 + PXOR X1,X7 + PSRLL $25,X12 + PXOR X12,X7 + MOVOA X8,X1 + PADDL X6,X1 + MOVOA X1,X12 + PSLLL $18,X1 + PXOR X1,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 336(SP),X12 + MOVOA X2,336(SP) + MOVOA X14,X1 + PADDL X12,X1 + MOVOA X1,X2 + PSLLL $7,X1 + PXOR X1,X5 + PSRLL $25,X2 + PXOR X2,X5 + MOVOA X0,X1 + PADDL X7,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X10 + PSRLL $23,X2 + PXOR X2,X10 + MOVOA X12,X1 + PADDL X5,X1 + MOVOA X1,X2 + PSLLL $9,X1 + PXOR X1,X8 + PSRLL $23,X2 + PXOR X2,X8 + MOVOA X7,X1 + PADDL X10,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X4 + PSRLL $19,X2 + PXOR X2,X4 + MOVOA X5,X1 + PADDL X8,X1 + MOVOA X1,X2 + PSLLL $13,X1 + PXOR X1,X14 + PSRLL $19,X2 + PXOR X2,X14 + MOVOA X10,X1 + PADDL X4,X1 + MOVOA X1,X2 + PSLLL $18,X1 + PXOR X1,X0 + PSRLL $14,X2 + PXOR X2,X0 + MOVOA 320(SP),X1 + MOVOA X0,320(SP) + MOVOA X8,X0 + PADDL X14,X0 + MOVOA X0,X2 + PSLLL $18,X0 + PXOR X0,X12 + PSRLL $14,X2 + PXOR X2,X12 + MOVOA X11,X0 + PADDL X1,X0 + MOVOA X0,X2 + PSLLL $7,X0 + PXOR X0,X6 + PSRLL $25,X2 + PXOR X2,X6 + MOVOA 336(SP),X2 + MOVOA X12,336(SP) + MOVOA X3,X0 + PADDL X2,X0 + MOVOA X0,X12 + PSLLL $7,X0 + PXOR X0,X13 + PSRLL $25,X12 + PXOR X12,X13 + MOVOA X1,X0 + PADDL X6,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X15 + PSRLL $23,X12 + PXOR X12,X15 + MOVOA X2,X0 + PADDL X13,X0 + MOVOA X0,X12 + PSLLL $9,X0 + PXOR X0,X9 + PSRLL $23,X12 + PXOR X12,X9 + MOVOA X6,X0 + PADDL X15,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X11 + PSRLL $19,X12 + PXOR X12,X11 + MOVOA X13,X0 + PADDL X9,X0 + MOVOA X0,X12 + PSLLL $13,X0 + PXOR X0,X3 + PSRLL $19,X12 + PXOR X12,X3 + MOVOA X15,X0 + PADDL X11,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X1 + PSRLL $14,X12 + PXOR X12,X1 + MOVOA X9,X0 + PADDL X3,X0 + MOVOA X0,X12 + PSLLL $18,X0 + PXOR X0,X2 + PSRLL $14,X12 + PXOR X12,X2 + MOVOA 320(SP),X12 + MOVOA 336(SP),X0 + SUBQ $2,DX + JA MAINLOOP1 + PADDL 112(SP),X12 + PADDL 176(SP),X7 + PADDL 224(SP),X10 + PADDL 272(SP),X4 + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 0(SI),DX + XORL 4(SI),CX + XORL 8(SI),R8 + XORL 12(SI),R9 + MOVL DX,0(DI) + MOVL CX,4(DI) + MOVL R8,8(DI) + MOVL R9,12(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 64(SI),DX + XORL 68(SI),CX + XORL 72(SI),R8 + XORL 76(SI),R9 + MOVL DX,64(DI) + MOVL CX,68(DI) + MOVL R8,72(DI) + MOVL R9,76(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + PSHUFL $0X39,X12,X12 + PSHUFL $0X39,X7,X7 + PSHUFL $0X39,X10,X10 + PSHUFL $0X39,X4,X4 + XORL 128(SI),DX + XORL 132(SI),CX + XORL 136(SI),R8 + XORL 140(SI),R9 + MOVL DX,128(DI) + MOVL CX,132(DI) + MOVL R8,136(DI) + MOVL R9,140(DI) + MOVD X12,DX + MOVD X7,CX + MOVD X10,R8 + MOVD X4,R9 + XORL 192(SI),DX + XORL 196(SI),CX + XORL 200(SI),R8 + XORL 204(SI),R9 + MOVL DX,192(DI) + MOVL CX,196(DI) + MOVL R8,200(DI) + MOVL R9,204(DI) + PADDL 240(SP),X14 + PADDL 64(SP),X0 + PADDL 128(SP),X5 + PADDL 192(SP),X8 + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 16(SI),DX + XORL 20(SI),CX + XORL 24(SI),R8 + XORL 28(SI),R9 + MOVL DX,16(DI) + MOVL CX,20(DI) + MOVL R8,24(DI) + MOVL R9,28(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 80(SI),DX + XORL 84(SI),CX + XORL 88(SI),R8 + XORL 92(SI),R9 + MOVL DX,80(DI) + MOVL CX,84(DI) + MOVL R8,88(DI) + MOVL R9,92(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + PSHUFL $0X39,X14,X14 + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X5,X5 + PSHUFL $0X39,X8,X8 + XORL 144(SI),DX + XORL 148(SI),CX + XORL 152(SI),R8 + XORL 156(SI),R9 + MOVL DX,144(DI) + MOVL CX,148(DI) + MOVL R8,152(DI) + MOVL R9,156(DI) + MOVD X14,DX + MOVD X0,CX + MOVD X5,R8 + MOVD X8,R9 + XORL 208(SI),DX + XORL 212(SI),CX + XORL 216(SI),R8 + XORL 220(SI),R9 + MOVL DX,208(DI) + MOVL CX,212(DI) + MOVL R8,216(DI) + MOVL R9,220(DI) + PADDL 288(SP),X15 + PADDL 304(SP),X11 + PADDL 80(SP),X1 + PADDL 144(SP),X6 + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 32(SI),DX + XORL 36(SI),CX + XORL 40(SI),R8 + XORL 44(SI),R9 + MOVL DX,32(DI) + MOVL CX,36(DI) + MOVL R8,40(DI) + MOVL R9,44(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 96(SI),DX + XORL 100(SI),CX + XORL 104(SI),R8 + XORL 108(SI),R9 + MOVL DX,96(DI) + MOVL CX,100(DI) + MOVL R8,104(DI) + MOVL R9,108(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + PSHUFL $0X39,X15,X15 + PSHUFL $0X39,X11,X11 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X6,X6 + XORL 160(SI),DX + XORL 164(SI),CX + XORL 168(SI),R8 + XORL 172(SI),R9 + MOVL DX,160(DI) + MOVL CX,164(DI) + MOVL R8,168(DI) + MOVL R9,172(DI) + MOVD X15,DX + MOVD X11,CX + MOVD X1,R8 + MOVD X6,R9 + XORL 224(SI),DX + XORL 228(SI),CX + XORL 232(SI),R8 + XORL 236(SI),R9 + MOVL DX,224(DI) + MOVL CX,228(DI) + MOVL R8,232(DI) + MOVL R9,236(DI) + PADDL 160(SP),X13 + PADDL 208(SP),X9 + PADDL 256(SP),X3 + PADDL 96(SP),X2 + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 48(SI),DX + XORL 52(SI),CX + XORL 56(SI),R8 + XORL 60(SI),R9 + MOVL DX,48(DI) + MOVL CX,52(DI) + MOVL R8,56(DI) + MOVL R9,60(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 112(SI),DX + XORL 116(SI),CX + XORL 120(SI),R8 + XORL 124(SI),R9 + MOVL DX,112(DI) + MOVL CX,116(DI) + MOVL R8,120(DI) + MOVL R9,124(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + PSHUFL $0X39,X13,X13 + PSHUFL $0X39,X9,X9 + PSHUFL $0X39,X3,X3 + PSHUFL $0X39,X2,X2 + XORL 176(SI),DX + XORL 180(SI),CX + XORL 184(SI),R8 + XORL 188(SI),R9 + MOVL DX,176(DI) + MOVL CX,180(DI) + MOVL R8,184(DI) + MOVL R9,188(DI) + MOVD X13,DX + MOVD X9,CX + MOVD X3,R8 + MOVD X2,R9 + XORL 240(SI),DX + XORL 244(SI),CX + XORL 248(SI),R8 + XORL 252(SI),R9 + MOVL DX,240(DI) + MOVL CX,244(DI) + MOVL R8,248(DI) + MOVL R9,252(DI) + MOVQ 352(SP),R9 + SUBQ $256,R9 + ADDQ $256,SI + ADDQ $256,DI + CMPQ R9,$256 + JAE BYTESATLEAST256 + CMPQ R9,$0 + JBE DONE + BYTESBETWEEN1AND255: + CMPQ R9,$64 + JAE NOCOPY + MOVQ DI,DX + LEAQ 360(SP),DI + MOVQ R9,CX + REP; MOVSB + LEAQ 360(SP),DI + LEAQ 360(SP),SI + NOCOPY: + MOVQ R9,352(SP) + MOVOA 48(SP),X0 + MOVOA 0(SP),X1 + MOVOA 16(SP),X2 + MOVOA 32(SP),X3 + MOVOA X1,X4 + MOVQ $20,CX + MAINLOOP2: + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X3 + PXOR X6,X3 + PADDL X3,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X3,X3 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X1 + PSHUFL $0X4E,X2,X2 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X3,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X1,X1 + PXOR X6,X0 + PADDL X0,X4 + MOVOA X0,X5 + MOVOA X4,X6 + PSLLL $7,X4 + PSRLL $25,X6 + PXOR X4,X1 + PXOR X6,X1 + PADDL X1,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $9,X5 + PSRLL $23,X6 + PXOR X5,X2 + PSHUFL $0X93,X1,X1 + PXOR X6,X2 + PADDL X2,X4 + MOVOA X2,X5 + MOVOA X4,X6 + PSLLL $13,X4 + PSRLL $19,X6 + PXOR X4,X3 + PSHUFL $0X4E,X2,X2 + PXOR X6,X3 + SUBQ $4,CX + PADDL X3,X5 + MOVOA X1,X4 + MOVOA X5,X6 + PSLLL $18,X5 + PXOR X7,X7 + PSRLL $14,X6 + PXOR X5,X0 + PSHUFL $0X39,X3,X3 + PXOR X6,X0 + JA MAINLOOP2 + PADDL 48(SP),X0 + PADDL 0(SP),X1 + PADDL 16(SP),X2 + PADDL 32(SP),X3 + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 0(SI),CX + XORL 48(SI),R8 + XORL 32(SI),R9 + XORL 16(SI),AX + MOVL CX,0(DI) + MOVL R8,48(DI) + MOVL R9,32(DI) + MOVL AX,16(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 20(SI),CX + XORL 4(SI),R8 + XORL 52(SI),R9 + XORL 36(SI),AX + MOVL CX,20(DI) + MOVL R8,4(DI) + MOVL R9,52(DI) + MOVL AX,36(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + PSHUFL $0X39,X0,X0 + PSHUFL $0X39,X1,X1 + PSHUFL $0X39,X2,X2 + PSHUFL $0X39,X3,X3 + XORL 40(SI),CX + XORL 24(SI),R8 + XORL 8(SI),R9 + XORL 56(SI),AX + MOVL CX,40(DI) + MOVL R8,24(DI) + MOVL R9,8(DI) + MOVL AX,56(DI) + MOVD X0,CX + MOVD X1,R8 + MOVD X2,R9 + MOVD X3,AX + XORL 60(SI),CX + XORL 44(SI),R8 + XORL 28(SI),R9 + XORL 12(SI),AX + MOVL CX,60(DI) + MOVL R8,44(DI) + MOVL R9,28(DI) + MOVL AX,12(DI) + MOVQ 352(SP),R9 + MOVL 16(SP),CX + MOVL 36 (SP),R8 + ADDQ $1,CX + SHLQ $32,R8 + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $32,R8 + MOVL CX,16(SP) + MOVL R8, 36 (SP) + CMPQ R9,$64 + JA BYTESATLEAST65 + JAE BYTESATLEAST64 + MOVQ DI,SI + MOVQ DX,DI + MOVQ R9,CX + REP; MOVSB + BYTESATLEAST64: + DONE: + MOVQ R12,SP + RET + BYTESATLEAST65: + SUBQ $64,R9 + ADDQ $64,DI + ADDQ $64,SI + JMP BYTESBETWEEN1AND255 diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go new file mode 100644 index 0000000..9bfc092 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go @@ -0,0 +1,199 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package salsa + +// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts +// the result into the 64-byte array out. The input and output may be the same array. +func Core208(out *[64]byte, in *[64]byte) { + j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24 + j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24 + j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24 + j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24 + j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24 + j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24 + j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24 + j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24 + j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24 + j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24 + j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24 + j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < 8; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go new file mode 100644 index 0000000..903c785 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go @@ -0,0 +1,23 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!appengine,!gccgo + +package salsa + +// This function is implemented in salsa2020_amd64.s. + +//go:noescape + +func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out may be the same slice but otherwise should not overlap. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + if len(in) == 0 { + return + } + salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0]) +} diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go new file mode 100644 index 0000000..95f8ca5 --- /dev/null +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go @@ -0,0 +1,234 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine gccgo + +package salsa + +const rounds = 20 + +// core applies the Salsa20 core function to 16-byte input in, 32-byte key k, +// and 16-byte constant c, and puts the result into 64-byte array out. +func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) { + j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24 + j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24 + j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24 + j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24 + j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24 + j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24 + j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24 + j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24 + j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24 + j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24 + j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24 + j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24 + j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24 + j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24 + j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24 + + x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8 + x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15 + + for i := 0; i < rounds; i += 2 { + u := x0 + x12 + x4 ^= u<<7 | u>>(32-7) + u = x4 + x0 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x4 + x12 ^= u<<13 | u>>(32-13) + u = x12 + x8 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x1 + x9 ^= u<<7 | u>>(32-7) + u = x9 + x5 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x9 + x1 ^= u<<13 | u>>(32-13) + u = x1 + x13 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x6 + x14 ^= u<<7 | u>>(32-7) + u = x14 + x10 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x14 + x6 ^= u<<13 | u>>(32-13) + u = x6 + x2 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x11 + x3 ^= u<<7 | u>>(32-7) + u = x3 + x15 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x3 + x11 ^= u<<13 | u>>(32-13) + u = x11 + x7 + x15 ^= u<<18 | u>>(32-18) + + u = x0 + x3 + x1 ^= u<<7 | u>>(32-7) + u = x1 + x0 + x2 ^= u<<9 | u>>(32-9) + u = x2 + x1 + x3 ^= u<<13 | u>>(32-13) + u = x3 + x2 + x0 ^= u<<18 | u>>(32-18) + + u = x5 + x4 + x6 ^= u<<7 | u>>(32-7) + u = x6 + x5 + x7 ^= u<<9 | u>>(32-9) + u = x7 + x6 + x4 ^= u<<13 | u>>(32-13) + u = x4 + x7 + x5 ^= u<<18 | u>>(32-18) + + u = x10 + x9 + x11 ^= u<<7 | u>>(32-7) + u = x11 + x10 + x8 ^= u<<9 | u>>(32-9) + u = x8 + x11 + x9 ^= u<<13 | u>>(32-13) + u = x9 + x8 + x10 ^= u<<18 | u>>(32-18) + + u = x15 + x14 + x12 ^= u<<7 | u>>(32-7) + u = x12 + x15 + x13 ^= u<<9 | u>>(32-9) + u = x13 + x12 + x14 ^= u<<13 | u>>(32-13) + u = x14 + x13 + x15 ^= u<<18 | u>>(32-18) + } + x0 += j0 + x1 += j1 + x2 += j2 + x3 += j3 + x4 += j4 + x5 += j5 + x6 += j6 + x7 += j7 + x8 += j8 + x9 += j9 + x10 += j10 + x11 += j11 + x12 += j12 + x13 += j13 + x14 += j14 + x15 += j15 + + out[0] = byte(x0) + out[1] = byte(x0 >> 8) + out[2] = byte(x0 >> 16) + out[3] = byte(x0 >> 24) + + out[4] = byte(x1) + out[5] = byte(x1 >> 8) + out[6] = byte(x1 >> 16) + out[7] = byte(x1 >> 24) + + out[8] = byte(x2) + out[9] = byte(x2 >> 8) + out[10] = byte(x2 >> 16) + out[11] = byte(x2 >> 24) + + out[12] = byte(x3) + out[13] = byte(x3 >> 8) + out[14] = byte(x3 >> 16) + out[15] = byte(x3 >> 24) + + out[16] = byte(x4) + out[17] = byte(x4 >> 8) + out[18] = byte(x4 >> 16) + out[19] = byte(x4 >> 24) + + out[20] = byte(x5) + out[21] = byte(x5 >> 8) + out[22] = byte(x5 >> 16) + out[23] = byte(x5 >> 24) + + out[24] = byte(x6) + out[25] = byte(x6 >> 8) + out[26] = byte(x6 >> 16) + out[27] = byte(x6 >> 24) + + out[28] = byte(x7) + out[29] = byte(x7 >> 8) + out[30] = byte(x7 >> 16) + out[31] = byte(x7 >> 24) + + out[32] = byte(x8) + out[33] = byte(x8 >> 8) + out[34] = byte(x8 >> 16) + out[35] = byte(x8 >> 24) + + out[36] = byte(x9) + out[37] = byte(x9 >> 8) + out[38] = byte(x9 >> 16) + out[39] = byte(x9 >> 24) + + out[40] = byte(x10) + out[41] = byte(x10 >> 8) + out[42] = byte(x10 >> 16) + out[43] = byte(x10 >> 24) + + out[44] = byte(x11) + out[45] = byte(x11 >> 8) + out[46] = byte(x11 >> 16) + out[47] = byte(x11 >> 24) + + out[48] = byte(x12) + out[49] = byte(x12 >> 8) + out[50] = byte(x12 >> 16) + out[51] = byte(x12 >> 24) + + out[52] = byte(x13) + out[53] = byte(x13 >> 8) + out[54] = byte(x13 >> 16) + out[55] = byte(x13 >> 24) + + out[56] = byte(x14) + out[57] = byte(x14 >> 8) + out[58] = byte(x14 >> 16) + out[59] = byte(x14 >> 24) + + out[60] = byte(x15) + out[61] = byte(x15 >> 8) + out[62] = byte(x15 >> 16) + out[63] = byte(x15 >> 24) +} + +// XORKeyStream crypts bytes from in to out using the given key and counters. +// In and out may be the same slice but otherwise should not overlap. Counter +// contains the raw salsa20 counter bytes (both nonce and block counter). +func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) { + var block [64]byte + var counterCopy [16]byte + copy(counterCopy[:], counter[:]) + + for len(in) >= 64 { + core(&block, &counterCopy, key, &Sigma) + for i, x := range block { + out[i] = in[i] ^ x + } + u := uint32(1) + for i := 8; i < 16; i++ { + u += uint32(counterCopy[i]) + counterCopy[i] = byte(u) + u >>= 8 + } + in = in[64:] + out = out[64:] + } + + if len(in) > 0 { + core(&block, &counterCopy, key, &Sigma) + for i, v := range in { + out[i] = v ^ block[i] + } + } +}