Replace x/crypto/ed25519 code with github.com/gtank/ed25519

Code pulled from commit 0a030f62c0 with
FeEqual and FeCSwap removed.
This commit is contained in:
Filippo Valsorda 2019-01-21 17:43:47 -05:00
parent 2156d823cd
commit 7522470fbc
19 changed files with 1481 additions and 3249 deletions

View File

@ -1,5 +1,5 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Copyright (c) 2019 George Tankersley. All rights reserved.
Copyright (c) 2017 George Tankersley. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View File

@ -5,7 +5,11 @@
package edwards25519
import "math/big"
import (
"math/big"
"github.com/gtank/ristretto255/internal/edwards25519/internal/radix51"
)
var (
SQRT_M1 FieldElement
@ -24,9 +28,9 @@ func init() {
CONST_D_MINUS_ONE_SQ, _ = new(big.Int).SetString("40440834346308536858101042469323190826248399146238708352240133220865137265952", 10)
)
feFromBig(&SQRT_M1, CONST_SQRT_M1)
feFromBig(&SQRT_AD_MINUS_ONE, CONST_SQRT_AD_MINUS_ONE)
feFromBig(&INVSQRT_A_MINUS_D, CONST_INVSQRT_A_MINUS_D)
feFromBig(&ONE_MINUS_D_SQ, CONST_ONE_MINUS_D_SQ)
feFromBig(&D_MINUS_ONE_SQ, CONST_D_MINUS_ONE_SQ)
radix51.FeFromBig(&SQRT_M1, CONST_SQRT_M1)
radix51.FeFromBig(&SQRT_AD_MINUS_ONE, CONST_SQRT_AD_MINUS_ONE)
radix51.FeFromBig(&INVSQRT_A_MINUS_D, CONST_INVSQRT_A_MINUS_D)
radix51.FeFromBig(&ONE_MINUS_D_SQ, CONST_ONE_MINUS_D_SQ)
radix51.FeFromBig(&D_MINUS_ONE_SQ, CONST_D_MINUS_ONE_SQ)
}

View File

@ -7,51 +7,57 @@ package edwards25519
import (
"crypto/subtle"
"math/big"
x "github.com/gtank/ristretto255/internal/edwards25519/internal/edwards25519"
"github.com/gtank/ristretto255/internal/edwards25519/internal/radix51"
)
// FeEqual returns 1 if a and b are equal, and 0 otherwise.
func FeEqual(a, b *FieldElement) int {
var sa, sb [32]byte
x.FeToBytes(&sa, a)
x.FeToBytes(&sb, b)
radix51.FeToBytes(&sa, a)
radix51.FeToBytes(&sb, b)
return subtle.ConstantTimeCompare(sa[:], sb[:])
}
// FeSelect sets out to v if cond == 1, and to u if cond == 0.
// out, v and u are allowed to overlap.
func FeSelect(out, v, u *FieldElement, cond int) {
x.FeCMove(out, u, int32(cond^1))
x.FeCMove(out, v, int32(cond))
b := uint64(cond) * 0xffffffffffffffff
out[0] = (b & v[0]) | (^b & u[0])
out[1] = (b & v[1]) | (^b & u[1])
out[2] = (b & v[2]) | (^b & u[2])
out[3] = (b & v[3]) | (^b & u[3])
out[4] = (b & v[4]) | (^b & u[4])
}
// FeCondNeg sets u to -u if cond == 1, and to u if cond == 0.
func FeCondNeg(u *FieldElement, cond int) {
var neg FieldElement
FeNeg(&neg, u)
x.FeCMove(u, &neg, int32(cond))
b := uint64(cond) * 0xffffffffffffffff
u[0] ^= b & (u[0] ^ neg[0])
u[1] ^= b & (u[1] ^ neg[1])
u[2] ^= b & (u[2] ^ neg[2])
u[3] ^= b & (u[3] ^ neg[3])
u[4] ^= b & (u[4] ^ neg[4])
}
// FeIsNegative returns 1 if u is negative, and 0 otherwise.
//
// "Negative" means that the lowest bit of the 32-byte encoding written by
// radix51.FeToBytes is set; the return value is exactly that bit.
func FeIsNegative(u *FieldElement) int {
	var b [32]byte
	radix51.FeToBytes(&b, u)
	return int(b[0] & 1) // b[0] is the least significant encoded byte
}
// FeAbs sets out to |u|. out and u are allowed to overlap.
func FeAbs(out, u *FieldElement) {
var neg FieldElement
FeNeg(&neg, u)
FeSelect(out, &neg, u, int(FeIsNegative(u)))
FeSelect(out, &neg, u, FeIsNegative(u))
}
func feFromBig(dst *FieldElement, n *big.Int) {
var buf [32]byte
nn := n.Bytes()
copy(buf[len(buf)-len(nn):], nn)
for i := range buf[:len(buf)/2] {
buf[i], buf[len(buf)-1] = buf[len(buf)-1], buf[i]
}
x.FeFromBytes(dst, &buf)
}
// Copied from second-level internal/edwards25519
// fePow22523 is from x/crypto/ed25519/internal/edwards25519.
func fePow22523(out, z *FieldElement) {
var t0, t1, t2 FieldElement
var i int

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,11 @@
package group
import "github.com/gtank/ristretto255/internal/edwards25519/internal/radix51"
var (
	// D is d, a constant in the curve equation, stored as the five limbs of
	// a radix51.FieldElement (least significant limb first).
	D radix51.FieldElement = [5]uint64{929955233495203, 466365720129213, 1662059464998953, 2033849074728123, 1442794654840575}
	// D2 is 2*d, used in the addition formula. Its limbs are those of D
	// doubled with the carries propagated.
	D2 radix51.FieldElement = [5]uint64{1859910466990425, 932731440258426, 1072319116312658, 1815898335770999, 633789495995903}
)

View File

@ -0,0 +1,272 @@
// Package group implements group logic for the Ed25519 curve.
package group
import (
"math/big"
field "github.com/gtank/ristretto255/internal/edwards25519/internal/radix51"
)
// From EFD https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
// An elliptic curve in twisted Edwards form has parameters a, d and coordinates
// x, y satisfying the following equation:
//
//	a * x^2 + y^2 = 1 + d * x^2 * y^2
//
// Extended coordinates assume a = -1 and represent x, y as (X, Y, Z, T)
// satisfying the following equations:
//
//	x = X / Z
//	y = Y / Z
//	x * y = T / Z
//
// This representation was introduced in the Hisil-Wong-Carter-Dawson paper
// "Twisted Edwards curves revisited" (Asiacrypt 2008).
//
// ExtendedGroupElement holds one point in these extended coordinates.
type ExtendedGroupElement struct {
	X, Y, Z, T field.FieldElement
}
// FromAffine sets v to the affine point (x, y) expressed in extended
// coordinates (X:Y:T:Z), the "P3" representation in ref10: X = x, Y = y,
// Z = 1 and T = x*y. See "Twisted Edwards Curves Revisited",
// Hisil-Wong-Carter-Dawson 2008, Section 3.1
// (https://eprint.iacr.org/2008/522.pdf) and
// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
func (v *ExtendedGroupElement) FromAffine(x, y *big.Int) {
	// Z does not depend on the inputs, so set it up front.
	field.FeOne(&v.Z)

	field.FeFromBig(&v.X, x)
	field.FeFromBig(&v.Y, y)

	// T carries the product of the two affine coordinates.
	field.FeMul(&v.T, &v.X, &v.Y)
}
// ToAffine returns the affine coordinates x = X/Z and y = Y/Z of v.
//
// Extended coordinates (XYZT, "P3" in ref10) convert to affine exactly like
// projective coordinates do: per HWCD, T can simply be ignored.
func (v *ExtendedGroupElement) ToAffine() (*big.Int, *big.Int) {
	var recipZ, affX, affY field.FieldElement

	// One inversion, shared by both coordinates.
	field.FeInvert(&recipZ, &v.Z)

	field.FeMul(&affY, &v.Y, &recipZ)
	field.FeMul(&affX, &v.X, &recipZ)

	return field.FeToBig(&affX), field.FeToBig(&affY)
}
// ToProjective returns a newly allocated projective copy of v. Per HWCD, the
// move from extended to projective coordinates just drops T; X, Y and Z are
// copied unchanged and v is not modified.
func (v *ExtendedGroupElement) ToProjective() *ProjectiveGroupElement {
	return &ProjectiveGroupElement{
		X: v.X,
		Y: v.Y,
		Z: v.Z,
	}
}
// Zero sets v to (X, Y, Z, T) = (0, 1, 1, 0) and returns v.
func (v *ExtendedGroupElement) Zero() *ExtendedGroupElement {
	// The two coordinates that are one.
	field.FeOne(&v.Z)
	field.FeOne(&v.Y)

	// The two coordinates that are zero.
	field.FeZero(&v.T)
	field.FeZero(&v.X)

	return v
}
// Add sets v = p1 + p2 and returns v, using the usual "add-2008-hwcd-3"
// addition formula:
// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
//
// Every coordinate of p1 and p2 is read into a temporary before any
// coordinate of v is written, so v may be the same element as p1 or p2.
//
// TODO We know Z1=1 and Z2=1 here, so mmadd-2008-hwcd-3 (6M + 1S + 1*k + 9add) could apply
func (v *ExtendedGroupElement) Add(p1, p2 *ExtendedGroupElement) *ExtendedGroupElement {
	var diff1, diff2, sum1, sum2, tProd, zProd field.FieldElement
	var a, b, c, d, e, f, g, h field.FieldElement

	// a = (Y1-X1)*(Y2-X2)
	field.FeSub(&diff1, &p1.Y, &p1.X)
	field.FeSub(&diff2, &p2.Y, &p2.X)
	field.FeMul(&a, &diff1, &diff2)

	// b = (Y1+X1)*(Y2+X2)
	field.FeAdd(&sum1, &p1.Y, &p1.X)
	field.FeAdd(&sum2, &p2.Y, &p2.X)
	field.FeMul(&b, &sum1, &sum2)

	// c = T1*2d*T2
	field.FeMul(&tProd, &p1.T, &p2.T)
	field.FeMul(&c, &tProd, &D2)

	// d = 2*Z1*Z2
	field.FeMul(&zProd, &p1.Z, &p2.Z)
	field.FeAdd(&d, &zProd, &zProd)

	field.FeSub(&e, &b, &a) // e = b-a
	field.FeSub(&f, &d, &c) // f = d-c
	field.FeAdd(&g, &d, &c) // g = d+c
	field.FeAdd(&h, &b, &a) // h = b+a

	field.FeMul(&v.X, &e, &f) // X3 = e*f
	field.FeMul(&v.Y, &g, &h) // Y3 = g*h
	field.FeMul(&v.T, &e, &h) // T3 = e*h
	field.FeMul(&v.Z, &f, &g) // Z3 = f*g
	return v
}
// Double sets v = 2*v and returns v. It implements the explicit formulas
// from HWCD Section 3.3, "Dedicated Doubling in [extended coordinates]".
//
// Explicit formula is as follows. Cost is 4M + 4S + 1D. For Ed25519, a = -1:
//
//	A  = X1^2
//	B  = Y1^2
//	C  = 2*Z1^2
//	D  = a*A
//	E  = (X1+Y1)^2 - A - B
//	G  = D + B
//	F  = G - C
//	H  = D - B
//	X3 = E*F
//	Y3 = G*H
//	T3 = E*H
//	Z3 = F*G
//
// In ref10/donna/dalek etc, this is instead handled by a faster
// mixed-coordinate doubling that results in a "Completed" group element
// instead of another point in extended coordinates. It is implemented this
// way to see if more straightforward code is worth the (hopefully small)
// performance tradeoff.
func (v *ExtendedGroupElement) Double() *ExtendedGroupElement {
	// TODO: Convert to projective coordinates? Section 4.3 mixed doubling?
	// TODO: make a decision about how these APIs work wrt chaining/smashing
	// *v = *(v.ToProjective().Double().ToExtended())
	// return v
	var xx, yy, zz2, negXX, xPlusY field.FieldElement
	var e, f, g, h field.FieldElement

	field.FeSquare(&xx, &v.X) // A = X1^2
	field.FeSquare(&yy, &v.Y) // B = Y1^2

	// C = 2*Z1^2
	field.FeSquare(&zz2, &v.Z)
	field.FeAdd(&zz2, &zz2, &zz2) // TODO should probably implement FeSquare2

	// D = -1*A (FeNeg is implemented as a subtraction)
	field.FeNeg(&negXX, &xx)

	// E = (X1+Y1)^2 - A - B
	field.FeAdd(&xPlusY, &v.X, &v.Y)
	field.FeSquare(&xPlusY, &xPlusY)
	field.FeSub(&e, &xPlusY, &xx)
	field.FeSub(&e, &e, &yy)

	field.FeAdd(&g, &negXX, &yy) // G = D + B
	field.FeSub(&f, &g, &zz2)    // F = G - C
	field.FeSub(&h, &negXX, &yy) // H = D - B

	// All reads of v are complete; overwrite it with the result.
	field.FeMul(&v.X, &e, &f) // X3 = E*F
	field.FeMul(&v.Y, &g, &h) // Y3 = G*H
	field.FeMul(&v.T, &e, &h) // T3 = E*H
	field.FeMul(&v.Z, &f, &g) // Z3 = F*G
	return v
}
// ProjectiveGroupElement is a point in projective coordinates: XYZ with
// x = X/Z, y = Y/Z, or the "P2" representation in ref10. This representation
// has a cheaper doubling formula than extended coordinates.
type ProjectiveGroupElement struct {
	X, Y, Z field.FieldElement
}
// FromAffine sets v to the affine point (x, y) in projective coordinates:
// X = x, Y = y and Z = 1.
func (v *ProjectiveGroupElement) FromAffine(x, y *big.Int) {
	// Z does not depend on the inputs, so set it up front.
	field.FeOne(&v.Z)

	field.FeFromBig(&v.X, x)
	field.FeFromBig(&v.Y, y)
}
// ToAffine returns the affine coordinates x = X/Z and y = Y/Z of v.
func (v *ProjectiveGroupElement) ToAffine() (*big.Int, *big.Int) {
	var recipZ, affX, affY field.FieldElement

	// One inversion, shared by both coordinates.
	field.FeInvert(&recipZ, &v.Z)

	field.FeMul(&affY, &v.Y, &recipZ)
	field.FeMul(&affX, &v.X, &recipZ)

	return field.FeToBig(&affX), field.FeToBig(&affY)
}
// ToExtended returns a newly allocated extended-coordinate form of v; v is
// not modified.
//
// HWCD Section 3: "Given (X : Y : Z) in [projective coordinates] passing to
// [extended coordinates, (X : Y : T : Z)] can be performed in 3M+1S by computing
// (XZ, YZ, XY, Z^2)"
func (v *ProjectiveGroupElement) ToExtended() *ExtendedGroupElement {
	ext := new(ExtendedGroupElement)

	field.FeSquare(&ext.Z, &v.Z)      // Z' = Z^2
	field.FeMul(&ext.T, &v.X, &v.Y)   // T' = X*Y
	field.FeMul(&ext.Y, &v.Y, &v.Z)   // Y' = Y*Z
	field.FeMul(&ext.X, &v.X, &v.Z)   // X' = X*Z

	return ext
}
// Zero sets v to (X, Y, Z) = (0, 1, 1) and returns v.
func (v *ProjectiveGroupElement) Zero() *ProjectiveGroupElement {
	field.FeOne(&v.Z)
	field.FeOne(&v.Y)
	field.FeZero(&v.X)
	return v
}
// DoubleZ1 returns 2*v as a newly allocated element, assuming v has Z = 1.
// The receiver is left untouched.
//
// Because we are often converting from affine, we can use "mdbl-2008-bbjlp"
// which assumes Z1=1. We also assume a = -1.
//
// Assumptions: Z1 = 1.
// Cost: 2M + 4S + 1*a + 7add + 1*2.
// Source: 2008 Bernstein-Birkner-Joye-Lange-Peters
// http://eprint.iacr.org/2008/013, plus Z1=1, plus standard simplification.
// Explicit formulas:
//
//	B  = (X1+Y1)^2
//	C  = X1^2
//	D  = Y1^2
//	E  = a*C
//	F  = E+D
//	X3 = (B-C-D)*(F-2)
//	Y3 = F*(E-D)
//	Z3 = F^2-2*F
//
// This assumption is one reason why this package is internal. For instance, it
// will not hold throughout a Montgomery ladder, when we convert to projective
// from possibly arbitrary extended coordinates.
func (v *ProjectiveGroupElement) DoubleZ1() *ProjectiveGroupElement {
	// TODO This function is inconsistent with the other ones in that it
	// returns a copy rather than smashing the receiver. It doesn't matter
	// because it is always called on ephemeral intermediate values, but should
	// fix.
	var b, c, d, e, f field.FieldElement
	var lhs, rhs field.FieldElement // scratch operands for the products
	result := new(ProjectiveGroupElement)

	field.FeSquare(&c, &v.X) // C = X1^2
	field.FeSquare(&d, &v.Y) // D = Y1^2

	// B = (X1+Y1)^2
	field.FeAdd(&lhs, &v.X, &v.Y)
	field.FeSquare(&b, &lhs)

	field.FeNeg(&e, &c)     // E = a*C where a = -1
	field.FeAdd(&f, &e, &d) // F = E + D

	// X3 = (B-C-D)*(F-2)
	field.FeSub(&lhs, &b, &c)
	field.FeSub(&lhs, &lhs, &d)
	field.FeSub(&rhs, &f, &field.FieldTwo)
	field.FeMul(&result.X, &lhs, &rhs)

	// Y3 = F*(E-D)
	field.FeSub(&lhs, &e, &d)
	field.FeMul(&result.Y, &f, &lhs)

	// Z3 = F^2 - 2*F, with 2*F removed as two subtractions of F
	field.FeSquare(&result.Z, &f)
	field.FeSub(&result.Z, &result.Z, &f)
	field.FeSub(&result.Z, &result.Z, &f)

	return result
}

View File

@ -0,0 +1,17 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Constants used in the implementation of GF(2^255-19) field arithmetic.
package radix51
const (
	// The value 2^51-1, used in carry propagation to keep the low 51 bits
	// of a limb.
	maskLow51Bits = uint64(1)<<51 - 1
)

var (
	// FieldZero, FieldOne and FieldTwo are the field elements 0, 1 and 2:
	// only the least significant limb is non-zero.
	FieldZero FieldElement = [5]uint64{0, 0, 0, 0, 0}
	FieldOne FieldElement = [5]uint64{1, 0, 0, 0, 0}
	FieldTwo FieldElement = [5]uint64{2, 0, 0, 0, 0}
)

View File

@ -0,0 +1,332 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Field arithmetic in radix 2^51 representation. This code is a port of the
// public domain amd64-51-30k version of ed25519 from SUPERCOP.
package radix51
import (
"math/big"
"math/bits"
)
// FieldElement represents an element of the field GF(2^255-19). An element t
// represents the integer t[0] + t[1]*2^51 + t[2]*2^102 + t[3]*2^153 +
// t[4]*2^204.
//
// Limbs are not always below 2^51: FeAdd adds limb-wise without carrying.
// FeReduce produces the fully reduced form.
type FieldElement [5]uint64
// FeZero sets v = 0 by clearing all five limbs.
func FeZero(v *FieldElement) {
	*v = FieldElement{}
}
// FeOne sets v = 1: the lowest limb is one and the remaining limbs are zero.
func FeOne(v *FieldElement) {
	*v = FieldElement{1}
}
// SetInt sets v to the small integer x: x is stored unmodified in the lowest
// limb and the remaining limbs are cleared. No carry is propagated.
func SetInt(v *FieldElement, x uint64) {
	*v = FieldElement{x}
}
// FeReduce sets t to the fully reduced form of v: every limb below 2^51 and
// the represented integer below p = 2^255-19. t and v may be the same element.
func FeReduce(t, v *FieldElement) {
	*t = *v

	// Let v = v[0] + v[1]*2^51 + v[2]*2^102 + v[3]*2^153 + v[4]*2^204.
	// First pass: push each limb's overflow into the next limb. Overflow out
	// of the top limb wraps to the bottom multiplied by 19, since
	// 2^255 = 19 mod p.
	for i := 0; i < 4; i++ {
		t[i+1] += t[i] >> 51
		t[i] &= maskLow51Bits
	}
	t[0] += (t[4] >> 51) * 19
	t[4] &= maskLow51Bits

	// We now have a field element t < 2^255, but need t <= 2^255-19
	// TODO Document why this works. It's the elaborate comment about r = h-pq etc etc.

	// Get the carry bit: ripple the carry of t+19 through all five limbs.
	carry := (t[0] + 19) >> 51
	for i := 1; i < 5; i++ {
		carry = (t[i] + carry) >> 51
	}

	// Add 19 when the carry bit is set, then propagate carries once more.
	t[0] += 19 * carry
	for i := 0; i < 4; i++ {
		t[i+1] += t[i] >> 51
		t[i] &= maskLow51Bits
	}
	// no additional carry: bit 2^255 is simply dropped
	t[4] &= maskLow51Bits
}
// FeAdd sets out = a + b by adding limb-wise, with no carry propagation or
// reduction. Long sequences of additions without reduction that let
// coefficients grow larger than 54 bits would be a problem. Paper cautions:
// "do not have such sequences of additions".
func FeAdd(out, a, b *FieldElement) {
	for i := range out {
		out[i] = a[i] + b[i]
	}
}
// FeSub sets out = a - b. The result is not reduced. out may be the same
// element as a or b: b is copied before anything is written.
func FeSub(out, a, b *FieldElement) {
	sub := *b

	// Reduce each limb of the subtrahend below 2^51, propagating carries, so
	// that the subtraction below fits within the limbs. This would not be
	// required for reduced input.
	for i := 0; i < 4; i++ {
		sub[i+1] += sub[i] >> 51
		sub[i] &= maskLow51Bits
	}
	sub[0] += (sub[4] >> 51) * 19
	sub[4] &= maskLow51Bits

	// This is slightly more complicated. Because we use unsigned coefficients,
	// we first add a multiple of p (limb-wise) and then subtract.
	multipleOfP := FieldElement{
		0xFFFFFFFFFFFDA,
		0xFFFFFFFFFFFFE,
		0xFFFFFFFFFFFFE,
		0xFFFFFFFFFFFFE,
		0xFFFFFFFFFFFFE,
	}
	for i := range out {
		out[i] = (a[i] + multipleOfP[i]) - sub[i]
	}
}
// FeNeg sets out = -a, computed as 0 - a with FeSub.
func FeNeg(out, a *FieldElement) {
	var zero FieldElement // the zero value already has all limbs cleared
	FeSub(out, &zero, a)
}
// FeInvert sets out = 1/z mod p by calculating z^(p-2), p-2 = 2^255 - 21.
//
// Inversion is implemented as exponentiation with exponent p - 2. It uses the
// same sequence of 255 squarings and 11 multiplications as [Curve25519].
// out is written only by the final multiplication, so it may be the same
// element as z.
func FeInvert(out, z *FieldElement) {
	// Exponents are given in the trailing comments. pN holds z^(2^N - 1).
	var z2, z9, z11, p5, p10, p20, p50, p100, acc FieldElement

	// squareTimes sets acc = src^(2^n) for n >= 1. src may be &acc.
	squareTimes := func(src *FieldElement, n int) {
		FeSquare(&acc, src)
		for i := 1; i < n; i++ {
			FeSquare(&acc, &acc)
		}
	}

	FeSquare(&z2, z)      // 2
	squareTimes(&z2, 2)   // 8
	FeMul(&z9, &acc, z)   // 9
	FeMul(&z11, &z9, &z2) // 11
	squareTimes(&z11, 1)  // 22
	FeMul(&p5, &acc, &z9) // 2^5 - 2^0 = 31

	squareTimes(&p5, 5)    // 2^10 - 2^5
	FeMul(&p10, &acc, &p5) // 2^10 - 2^0

	squareTimes(&p10, 10)   // 2^20 - 2^10
	FeMul(&p20, &acc, &p10) // 2^20 - 2^0

	squareTimes(&p20, 20)   // 2^40 - 2^20
	FeMul(&acc, &acc, &p20) // 2^40 - 2^0

	squareTimes(&acc, 10)   // 2^50 - 2^10
	FeMul(&p50, &acc, &p10) // 2^50 - 2^0

	squareTimes(&p50, 50)    // 2^100 - 2^50
	FeMul(&p100, &acc, &p50) // 2^100 - 2^0

	squareTimes(&p100, 100)  // 2^200 - 2^100
	FeMul(&acc, &acc, &p100) // 2^200 - 2^0

	squareTimes(&acc, 50)   // 2^250 - 2^50
	FeMul(&acc, &acc, &p50) // 2^250 - 2^0

	squareTimes(&acc, 5)   // 2^255 - 2^5
	FeMul(out, &acc, &z11) // 2^255 - 21
}
// FeCopy sets out = in by copying all five limbs.
func FeCopy(out, in *FieldElement) {
	*out = *in
}
// FeFromBytes sets v to the integer encoded little-endian in x, split into
// five 51-bit limbs. The top bit of x[31] is ignored (masked off with 127).
// No reduction modulo p is performed here.
func FeFromBytes(v *FieldElement, x *[32]byte) {
	// Limb 0: bits 0..50 (bytes 0-5 plus the low 3 bits of byte 6).
	v[0] = uint64(x[0])
	v[0] |= uint64(x[1]) << 8
	v[0] |= uint64(x[2]) << 16
	v[0] |= uint64(x[3]) << 24
	v[0] |= uint64(x[4]) << 32
	v[0] |= uint64(x[5]) << 40
	v[0] |= uint64(x[6]&7) << 48
	// Limb 1: bits 51..101 (high 5 bits of byte 6 through low 6 bits of byte 12).
	v[1] = uint64(x[6]) >> 3
	v[1] |= uint64(x[7]) << 5
	v[1] |= uint64(x[8]) << 13
	v[1] |= uint64(x[9]) << 21
	v[1] |= uint64(x[10]) << 29
	v[1] |= uint64(x[11]) << 37
	v[1] |= uint64(x[12]&63) << 45
	// Limb 2: bits 102..152 (high 2 bits of byte 12 through low bit of byte 19).
	v[2] = uint64(x[12]) >> 6
	v[2] |= uint64(x[13]) << 2
	v[2] |= uint64(x[14]) << 10
	v[2] |= uint64(x[15]) << 18
	v[2] |= uint64(x[16]) << 26
	v[2] |= uint64(x[17]) << 34
	v[2] |= uint64(x[18]) << 42
	v[2] |= uint64(x[19]&1) << 50
	// Limb 3: bits 153..203 (high 7 bits of byte 19 through low 4 bits of byte 25).
	v[3] = uint64(x[19]) >> 1
	v[3] |= uint64(x[20]) << 7
	v[3] |= uint64(x[21]) << 15
	v[3] |= uint64(x[22]) << 23
	v[3] |= uint64(x[23]) << 31
	v[3] |= uint64(x[24]) << 39
	v[3] |= uint64(x[25]&15) << 47
	// Limb 4: bits 204..254 (high 4 bits of byte 25 through low 7 bits of byte 31).
	v[4] = uint64(x[25]) >> 4
	v[4] |= uint64(x[26]) << 4
	v[4] |= uint64(x[27]) << 12
	v[4] |= uint64(x[28]) << 20
	v[4] |= uint64(x[29]) << 28
	v[4] |= uint64(x[30]) << 36
	v[4] |= uint64(x[31]&127) << 44
}
// FeToBytes writes the 32-byte little-endian encoding of v to r. v is first
// fully reduced with FeReduce into a local copy, so v itself is not modified.
func FeToBytes(r *[32]byte, v *FieldElement) {
	var t FieldElement
	FeReduce(&t, v)
	// Limb 0 fills bytes 0-5 and the low 3 bits of byte 6.
	r[0] = byte(t[0] & 0xff)
	r[1] = byte((t[0] >> 8) & 0xff)
	r[2] = byte((t[0] >> 16) & 0xff)
	r[3] = byte((t[0] >> 24) & 0xff)
	r[4] = byte((t[0] >> 32) & 0xff)
	r[5] = byte((t[0] >> 40) & 0xff)
	r[6] = byte((t[0] >> 48))
	// Limb 1 starts in the high 5 bits of byte 6.
	r[6] ^= byte((t[1] << 3) & 0xf8)
	r[7] = byte((t[1] >> 5) & 0xff)
	r[8] = byte((t[1] >> 13) & 0xff)
	r[9] = byte((t[1] >> 21) & 0xff)
	r[10] = byte((t[1] >> 29) & 0xff)
	r[11] = byte((t[1] >> 37) & 0xff)
	r[12] = byte((t[1] >> 45))
	// Limb 2 starts in the high 2 bits of byte 12.
	r[12] ^= byte((t[2] << 6) & 0xc0)
	r[13] = byte((t[2] >> 2) & 0xff)
	r[14] = byte((t[2] >> 10) & 0xff)
	r[15] = byte((t[2] >> 18) & 0xff)
	r[16] = byte((t[2] >> 26) & 0xff)
	r[17] = byte((t[2] >> 34) & 0xff)
	r[18] = byte((t[2] >> 42) & 0xff)
	r[19] = byte((t[2] >> 50))
	// Limb 3 starts in the high 7 bits of byte 19.
	r[19] ^= byte((t[3] << 1) & 0xfe)
	r[20] = byte((t[3] >> 7) & 0xff)
	r[21] = byte((t[3] >> 15) & 0xff)
	r[22] = byte((t[3] >> 23) & 0xff)
	r[23] = byte((t[3] >> 31) & 0xff)
	r[24] = byte((t[3] >> 39) & 0xff)
	r[25] = byte((t[3] >> 47))
	// Limb 4 starts in the high 4 bits of byte 25.
	r[25] ^= byte((t[4] << 4) & 0xf0)
	r[26] = byte((t[4] >> 4) & 0xff)
	r[27] = byte((t[4] >> 12) & 0xff)
	r[28] = byte((t[4] >> 20) & 0xff)
	r[29] = byte((t[4] >> 28) & 0xff)
	r[30] = byte((t[4] >> 36) & 0xff)
	r[31] = byte((t[4] >> 44))
}
// FeFromBig sets h from num. The words of num.Bits() are serialized
// little-endian into a 32-byte buffer, which is then decoded with
// FeFromBytes. Bytes of num beyond the first 32 are dropped, and the sign of
// num is not consulted.
func FeFromBig(h *FieldElement, num *big.Int) {
	const bytesPerWord = bits.UintSize / 8

	var encoded [32]byte
	for wordIdx, word := range num.Bits() {
		for byteIdx := 0; byteIdx < bytesPerWord; byteIdx++ {
			pos := wordIdx*bytesPerWord + byteIdx
			if pos >= len(encoded) {
				break
			}
			encoded[pos] = byte(word >> uint(byteIdx<<3))
		}
	}
	FeFromBytes(h, &encoded)
}
// FeToBig returns the value of h as a new big.Int. h is encoded with
// FeToBytes (which reduces it) and the 32 little-endian bytes are packed into
// big.Word-sized words.
func FeToBig(h *FieldElement) *big.Int {
	const bytesPerWord = bits.UintSize / 8

	var encoded [32]byte
	FeToBytes(&encoded, h) // does a reduction

	packed := make([]big.Word, len(encoded)/bytesPerWord)
	for pos, b := range encoded {
		shift := uint(pos%bytesPerWord) << 3
		packed[pos/bytesPerWord] |= big.Word(b) << shift
	}
	return new(big.Int).SetBits(packed)
}

View File

@ -0,0 +1,126 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm
package radix51
// FeMul sets out = x * y. Both operands are read into locals before out is
// written, so out may be the same element as x or y.
func FeMul(out, x, y *FieldElement) {
	var x0, x1, x2, x3, x4 uint64
	var y0, y1, y2, y3, y4 uint64
	x0 = x[0]
	x1 = x[1]
	x2 = x[2]
	x3 = x[3]
	x4 = x[4]
	y0 = y[0]
	y1 = y[1]
	y2 = y[2]
	y3 = y[3]
	y4 = y[4]
	// Reduction can be carried out simultaneously to multiplication. For
	// example, we do not compute a coefficient r_5 . Whenever the result of a
	// mul instruction belongs to r_5 , for example in the multiplication of
	// x_3*y_2 , we multiply one of the inputs by 19 and add the result to r_0.
	x1_19 := x1 * 19
	x2_19 := x2 * 19
	x3_19 := x3 * 19
	x4_19 := x4 * 19
	// NOTE(review): mul64x64 is defined elsewhere in the package; it is
	// presumably "(lo, hi) += a*b" on a 128-bit accumulator, returning the low
	// word first - confirm against its definition.
	// calculate r0 = x0*y0 + 19*(x1*y4 + x2*y3 + x3*y2 + x4*y1)
	r00, r01 := mul64x64(0, 0, x0, y0)
	r00, r01 = mul64x64(r00, r01, x1_19, y4)
	r00, r01 = mul64x64(r00, r01, x2_19, y3)
	r00, r01 = mul64x64(r00, r01, x3_19, y2)
	r00, r01 = mul64x64(r00, r01, x4_19, y1)
	// calculate r1 = x0*y1 + x1*y0 + 19*(x2*y4 + x3*y3 + x4*y2)
	r10, r11 := mul64x64(0, 0, x0, y1)
	r10, r11 = mul64x64(r10, r11, x1, y0)
	r10, r11 = mul64x64(r10, r11, x2_19, y4)
	r10, r11 = mul64x64(r10, r11, x3_19, y3)
	r10, r11 = mul64x64(r10, r11, x4_19, y2)
	// calculate r2 = x0*y2 + x1*y1 + x2*y0 + 19*(x3*y4 + x4*y3)
	r20, r21 := mul64x64(0, 0, x0, y2)
	r20, r21 = mul64x64(r20, r21, x1, y1)
	r20, r21 = mul64x64(r20, r21, x2, y0)
	r20, r21 = mul64x64(r20, r21, x3_19, y4)
	r20, r21 = mul64x64(r20, r21, x4_19, y3)
	// calculate r3 = x0*y3 + x1*y2 + x2*y1 + x3*y0 + 19*x4*y4
	r30, r31 := mul64x64(0, 0, x0, y3)
	r30, r31 = mul64x64(r30, r31, x1, y2)
	r30, r31 = mul64x64(r30, r31, x2, y1)
	r30, r31 = mul64x64(r30, r31, x3, y0)
	r30, r31 = mul64x64(r30, r31, x4_19, y4)
	// calculate r4 = x0*y4 + x1*y3 + x2*y2 + x3*y1 + x4*y0
	r40, r41 := mul64x64(0, 0, x0, y4)
	r40, r41 = mul64x64(r40, r41, x1, y3)
	r40, r41 = mul64x64(r40, r41, x2, y2)
	r40, r41 = mul64x64(r40, r41, x3, y1)
	r40, r41 = mul64x64(r40, r41, x4, y0)
	// After the multiplication we need to reduce (carry) the 5 coefficients to
	// obtain a result with coefficients that are at most slightly larger than
	// 2^51 . Denote the two registers holding coefficient r_0 as r_00 and r_01
	// with r_0 = 2^64*r_01 + r_00 . Similarly denote the two registers holding
	// coefficient r_1 as r_10 and r_11 . We first shift r_01 left by 13, while
	// shifting in the most significant bits of r_00 (shld instruction) and
	// then compute the logical and of r_00 with 2^51 - 1. We do the same with
	// r_10 and r_11 and add r_01 into r_10 after the logical and with 2^51 -
	// 1. We proceed this way for coefficients r_2,...,r_4; register r_41 is
	// multiplied by 19 before adding it to r_00 .
	r01 = (r01 << 13) | (r00 >> 51)
	r00 &= maskLow51Bits
	r11 = (r11 << 13) | (r10 >> 51)
	r10 &= maskLow51Bits
	r10 += r01
	r21 = (r21 << 13) | (r20 >> 51)
	r20 &= maskLow51Bits
	r20 += r11
	r31 = (r31 << 13) | (r30 >> 51)
	r30 &= maskLow51Bits
	r30 += r21
	r41 = (r41 << 13) | (r40 >> 51)
	r40 &= maskLow51Bits
	r40 += r31
	r41 *= 19
	r00 += r41
	// Now all 5 coefficients fit into 64-bit registers but are still too large
	// to be used as input to another multiplication. We therefore carry from
	// r_0 to r_1 , from r_1 to r_2 , from r_2 to r_3 , from r_3 to r_4 , and
	// finally from r_4 to r_0 . Each of these carries is done as one copy, one
	// right shift by 51, one logical and with 2^51 - 1, and one addition.
	r10 += r00 >> 51
	r00 &= maskLow51Bits
	r20 += r10 >> 51
	r10 &= maskLow51Bits
	r30 += r20 >> 51
	r20 &= maskLow51Bits
	r40 += r30 >> 51
	r30 &= maskLow51Bits
	r00 += (r40 >> 51) * 19
	r40 &= maskLow51Bits
	out[0] = r00
	out[1] = r10
	out[2] = r20
	out[3] = r30
	out[4] = r40
}

View File

@ -0,0 +1,10 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!noasm
package radix51
// FeMul sets out = a * b. The body is provided in assembly for amd64 builds
// without the noasm tag.
//
// The directive below must be spelled without a space after "//"; written as
// "// go:noescape" it is an ordinary comment and has no effect.
//
//go:noescape
func FeMul(out, a, b *FieldElement)

View File

@ -0,0 +1,202 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on assembly generated by PeachPy. Equivalent to the Go in fe_mul.go,
// which was originally based on the amd64-51-30k assembly in SUPERCOP.
// +build amd64,!noasm
// func FeMul(out, a, b *FieldElement)
//
// Argument names on FP match the Go declaration (out, a, b) so that vet's
// asmdecl check accepts them. BP is deliberately not used: this function has
// a zero-size frame, so the assembler does not save and restore the frame
// pointer. The high word of r0 lives in DI, and the out pointer is loaded
// only once that register is free, just before the result is stored.
TEXT ·FeMul(SB),$0-24
	MOVQ a+8(FP), BX
	MOVQ b+16(FP), CX

	// Calculate r0
	MOVQ 0(BX), AX     // rax <-- x0
	MULQ 0(CX)         // rdx, rax <-- x0*y0
	MOVQ AX, SI        // r00 = rax
	MOVQ DX, DI        // r01 = rdx
	MOVQ 8(BX), DX     // rdx <-- x1
	IMUL3Q $19, DX, AX // rax <-- x1*19
	MULQ 32(CX)        // rdx, rax <-- x1_19*y4
	ADDQ AX, SI        // r00 += rax
	ADCQ DX, DI        // r01 += rdx
	MOVQ 16(BX), DX    // rdx <-- x2
	IMUL3Q $19, DX, AX // rax <-- x2*19
	MULQ 24(CX)        // rdx, rax <-- x2_19*y3
	ADDQ AX, SI        // r00 += rax
	ADCQ DX, DI        // r01 += rdx
	MOVQ 24(BX), DX    // rdx <-- x3
	IMUL3Q $19, DX, AX // rax <-- x3*19
	MULQ 16(CX)        // rdx, rax <-- x3_19 * y2
	ADDQ AX, SI        // r00 += rax
	ADCQ DX, DI        // r01 += rdx
	MOVQ 32(BX), DX    // rdx <-- x4
	IMUL3Q $19, DX, AX // rax <-- x4*19
	MULQ 8(CX)         // rdx rax <-- x4_19*y1
	ADDQ AX, SI        // r00 += rax
	ADCQ DX, DI        // r01 += rdx

	// Calculate r1
	MOVQ 0(BX), AX
	MULQ 8(CX)
	MOVQ AX, R8 // r10
	MOVQ DX, R9 // r11
	MOVQ 8(BX), AX
	MULQ 0(CX)
	ADDQ AX, R8
	ADCQ DX, R9
	MOVQ 16(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 32(CX)
	ADDQ AX, R8
	ADCQ DX, R9
	MOVQ 24(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 24(CX)
	ADDQ AX, R8
	ADCQ DX, R9
	MOVQ 32(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 16(CX)
	ADDQ AX, R8
	ADCQ DX, R9

	// Calculate r2
	MOVQ 0(BX), AX
	MULQ 16(CX)
	MOVQ AX, R10 // r20
	MOVQ DX, R11 // r21
	MOVQ 8(BX), AX
	MULQ 8(CX)
	ADDQ AX, R10
	ADCQ DX, R11
	MOVQ 16(BX), AX
	MULQ 0(CX)
	ADDQ AX, R10
	ADCQ DX, R11
	MOVQ 24(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 32(CX)
	ADDQ AX, R10
	ADCQ DX, R11
	MOVQ 32(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 24(CX)
	ADDQ AX, R10
	ADCQ DX, R11

	// Calculate r3
	MOVQ 0(BX), AX
	MULQ 24(CX)
	MOVQ AX, R12 // r30
	MOVQ DX, R13 // r31
	MOVQ 8(BX), AX
	MULQ 16(CX)
	ADDQ AX, R12
	ADCQ DX, R13
	MOVQ 16(BX), AX
	MULQ 8(CX)
	ADDQ AX, R12
	ADCQ DX, R13
	MOVQ 24(BX), AX
	MULQ 0(CX)
	ADDQ AX, R12
	ADCQ DX, R13
	MOVQ 32(BX), DX
	IMUL3Q $19, DX, AX
	MULQ 32(CX)
	ADDQ AX, R12
	ADCQ DX, R13

	// Calculate r4
	MOVQ 0(BX), AX
	MULQ 32(CX)
	MOVQ AX, R14 // r40
	MOVQ DX, R15 // r41
	MOVQ 8(BX), AX
	MULQ 24(CX)
	ADDQ AX, R14
	ADCQ DX, R15
	MOVQ 16(BX), AX
	MULQ 16(CX)
	ADDQ AX, R14
	ADCQ DX, R15
	MOVQ 24(BX), AX
	MULQ 8(CX)
	ADDQ AX, R14
	ADCQ DX, R15
	MOVQ 32(BX), AX
	MULQ 0(CX)
	ADDQ AX, R14
	ADCQ DX, R15

	// Reduce: fold each 128-bit coefficient into 51-bit limbs.
	MOVQ $2251799813685247, AX // (1<<51) - 1
	SHLQ $13, SI, DI           // r01 = shld with r00
	ANDQ AX, SI                // r00 &= mask51
	SHLQ $13, R8, R9           // r11 = shld with r10
	ANDQ AX, R8                // r10 &= mask51
	ADDQ DI, R8                // r10 += r01
	SHLQ $13, R10, R11         // r21 = shld with r20
	ANDQ AX, R10               // r20 &= mask51
	ADDQ R9, R10               // r20 += r11
	SHLQ $13, R12, R13         // r31 = shld with r30
	ANDQ AX, R12               // r30 &= mask51
	ADDQ R11, R12              // r30 += r21
	SHLQ $13, R14, R15         // r41 = shld with r40
	ANDQ AX, R14               // r40 &= mask51
	ADDQ R13, R14              // r40 += r31
	IMUL3Q $19, R15, R15       // r41 = r41*19
	ADDQ R15, SI               // r00 += r41

	MOVQ SI, DX                // rdx <-- r00
	SHRQ $51, DX               // rdx <-- r00 >> 51
	ADDQ DX, R8                // r10 += r00 >> 51
	MOVQ R8, DX                // rdx <-- r10
	SHRQ $51, DX               // rdx <-- r10 >> 51
	ANDQ AX, SI                // r00 &= mask51
	ADDQ DX, R10               // r20 += r10 >> 51
	MOVQ R10, DX               // rdx <-- r20
	SHRQ $51, DX               // rdx <-- r20 >> 51
	ANDQ AX, R8                // r10 &= mask51
	ADDQ DX, R12               // r30 += r20 >> 51
	MOVQ R12, DX               // rdx <-- r30
	SHRQ $51, DX               // rdx <-- r30 >> 51
	ANDQ AX, R10               // r20 &= mask51
	ADDQ DX, R14               // r40 += r30 >> 51
	MOVQ R14, DX               // rdx <-- r40
	SHRQ $51, DX               // rdx <-- r40 >> 51
	ANDQ AX, R12               // r30 &= mask51
	IMUL3Q $19, DX, DX         // rdx <-- (r40 >> 51) * 19
	ADDQ DX, SI                // r00 += (r40 >> 51) *19
	ANDQ AX, R14               // r40 &= mask51

	// r01 (DI) is dead from here on; reuse DI for the output pointer.
	MOVQ out+0(FP), DI
	MOVQ SI, 0(DI)
	MOVQ R8, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R12, 24(DI)
	MOVQ R14, 32(DI)
	RET

View File

@ -0,0 +1,98 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm
package radix51
// FeSquare sets out = x*x. x is read into locals before out is written, so
// out may be the same element as x.
func FeSquare(out, x *FieldElement) {
	// Squaring needs only 15 mul instructions. Some inputs are multiplied by 2;
	// this is combined with multiplication by 19 where possible. The coefficient
	// reduction after squaring is the same as for multiplication.
	var x0, x1, x2, x3, x4 uint64
	x0 = x[0]
	x1 = x[1]
	x2 = x[2]
	x3 = x[3]
	x4 = x[4]
	x0_2 := x0 << 1
	x1_2 := x1 << 1
	x1_38 := x1 * 38
	x2_38 := x2 * 38
	x3_38 := x3 * 38
	x3_19 := x3 * 19
	x4_19 := x4 * 19
	// NOTE(review): mul64x64 is defined elsewhere in the package; it is
	// presumably "(lo, hi) += a*b" on a 128-bit accumulator, returning the low
	// word first - confirm against its definition.
	// r0 = x0*x0 + x1*38*x4 + x2*38*x3
	r00, r01 := mul64x64(0, 0, x0, x0)
	r00, r01 = mul64x64(r00, r01, x1_38, x4)
	r00, r01 = mul64x64(r00, r01, x2_38, x3)
	// r1 = x0*2*x1 + x2*38*x4 + x3*19*x3
	r10, r11 := mul64x64(0, 0, x0_2, x1)
	r10, r11 = mul64x64(r10, r11, x2_38, x4)
	r10, r11 = mul64x64(r10, r11, x3_19, x3)
	// r2 = x0*2*x2 + x1*x1 + x3*38*x4
	r20, r21 := mul64x64(0, 0, x0_2, x2)
	r20, r21 = mul64x64(r20, r21, x1, x1)
	r20, r21 = mul64x64(r20, r21, x3_38, x4)
	// r3 = x0*2*x3 + x1*2*x2 + x4*19*x4
	r30, r31 := mul64x64(0, 0, x0_2, x3)
	r30, r31 = mul64x64(r30, r31, x1_2, x2)
	r30, r31 = mul64x64(r30, r31, x4_19, x4)
	// r4 = x0*2*x4 + x1*2*x3 + x2*x2
	r40, r41 := mul64x64(0, 0, x0_2, x4)
	r40, r41 = mul64x64(r40, r41, x1_2, x3)
	r40, r41 = mul64x64(r40, r41, x2, x2)
	// Same reduction as in FeMul: shift each high word left by 13 while
	// pulling in the top bits of its low word, mask the low word to 51 bits,
	// and add the shifted high word into the next coefficient. The top
	// coefficient's overflow wraps to r0 multiplied by 19.
	r01 = (r01 << 13) | (r00 >> 51)
	r00 &= maskLow51Bits
	r11 = (r11 << 13) | (r10 >> 51)
	r10 &= maskLow51Bits
	r10 += r01
	r21 = (r21 << 13) | (r20 >> 51)
	r20 &= maskLow51Bits
	r20 += r11
	r31 = (r31 << 13) | (r30 >> 51)
	r30 &= maskLow51Bits
	r30 += r21
	r41 = (r41 << 13) | (r40 >> 51)
	r40 &= maskLow51Bits
	r40 += r31
	r41 *= 19
	r00 += r41
	// Final carry chain r0 -> r1 -> r2 -> r3 -> r4 -> r0.
	r10 += r00 >> 51
	r00 &= maskLow51Bits
	r20 += r10 >> 51
	r10 &= maskLow51Bits
	r30 += r20 >> 51
	r20 &= maskLow51Bits
	r40 += r30 >> 51
	r30 &= maskLow51Bits
	r00 += (r40 >> 51) * 19
	r40 &= maskLow51Bits
	out[0] = r00
	out[1] = r10
	out[2] = r20
	out[3] = r30
	out[4] = r40
}

View File

@ -0,0 +1,10 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!noasm
package radix51
// FeSquare sets out = x*x. The body is provided in assembly for amd64 builds
// without the noasm tag.
//
// The directive below must be spelled without a space after "//"; written as
// "// go:noescape" it is an ordinary comment and has no effect.
//
//go:noescape
func FeSquare(out, x *FieldElement)

View File

@ -0,0 +1,150 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!noasm
// func FeSquare(out, x *FieldElement)
//
// Argument names on FP match the Go declaration (out, x) so that vet's
// asmdecl check accepts them. The TEXT flag 4 is the numeric value of
// NOSPLIT.
TEXT ·FeSquare(SB),4,$0-16
	MOVQ out+0(FP), DI
	MOVQ x+8(FP), SI

	// r0 = x0*x0 + x1*38*x4 + x2*38*x3
	MOVQ 0(SI), AX
	MULQ 0(SI)
	MOVQ AX, CX // r00
	MOVQ DX, R8 // r01
	MOVQ 8(SI), DX
	IMUL3Q $38, DX, AX
	MULQ 32(SI)
	ADDQ AX, CX
	ADCQ DX, R8
	MOVQ 16(SI), DX
	IMUL3Q $38, DX, AX
	MULQ 24(SI)
	ADDQ AX, CX
	ADCQ DX, R8

	// r1 = x0*2*x1 + x2*38*x4 + x3*19*x3
	MOVQ 0(SI), AX
	SHLQ $1, AX
	MULQ 8(SI)
	MOVQ AX, R9  // r10
	MOVQ DX, R10 // r11
	MOVQ 16(SI), DX
	IMUL3Q $38, DX, AX
	MULQ 32(SI)
	ADDQ AX, R9
	ADCQ DX, R10
	MOVQ 24(SI), DX
	IMUL3Q $19, DX, AX
	MULQ 24(SI)
	ADDQ AX, R9
	ADCQ DX, R10

	// r2 = x0*2*x2 + x1*x1 + x3*38*x4
	MOVQ 0(SI), AX
	SHLQ $1, AX
	MULQ 16(SI)
	MOVQ AX, R11 // r20
	MOVQ DX, R12 // r21
	MOVQ 8(SI), AX
	MULQ 8(SI)
	ADDQ AX, R11
	ADCQ DX, R12
	MOVQ 24(SI), DX
	IMUL3Q $38, DX, AX
	MULQ 32(SI)
	ADDQ AX, R11
	ADCQ DX, R12

	// r3 = x0*2*x3 + x1*2*x2 + x4*19*x4
	MOVQ 0(SI), AX
	SHLQ $1, AX
	MULQ 24(SI)
	MOVQ AX, R13 // r30
	MOVQ DX, R14 // r31
	MOVQ 8(SI), AX
	SHLQ $1, AX
	MULQ 16(SI)
	ADDQ AX, R13
	ADCQ DX, R14
	MOVQ 32(SI), DX
	IMUL3Q $19, DX, AX
	MULQ 32(SI)
	ADDQ AX, R13
	ADCQ DX, R14

	// r4 = x0*2*x4 + x1*2*x3 + x2*x2
	MOVQ 0(SI), AX
	SHLQ $1, AX
	MULQ 32(SI)
	MOVQ AX, R15 // r40
	MOVQ DX, BX  // r41
	MOVQ 8(SI), AX
	SHLQ $1, AX
	MULQ 24(SI)
	ADDQ AX, R15
	ADCQ DX, BX
	MOVQ 16(SI), AX
	MULQ 16(SI)
	ADDQ AX, R15
	ADCQ DX, BX

	// Reduce
	MOVQ $2251799813685247, AX // (1<<51) - 1
	SHLQ $13, CX, R8           // r01 = shld with r00
	ANDQ AX, CX                // r00 &= mask51
	SHLQ $13, R9, R10          // r11 = shld with r10
	ANDQ AX, R9                // r10 &= mask51
	ADDQ R8, R9                // r10 += r01
	SHLQ $13, R11, R12         // r21 = shld with r20
	ANDQ AX, R11               // r20 &= mask51
	ADDQ R10, R11              // r20 += r11
	SHLQ $13, R13, R14         // r31 = shld with r30
	ANDQ AX, R13               // r30 &= mask51
	ADDQ R12, R13              // r30 += r21
	SHLQ $13, R15, BX          // r41 = shld with r40
	ANDQ AX, R15               // r40 &= mask51
	ADDQ R14, R15              // r40 += r31
	IMUL3Q $19, BX, DX         // r41 = r41*19
	ADDQ DX, CX                // r00 += r41

	MOVQ CX, DX                // rdx <-- r00
	SHRQ $51, DX               // rdx <-- r00 >> 51
	ADDQ DX, R9                // r10 += r00 >> 51
	MOVQ R9, DX                // rdx <-- r10
	SHRQ $51, DX               // rdx <-- r10 >> 51
	ANDQ AX, CX                // r00 &= mask51
	ADDQ DX, R11               // r20 += r10 >> 51
	MOVQ R11, DX               // rdx <-- r20
	SHRQ $51, DX               // rdx <-- r20 >> 51
	ANDQ AX, R9                // r10 &= mask51
	ADDQ DX, R13               // r30 += r20 >> 51
	MOVQ R13, DX               // rdx <-- r30
	SHRQ $51, DX               // rdx <-- r30 >> 51
	ANDQ AX, R11               // r20 &= mask51
	ADDQ DX, R15               // r40 += r30 >> 51
	MOVQ R15, DX               // rdx <-- r40
	SHRQ $51, DX               // rdx <-- r40 >> 51
	ANDQ AX, R13               // r30 &= mask51
	IMUL3Q $19, DX, DX         // rdx <-- (r40 >> 51) * 19
	ADDQ DX, CX                // r00 += (r40 >> 51) *19
	ANDQ AX, R15               // r40 &= mask51

	MOVQ CX, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R11, 16(DI)
	MOVQ R13, 24(DI)
	MOVQ R15, 32(DI)
	RET

View File

@ -0,0 +1,179 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package radix51
import (
"bytes"
"crypto/rand"
"io"
"testing"
"unsafe"
)
// TestMul64to128 checks mul64x64 against three known answers: a product that
// fits in the low word, a product that spills into the high word, and the sum
// of five identical products accumulated through the lo/hi inputs.
func TestMul64to128(t *testing.T) {
	// 5 * 5 = 0x19, nothing in the high word.
	lo, hi := mul64x64(0, 0, 5, 5)
	if hi != 0 || lo != 0x19 {
		t.Errorf("lo-range wide mult failed, got %d + %d*(2**64)", lo, hi)
	}

	// (2^54 - 1)^2 needs 108 bits.
	const max54 = uint64(18014398509481983) // 2^54 - 1
	lo, hi = mul64x64(0, 0, max54, max54)
	if hi != 0xfffffffffff || lo != 0xff80000000000001 {
		t.Errorf("hi-range wide mult failed, got %d + %d*(2**64)", lo, hi)
	}

	// Accumulate the same product five times, starting from zero.
	p, q := uint64(1125899906842661), uint64(2097155)
	lo, hi = 0, 0
	for round := 0; round < 5; round++ {
		lo, hi = mul64x64(lo, hi, p, q)
	}
	if hi != 640 || lo != 16888498990613035 {
		t.Errorf("wrong answer: %d + %d*(2**64)", lo, hi)
	}
}
// wideMultInlineLo and wideMultInlineHi receive the final accumulator of
// BenchmarkWideMultInline. Storing to package-level variables makes the loop
// result observable, so the compiler cannot discard the multiply as dead code.
var wideMultInlineLo, wideMultInlineHi uint64

// BenchmarkWideMultInline measures the 64x64->128 multiply-accumulate with the
// arithmetic written out by hand inside the loop (the same expressions as
// mul64x64), for comparison against BenchmarkWideMultCall.
func BenchmarkWideMultInline(b *testing.B) {
	var r0, r1 uint64
	x := uint64(18014398509481983) // 2^54 - 1
	y := uint64(18014398509481983) // 2^54 - 1
	for i := 0; i < b.N; i++ {
		// Cross terms of the schoolbook 32-bit split; only their upper halves
		// contribute to the high word.
		t1 := (x>>32)*(y&0xFFFFFFFF) + ((x & 0xFFFFFFFF) * (y & 0xFFFFFFFF) >> 32)
		t2 := (x&0xFFFFFFFF)*(y>>32) + (t1 & 0xFFFFFFFF)
		ol := (x * y) + r0
		// The low-word addition wrapped iff the sum is smaller than the
		// addend; the bool is read back as a byte to get the carry bit.
		cmp := ol < r0
		oh := r1 + (x>>32)*(y>>32) + t1>>32 + t2>>32 + uint64(*(*byte)(unsafe.Pointer(&cmp)))
		r1 = oh
		r0 = ol
	}
	wideMultInlineLo, wideMultInlineHi = r0, r1
}
// wideMultCallLo and wideMultCallHi receive the final accumulator of
// BenchmarkWideMultCall. Storing to package-level variables makes the loop
// result observable, so the compiler cannot discard the calls as dead code.
var wideMultCallLo, wideMultCallHi uint64

// BenchmarkWideMultCall measures the 64x64->128 multiply-accumulate when it is
// performed through mul64x64, for comparison against BenchmarkWideMultInline.
func BenchmarkWideMultCall(b *testing.B) {
	var r0, r1 uint64
	x := uint64(18014398509481983) // 2^54 - 1
	y := uint64(18014398509481983) // 2^54 - 1
	for i := 0; i < b.N; i++ {
		r0, r1 = mul64x64(r0, r1, x, y)
	}
	wideMultCallLo, wideMultCallHi = r0, r1
}
// TestFeFromBytesRoundTrip checks that the byte encoding and the limb
// representation convert into each other without loss, in both directions.
func TestFeFromBytesRoundTrip(t *testing.T) {
	var (
		in, out [32]byte
		fe, r   FieldElement
	)

	// bytes -> field element -> bytes, using the pattern 1, 2, ..., 32.
	for i := range in {
		in[i] = byte(i + 1)
	}
	FeFromBytes(&fe, &in)
	FeToBytes(&out, &fe)
	if !bytes.Equal(in[:], out[:]) {
		t.Error("Bytes<>FE doesn't roundtrip")
	}

	// field element -> bytes -> field element, using a fixed "random" value.
	fe = FieldElement{
		0x4e645be9215a2,
		0x4e9654922df12,
		0x5829e468b0205,
		0x5e8fca9e0881c,
		0x5c490f087d796,
	}
	FeToBytes(&out, &fe)
	FeFromBytes(&r, &out)
	for i, want := range fe {
		if r[i] != want {
			t.Error("FE<>Bytes doesn't roundtrip")
		}
	}
}
// TestSanity tests self-consistency between FeMul and FeSquare: FeSquare(x)
// must produce exactly the same limbs as FeMul(x, x), both for the all-ones
// limb pattern and for a field element decoded from 32 random bytes.
//
// The additional comparisons against the pure-Go reference implementations
// (FeMulGo / FeSquareGo) are currently disabled.
func TestSanity(t *testing.T) {
	var x, x2, x2sq FieldElement

	x = [5]uint64{1, 1, 1, 1, 1}
	FeMul(&x2, &x, &x)
	FeSquare(&x2sq, &x)
	if !vartimeEqual(x2, x2sq) {
		t.Fatalf("all ones failed\nmul: %x\nsqr: %x\n", x2, x2sq)
	}

	// Named buf rather than bytes so the imported bytes package is not shadowed.
	var buf [32]byte
	if _, err := io.ReadFull(rand.Reader, buf[:]); err != nil {
		t.Fatal(err)
	}
	FeFromBytes(&x, &buf)

	FeMul(&x2, &x, &x)
	FeSquare(&x2sq, &x)
	if !vartimeEqual(x2, x2sq) {
		// Report the input as well: it is random, so the failure cannot be
		// reproduced without it.
		t.Fatalf("random field element failed\nfe: %x\n\nmul: %x\nsqr: %x\n", x, x2, x2sq)
	}
}
// vartimeEqual reports whether x and y have identical limbs. It compares the
// raw representations without reducing them first and makes no attempt to run
// in constant time; it is a helper for tests only.
func vartimeEqual(x, y FieldElement) bool {
	return x == y
}
// TestFeInvert checks the identity x * x^-1 == 1, first for the all-ones limb
// pattern and then for a field element decoded from 32 random bytes. The
// product is passed through FeReduce before it is compared limb-by-limb
// against the constant one.
func TestFeInvert(t *testing.T) {
	one := FieldElement{1, 0, 0, 0, 0}
	x := FieldElement{1, 1, 1, 1, 1}
	var xinv, r FieldElement

	FeInvert(&xinv, &x)
	FeMul(&r, &x, &xinv)
	FeReduce(&r, &r)
	if !vartimeEqual(one, r) {
		t.Errorf("inversion identity failed, got: %x", r)
	}

	var raw [32]byte
	if _, err := io.ReadFull(rand.Reader, raw[:]); err != nil {
		t.Fatal(err)
	}
	FeFromBytes(&x, &raw)

	FeInvert(&xinv, &x)
	FeMul(&r, &x, &xinv)
	FeReduce(&r, &r)
	if !vartimeEqual(one, r) {
		t.Errorf("random inversion identity failed, got: %x for field element %x", r, x)
	}
}

View File

@ -0,0 +1,18 @@
// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package radix51
import "unsafe"
// mul64x64 multiplies two 64-bit numbers and adds the 128-bit product to the
// accumulator (lo, hi), returning the new accumulator as (ol, oh).
//
// The product is built from 32-bit halves. The low word is simply a*b + lo
// with wrap-around; the high word collects hi, the high-half product, the
// upper halves of the two cross terms, and the carry out of the low-word
// addition. The carry is obtained by reading the comparison result back as a
// byte, which avoids a branch. The body is kept to straight-line arithmetic so
// that it stays a candidate for inlining.
func mul64x64(lo, hi, a, b uint64) (ol uint64, oh uint64) {
	const mask32 = 0xFFFFFFFF
	aLo, aHi := a&mask32, a>>32
	bLo, bHi := b&mask32, b>>32

	// Cross terms, each including the carry from the part below it.
	cross1 := aHi*bLo + (aLo*bLo)>>32
	cross2 := aLo*bHi + cross1&mask32

	ol = a*b + lo
	wrapped := ol < lo // true iff the low-word addition overflowed
	oh = hi + aHi*bHi + cross1>>32 + cross2>>32 + uint64(*(*byte)(unsafe.Pointer(&wrapped)))
	return ol, oh
}

View File

@ -5,14 +5,16 @@
package edwards25519
import x "github.com/gtank/ristretto255/internal/edwards25519/internal/edwards25519"
import (
"github.com/gtank/ristretto255/internal/edwards25519/internal/group"
"github.com/gtank/ristretto255/internal/edwards25519/internal/radix51"
)
// Expose some types and functions from the x/crypto code to ristretto255.
// Expose some types and functions from the internal package to ristretto255.
type ExtendedGroupElement = x.ExtendedGroupElement
type FieldElement = x.FieldElement
type ExtendedGroupElement = group.ExtendedGroupElement
type FieldElement = radix51.FieldElement
var FeMul = x.FeMul
var FeSquare = x.FeSquare
var FeNeg = x.FeNeg
var FeIsNegative = x.FeIsNegative
var FeMul = radix51.FeMul
var FeSquare = radix51.FeSquare
var FeNeg = radix51.FeNeg

View File

@ -31,3 +31,13 @@ func (e *Element) Equal(ee *Element) int {
return out
}
// FromUniformBytes maps the 64-byte slice b to an Element e uniformly and
// deterministically. This can be used for hash-to-group operations or to
// obtain a random element.
//
// It panics if len(b) is not exactly 64.
//
// NOTE(review): the body visible here only validates the input length; e is
// not modified yet.
func (e *Element) FromUniformBytes(b []byte) {
	const uniformLen = 64
	if n := len(b); n != uniformLen {
		panic("ristretto255: FromUniformBytes called with a byte slice of length different than 64")
	}
}