1 module dcrypt.ecc.curved25519.fieldElement; 2 3 import dcrypt.bitmanip; 4 import dcrypt.util; 5 6 @safe nothrow @nogc: 7 8 /// fe means field element. 9 /// Here the field is \Z/(2^255-19). 10 /// An element t, entries t[0]...t[9], represents the integer 11 /// t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9]. 12 /// Bounds on each t[i] vary depending on context. 13 14 @safe 15 struct fe { 16 17 enum fe zero = 0; 18 enum fe one = [1,0,0,0,0,0,0,0,0,0]; 19 20 private uint[10] value; 21 private alias value this; 22 23 // this(in ref uint[10] v) { 24 // value = v; 25 // } 26 nothrow @nogc: 27 28 /// Create fe from 32 bytes. 29 static fe fromBytes(in ubyte[] bytes) 30 in { 31 assert(bytes.length == 32, "bytes.length must be 32."); 32 } body { 33 return fe_frombytes(bytes); 34 } 35 36 this(in uint[] v) 37 in { 38 assert(v.length == 10, "v.length must be 10."); 39 } body { 40 value = v; 41 } 42 43 // TODO remove 44 this(in uint v) 45 { 46 value[] = v; 47 } 48 49 fe opAssign(in ref uint[10] v) { 50 value = v; 51 return this; 52 } 53 54 /// Comparing in constant time. 55 bool opEquals()(auto ref const fe rhs) const { 56 return crypto_equals(this.value, rhs.value); 57 } 58 59 fe opBinary(string op)(auto ref const fe rhs) const pure 60 if (op == "+" || op == "-" || op == "*") 61 { 62 static if(op == "+") { 63 fe tmp; 64 tmp.value[] = this.value[] + rhs.value[]; 65 return tmp; 66 } else static if(op == "-") { 67 fe tmp; 68 tmp.value[] = this.value[] - rhs.value[]; 69 return tmp; 70 } else static if(op == "*") { 71 return fe_mul(this, rhs); 72 } 73 } 74 75 fe opUnary(string op)() const 76 if(op == "-") 77 { 78 static if(op == "-") { 79 fe tmp; 80 tmp.value[] = -this.value[]; 81 return tmp; 82 } 83 } 84 85 ref fe opOpAssign(string op)(auto ref const fe rhs) pure 86 if (op == "+" || op == "-" || op == "*") 87 { 88 static if(op == "+") { 89 value[] += rhs.value[]; 90 } else static if(op == "-") { 91 value[] -= rhs.value[]; 92 } else static if(op == "*") { 93 this = this * rhs; 94 } 95 96 return this; 97 } 98 99 /* 100 return 1 if f == 0 101 return 0 if f != 0 102 103 Preconditions: 104 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 105 */ 106 @property 107 bool isNonzero() const 108 { 109 immutable ubyte[32] zero = 0; 110 return !crypto_equals(fe_tobytes(this), zero); 111 } 112 113 /** 114 return 1 if f is in {1,3,5,...,q-2} 115 return 0 if f is in {0,2,4,...,q-1} 116 117 Preconditions: 118 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 119 */ 120 @property 121 bool isNegative() const 122 { 123 return (toBytes[0] & 1) == 1; 124 } 125 126 @property 127 fe inverse() const { 128 return fe_invert(this); 129 } 130 131 /// Changes the sign. 132 void negate() { 133 value[] = -value[]; 134 } 135 136 @property 137 ubyte[32] toBytes() const { 138 return fe_tobytes(this); 139 } 140 141 /// Params: 142 /// repeat = Repeat squaring n times. 143 /// Returns: f^2 or f^(2^repeat). 144 @property 145 fe sq(uint repeat = 1)() const pure 146 if(repeat > 0) 147 { 148 fe f = fe_sq(this); 149 150 static if(repeat > 1) { 151 foreach(i; 1..repeat) { 152 f = f.sq!1; 153 } 154 } 155 156 return f; 157 } 158 159 /// Returns: 2*f*f 160 @property 161 fe sq2() const pure { 162 return fe_sq2(this); 163 } 164 165 /// Power by squaring in constant time. 166 /// Returns f^power 167 @property 168 fe cpow(uint power)() const { 169 fe r = fe.one; 170 fe sq = this; 171 for(uint p = power; p > 0; p >>= 1) { 172 if((p & 1) == 1) { 173 r *= sq; 174 } 175 sq = sq.sq; 176 } 177 178 return r; 179 } 180 } 181 182 183 184 ///// h = f + g 185 ///// Can overlap h with f or g. 186 ///// 187 ///// Preconditions: 188 ///// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 189 ///// |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 190 ///// 191 ///// Postconditions: 192 ///// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 193 //void fe_add(ref fe h, in ref fe f, in ref fe g) 194 //{ 195 // //h[] = f[] + g[]; 196 // foreach(i; 0..10) { 197 // h[i] = f[i] + g[i]; 198 // } 199 //} 200 201 202 /// Conditional move. 203 /// Replace (f,g) with (g,g) if b == 1; 204 /// replace (f,g) with (f,g) if b == 0. 205 /// 206 /// Params: 207 /// dest = Destination. 208 /// src = Source. 209 /// condition = Condition. 210 void fe_cmov(ref fe dest, in ref fe src, in bool condition) 211 in { 212 assert(condition == 0 || condition == 1); 213 } out { 214 if(condition == 1) { 215 assert(dest == src); 216 } 217 } body { 218 immutable uint mask = -(cast(int)condition); 219 220 assert((condition == 0 && mask == 0) || (condition == 1 && mask == 0xFFFFFFFF)); 221 222 dest[] ^= mask & dest[]; 223 dest[] ^= mask & src[]; 224 } 225 226 // test conditional move 227 unittest { 228 import std.algorithm: all; 229 230 fe a, b; 231 a[] = 13; 232 b[] = 42; 233 234 assert(all!"a == 13"(a[])); 235 assert(all!"a == 42"(b[])); 236 237 fe_cmov(a, b, 0); 238 239 assert(all!"a == 13"(a[])); 240 assert(all!"a == 42"(b[])); 241 242 fe_cmov(a, b, 1); 243 244 assert(all!"a == 42"(a[])); 245 } 246 247 ulong load_3(in ubyte[] inp) pure 248 in { 249 assert(inp.length == 3); 250 } body { 251 ulong result; 252 result = cast(ulong) inp[0]; 253 result |= (cast(ulong) inp[1]) << 8; 254 result |= (cast(ulong) inp[2]) << 16; 255 return result; 256 } 257 258 ulong load_4(in ubyte[] inp) pure 259 in { 260 assert(inp.length == 4); 261 } body { 262 ulong result; 263 result = cast(ulong) inp[0]; 264 result |= (cast(ulong) inp[1]) << 8; 265 result |= (cast(ulong) inp[2]) << 16; 266 result |= (cast(ulong) inp[3]) << 24; 267 return result; 268 } 269 270 /* 271 Ignores top bit of h. 272 */ 273 private fe fe_frombytes(in ubyte[] s) pure 274 in { 275 assert(s.length == 32); 276 } body { 277 long h0 = load_4(s[0..4]); 278 long h1 = load_3(s[4..7]) << 6; 279 long h2 = load_3(s[7..10]) << 5; 280 long h3 = load_3(s[10..13]) << 3; 281 long h4 = load_3(s[13..16]) << 2; 282 long h5 = load_4(s[16..20]); 283 long h6 = load_3(s[20..23]) << 7; 284 long h7 = load_3(s[23..26]) << 5; 285 long h8 = load_3(s[26..29]) << 4; 286 long h9 = (load_3(s[29..32]) & 8388607) << 2; 287 288 long carry0; 289 long carry1; 290 long carry2; 291 long carry3; 292 long carry4; 293 long carry5; 294 long carry6; 295 long carry7; 296 long carry8; 297 long carry9; 298 299 carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25); 300 carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25); 301 carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25); 302 carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25); 303 carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25); 304 305 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 306 carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26); 307 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 308 carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26); 309 carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26); 310 311 fe h; 312 h[0] = cast(int) h0; 313 h[1] = cast(int) h1; 314 h[2] = cast(int) h2; 315 h[3] = cast(int) h3; 316 h[4] = cast(int) h4; 317 h[5] = cast(int) h5; 318 h[6] = cast(int) h6; 319 h[7] = cast(int) h7; 320 h[8] = cast(int) h8; 321 h[9] = cast(int) h9; 322 return h; 323 } 324 325 // TODO replace all SHL* with << 326 long SHL64(in long val, in uint shift) pure nothrow @nogc { 327 return cast(long)(cast(ulong) val << shift); 328 } 329 330 int SHL32(in int val, in uint shift) pure nothrow @nogc { 331 return cast(int)(cast(uint) val << shift); 332 } 333 334 int SHL8(in byte val, in uint shift) pure nothrow @nogc { 335 return cast(byte)(cast(ubyte) val << shift); 336 } 337 338 unittest { 339 assert(cast(int)(0xFFFFFFFF) << 7 == SHL32(0xFFFFFFFF, 7)); 340 } 341 342 private fe fe_invert(in ref fe z) 343 { 344 fe t0; 345 fe t1; 346 fe t2; 347 fe t3; 348 // pow225521 349 t0 = z.sq; 350 t1 = t0.sq.sq; 351 352 t1 *= z; 353 t0 *= t1; 354 t2 = t0.sq; 355 t1 *= t2; 356 357 t2 = t1.sq!5; 358 359 t1 *= t2; 360 t2 = t1.sq!10; 361 362 t2 *= t1; 363 364 t3 = t2.sq!20; 365 366 t2 *= t3; 367 t2 = t2.sq!10; 368 369 t1 *= t2; 370 371 t2 = t1.sq!50; 372 373 t2 *= t1; 374 t3 = t2.sq!100; 375 376 t2 *= t3; 377 t2 = t2.sq!50; 378 379 t1 *= t2; 380 381 t1 = t1.sq!5; 382 383 return t1 * t0; 384 } 385 386 387 388 389 390 /** 391 Returns: h = f * g 392 Can overlap h with f or g. 393 394 Preconditions: 395 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 396 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 397 398 Postconditions: 399 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 400 401 Note: 402 Notes on implementation strategy: 403 404 Using schoolbook multiplication. 405 Karatsuba would save a little in some cost models. 406 407 Most multiplications by 2 and 19 are 32-bit precomputations; 408 cheaper than 64-bit postcomputations. 409 410 There is one remaining multiplication by 19 in the carry chain; 411 one *19 precomputation can be merged into this, 412 but the resulting data flow is considerably less clean. 413 414 There are 12 carries below. 415 10 of them are 2-way parallelizable and vectorizable. 416 Can get away with 11 carries, but then data flow is much deeper. 417 418 With tighter constraints on inputs can squeeze carries into int32. 419 */ 420 private fe fe_mul(in ref fe f, in ref fe g) pure 421 { 422 int f0 = f[0]; 423 int f1 = f[1]; 424 int f2 = f[2]; 425 int f3 = f[3]; 426 int f4 = f[4]; 427 int f5 = f[5]; 428 int f6 = f[6]; 429 int f7 = f[7]; 430 int f8 = f[8]; 431 int f9 = f[9]; 432 int g0 = g[0]; 433 int g1 = g[1]; 434 int g2 = g[2]; 435 int g3 = g[3]; 436 int g4 = g[4]; 437 int g5 = g[5]; 438 int g6 = g[6]; 439 int g7 = g[7]; 440 int g8 = g[8]; 441 int g9 = g[9]; 442 int g1_19 = 19 * g1; /* 1.959375*2^29 */ 443 int g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ 444 int g3_19 = 19 * g3; 445 int g4_19 = 19 * g4; 446 int g5_19 = 19 * g5; 447 int g6_19 = 19 * g6; 448 int g7_19 = 19 * g7; 449 int g8_19 = 19 * g8; 450 int g9_19 = 19 * g9; 451 int f1_2 = 2 * f1; 452 int f3_2 = 2 * f3; 453 int f5_2 = 2 * f5; 454 int f7_2 = 2 * f7; 455 int f9_2 = 2 * f9; 456 long f0g0 = f0 * cast(long) g0; 457 long f0g1 = f0 * cast(long) g1; 458 long f0g2 = f0 * cast(long) g2; 459 long f0g3 = f0 * cast(long) g3; 460 long f0g4 = f0 * cast(long) g4; 461 long f0g5 = f0 * cast(long) g5; 462 long f0g6 = f0 * cast(long) g6; 463 long f0g7 = f0 * cast(long) g7; 464 long f0g8 = f0 * cast(long) g8; 465 long f0g9 = f0 * cast(long) g9; 466 long f1g0 = f1 * cast(long) g0; 467 long f1g1_2 = f1_2 * cast(long) g1; 468 long f1g2 = f1 * cast(long) g2; 469 long f1g3_2 = f1_2 * cast(long) g3; 470 long f1g4 = f1 * cast(long) g4; 471 long f1g5_2 = f1_2 * cast(long) g5; 472 long f1g6 = f1 * cast(long) g6; 473 long f1g7_2 = f1_2 * cast(long) g7; 474 long f1g8 = f1 * cast(long) g8; 475 long f1g9_38 = f1_2 * cast(long) g9_19; 476 long f2g0 = f2 * cast(long) g0; 477 long f2g1 = f2 * cast(long) g1; 478 long f2g2 = f2 * cast(long) g2; 479 long f2g3 = f2 * cast(long) g3; 480 long f2g4 = f2 * cast(long) g4; 481 long f2g5 = f2 * cast(long) g5; 482 long f2g6 = f2 * cast(long) g6; 483 long f2g7 = f2 * cast(long) g7; 484 long f2g8_19 = f2 * cast(long) g8_19; 485 long f2g9_19 = f2 * cast(long) g9_19; 486 long f3g0 = f3 * cast(long) g0; 487 long f3g1_2 = f3_2 * cast(long) g1; 488 long f3g2 = f3 * cast(long) g2; 489 long f3g3_2 = f3_2 * cast(long) g3; 490 long f3g4 = f3 * cast(long) g4; 491 long f3g5_2 = f3_2 * cast(long) g5; 492 long f3g6 = f3 * cast(long) g6; 493 long f3g7_38 = f3_2 * cast(long) g7_19; 494 long f3g8_19 = f3 * cast(long) g8_19; 495 long f3g9_38 = f3_2 * cast(long) g9_19; 496 long f4g0 = f4 * cast(long) g0; 497 long f4g1 = f4 * cast(long) g1; 498 long f4g2 = f4 * cast(long) g2; 499 long f4g3 = f4 * cast(long) g3; 500 long f4g4 = f4 * cast(long) g4; 501 long f4g5 = f4 * cast(long) g5; 502 long f4g6_19 = f4 * cast(long) g6_19; 503 long f4g7_19 = f4 * cast(long) g7_19; 504 long f4g8_19 = f4 * cast(long) g8_19; 505 long f4g9_19 = f4 * cast(long) g9_19; 506 long f5g0 = f5 * cast(long) g0; 507 long f5g1_2 = f5_2 * cast(long) g1; 508 long f5g2 = f5 * cast(long) g2; 509 long f5g3_2 = f5_2 * cast(long) g3; 510 long f5g4 = f5 * cast(long) g4; 511 long f5g5_38 = f5_2 * cast(long) g5_19; 512 long f5g6_19 = f5 * cast(long) g6_19; 513 long f5g7_38 = f5_2 * cast(long) g7_19; 514 long f5g8_19 = f5 * cast(long) g8_19; 515 long f5g9_38 = f5_2 * cast(long) g9_19; 516 long f6g0 = f6 * cast(long) g0; 517 long f6g1 = f6 * cast(long) g1; 518 long f6g2 = f6 * cast(long) g2; 519 long f6g3 = f6 * cast(long) g3; 520 long f6g4_19 = f6 * cast(long) g4_19; 521 long f6g5_19 = f6 * cast(long) g5_19; 522 long f6g6_19 = f6 * cast(long) g6_19; 523 long f6g7_19 = f6 * cast(long) g7_19; 524 long f6g8_19 = f6 * cast(long) g8_19; 525 long f6g9_19 = f6 * cast(long) g9_19; 526 long f7g0 = f7 * cast(long) g0; 527 long f7g1_2 = f7_2 * cast(long) g1; 528 long f7g2 = f7 * cast(long) g2; 529 long f7g3_38 = f7_2 * cast(long) g3_19; 530 long f7g4_19 = f7 * cast(long) g4_19; 531 long f7g5_38 = f7_2 * cast(long) g5_19; 532 long f7g6_19 = f7 * cast(long) g6_19; 533 long f7g7_38 = f7_2 * cast(long) g7_19; 534 long f7g8_19 = f7 * cast(long) g8_19; 535 long f7g9_38 = f7_2 * cast(long) g9_19; 536 long f8g0 = f8 * cast(long) g0; 537 long f8g1 = f8 * cast(long) g1; 538 long f8g2_19 = f8 * cast(long) g2_19; 539 long f8g3_19 = f8 * cast(long) g3_19; 540 long f8g4_19 = f8 * cast(long) g4_19; 541 long f8g5_19 = f8 * cast(long) g5_19; 542 long f8g6_19 = f8 * cast(long) g6_19; 543 long f8g7_19 = f8 * cast(long) g7_19; 544 long f8g8_19 = f8 * cast(long) g8_19; 545 long f8g9_19 = f8 * cast(long) g9_19; 546 long f9g0 = f9 * cast(long) g0; 547 long f9g1_38 = f9_2 * cast(long) g1_19; 548 long f9g2_19 = f9 * cast(long) g2_19; 549 long f9g3_38 = f9_2 * cast(long) g3_19; 550 long f9g4_19 = f9 * cast(long) g4_19; 551 long f9g5_38 = f9_2 * cast(long) g5_19; 552 long f9g6_19 = f9 * cast(long) g6_19; 553 long f9g7_38 = f9_2 * cast(long) g7_19; 554 long f9g8_19 = f9 * cast(long) g8_19; 555 long f9g9_38 = f9_2 * cast(long) g9_19; 556 long h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; 557 long h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; 558 long h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; 559 long h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; 560 long h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; 561 long h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; 562 long h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; 563 long h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; 564 long h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; 565 long h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; 566 long carry0; 567 long carry1; 568 long carry2; 569 long carry3; 570 long carry4; 571 long carry5; 572 long carry6; 573 long carry7; 574 long carry8; 575 long carry9; 576 577 /* 578 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) 579 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 580 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) 581 i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 582 */ 583 584 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 585 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 586 /* |h0| <= 2^25 */ 587 /* |h4| <= 2^25 */ 588 /* |h1| <= 1.71*2^59 */ 589 /* |h5| <= 1.71*2^59 */ 590 591 carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25); 592 carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25); 593 /* |h1| <= 2^24; from now on fits into int32 */ 594 /* |h5| <= 2^24; from now on fits into int32 */ 595 /* |h2| <= 1.41*2^60 */ 596 /* |h6| <= 1.41*2^60 */ 597 598 carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26); 599 carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26); 600 /* |h2| <= 2^25; from now on fits into int32 unchanged */ 601 /* |h6| <= 2^25; from now on fits into int32 unchanged */ 602 /* |h3| <= 1.71*2^59 */ 603 /* |h7| <= 1.71*2^59 */ 604 605 carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25); 606 carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25); 607 /* |h3| <= 2^24; from now on fits into int32 unchanged */ 608 /* |h7| <= 2^24; from now on fits into int32 unchanged */ 609 /* |h4| <= 1.72*2^34 */ 610 /* |h8| <= 1.41*2^60 */ 611 612 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 613 carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26); 614 /* |h4| <= 2^25; from now on fits into int32 unchanged */ 615 /* |h8| <= 2^25; from now on fits into int32 unchanged */ 616 /* |h5| <= 1.01*2^24 */ 617 /* |h9| <= 1.71*2^59 */ 618 619 carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25); 620 /* |h9| <= 2^24; from now on fits into int32 unchanged */ 621 /* |h0| <= 1.1*2^39 */ 622 623 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 624 /* |h0| <= 2^25; from now on fits into int32 unchanged */ 625 /* |h1| <= 1.01*2^24 */ 626 fe h; 627 h[0] = cast(int) h0; 628 h[1] = cast(int) h1; 629 h[2] = cast(int) h2; 630 h[3] = cast(int) h3; 631 h[4] = cast(int) h4; 632 h[5] = cast(int) h5; 633 h[6] = cast(int) h6; 634 h[7] = cast(int) h7; 635 h[8] = cast(int) h8; 636 h[9] = cast(int) h9; 637 638 return h; 639 } 640 641 642 /// Returns f * 121666 643 /// 644 /// Preconditions: 645 /// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 646 /// 647 /// Postconditions: 648 /// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 649 public fe fe_mul121666(in ref fe f) pure 650 { 651 int f0 = f[0]; 652 int f1 = f[1]; 653 int f2 = f[2]; 654 int f3 = f[3]; 655 int f4 = f[4]; 656 int f5 = f[5]; 657 int f6 = f[6]; 658 int f7 = f[7]; 659 int f8 = f[8]; 660 int f9 = f[9]; 661 long h0 = f0 * cast(long) 121666; 662 long h1 = f1 * cast(long) 121666; 663 long h2 = f2 * cast(long) 121666; 664 long h3 = f3 * cast(long) 121666; 665 long h4 = f4 * cast(long) 121666; 666 long h5 = f5 * cast(long) 121666; 667 long h6 = f6 * cast(long) 121666; 668 long h7 = f7 * cast(long) 121666; 669 long h8 = f8 * cast(long) 121666; 670 long h9 = f9 * cast(long) 121666; 671 long carry0; 672 long carry1; 673 long carry2; 674 long carry3; 675 long carry4; 676 long carry5; 677 long carry6; 678 long carry7; 679 long carry8; 680 long carry9; 681 682 carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; 683 carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; 684 carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; 685 carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; 686 carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; 687 688 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; 689 carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; 690 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; 691 carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; 692 carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; 693 694 fe h; 695 h[0] = cast(uint) h0; 696 h[1] = cast(uint) h1; 697 h[2] = cast(uint) h2; 698 h[3] = cast(uint) h3; 699 h[4] = cast(uint) h4; 700 h[5] = cast(uint) h5; 701 h[6] = cast(uint) h6; 702 h[7] = cast(uint) h7; 703 h[8] = cast(uint) h8; 704 h[9] = cast(uint) h9; 705 706 return h; 707 } 708 709 /// Returns: z^(2^(225-23)) 710 package fe fe_pow22523(in ref fe z) pure 711 { 712 fe t0; 713 fe t1; 714 fe t2; 715 t0 = z.sq; 716 t1 = t0.sq.sq; 717 718 t1 *= z; 719 t0 *= t1; 720 t0 = t0.sq; 721 t0 *= t1; 722 723 // t1 = z.cpow!9; 724 // t0 = z.cpow!31; 725 726 t1 = t0.sq!5; 727 728 t0 *= t1; 729 730 t1 = t0.sq!10; 731 732 t1 *= t0; 733 734 t2 = t1.sq!20; 735 736 t1 *= t2; 737 738 t1 = t1.sq!10; 739 740 t0 *= t1; 741 742 t1 = t0.sq!50; 743 744 t1 *= t0; 745 746 t2 = t1.sq!100; 747 748 t1 *= t2; 749 750 t1 = t1.sq!50; 751 752 t0 *= t1; 753 t0 = t0.sq.sq; 754 755 return t0 * z; 756 } 757 758 759 /** 760 Returns: h = f * f 761 Can overlap h with f. 762 763 Preconditions: 764 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 765 766 Postconditions: 767 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 768 769 Note: 770 See fe_mul.c for discussion of implementation strategy. 771 */ 772 private fe fe_sq(in ref fe f) pure 773 { 774 int f0 = f[0]; 775 int f1 = f[1]; 776 int f2 = f[2]; 777 int f3 = f[3]; 778 int f4 = f[4]; 779 int f5 = f[5]; 780 int f6 = f[6]; 781 int f7 = f[7]; 782 int f8 = f[8]; 783 int f9 = f[9]; 784 int f0_2 = 2 * f0; 785 int f1_2 = 2 * f1; 786 int f2_2 = 2 * f2; 787 int f3_2 = 2 * f3; 788 int f4_2 = 2 * f4; 789 int f5_2 = 2 * f5; 790 int f6_2 = 2 * f6; 791 int f7_2 = 2 * f7; 792 int f5_38 = 38 * f5; /* 1.959375*2^30 */ 793 int f6_19 = 19 * f6; /* 1.959375*2^30 */ 794 int f7_38 = 38 * f7; /* 1.959375*2^30 */ 795 int f8_19 = 19 * f8; /* 1.959375*2^30 */ 796 int f9_38 = 38 * f9; /* 1.959375*2^30 */ 797 long f0f0 = f0 * cast(long) f0; 798 long f0f1_2 = f0_2 * cast(long) f1; 799 long f0f2_2 = f0_2 * cast(long) f2; 800 long f0f3_2 = f0_2 * cast(long) f3; 801 long f0f4_2 = f0_2 * cast(long) f4; 802 long f0f5_2 = f0_2 * cast(long) f5; 803 long f0f6_2 = f0_2 * cast(long) f6; 804 long f0f7_2 = f0_2 * cast(long) f7; 805 long f0f8_2 = f0_2 * cast(long) f8; 806 long f0f9_2 = f0_2 * cast(long) f9; 807 long f1f1_2 = f1_2 * cast(long) f1; 808 long f1f2_2 = f1_2 * cast(long) f2; 809 long f1f3_4 = f1_2 * cast(long) f3_2; 810 long f1f4_2 = f1_2 * cast(long) f4; 811 long f1f5_4 = f1_2 * cast(long) f5_2; 812 long f1f6_2 = f1_2 * cast(long) f6; 813 long f1f7_4 = f1_2 * cast(long) f7_2; 814 long f1f8_2 = f1_2 * cast(long) f8; 815 long f1f9_76 = f1_2 * cast(long) f9_38; 816 long f2f2 = f2 * cast(long) f2; 817 long f2f3_2 = f2_2 * cast(long) f3; 818 long f2f4_2 = f2_2 * cast(long) f4; 819 long f2f5_2 = f2_2 * cast(long) f5; 820 long f2f6_2 = f2_2 * cast(long) f6; 821 long f2f7_2 = f2_2 * cast(long) f7; 822 long f2f8_38 = f2_2 * cast(long) f8_19; 823 long f2f9_38 = f2 * cast(long) f9_38; 824 long f3f3_2 = f3_2 * cast(long) f3; 825 long f3f4_2 = f3_2 * cast(long) f4; 826 long f3f5_4 = f3_2 * cast(long) f5_2; 827 long f3f6_2 = f3_2 * cast(long) f6; 828 long f3f7_76 = f3_2 * cast(long) f7_38; 829 long f3f8_38 = f3_2 * cast(long) f8_19; 830 long f3f9_76 = f3_2 * cast(long) f9_38; 831 long f4f4 = f4 * cast(long) f4; 832 long f4f5_2 = f4_2 * cast(long) f5; 833 long f4f6_38 = f4_2 * cast(long) f6_19; 834 long f4f7_38 = f4 * cast(long) f7_38; 835 long f4f8_38 = f4_2 * cast(long) f8_19; 836 long f4f9_38 = f4 * cast(long) f9_38; 837 long f5f5_38 = f5 * cast(long) f5_38; 838 long f5f6_38 = f5_2 * cast(long) f6_19; 839 long f5f7_76 = f5_2 * cast(long) f7_38; 840 long f5f8_38 = f5_2 * cast(long) f8_19; 841 long f5f9_76 = f5_2 * cast(long) f9_38; 842 long f6f6_19 = f6 * cast(long) f6_19; 843 long f6f7_38 = f6 * cast(long) f7_38; 844 long f6f8_38 = f6_2 * cast(long) f8_19; 845 long f6f9_38 = f6 * cast(long) f9_38; 846 long f7f7_38 = f7 * cast(long) f7_38; 847 long f7f8_38 = f7_2 * cast(long) f8_19; 848 long f7f9_76 = f7_2 * cast(long) f9_38; 849 long f8f8_19 = f8 * cast(long) f8_19; 850 long f8f9_38 = f8 * cast(long) f9_38; 851 long f9f9_38 = f9 * cast(long) f9_38; 852 long h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 853 long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 854 long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 855 long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 856 long h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 857 long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 858 long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 859 long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 860 long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 861 long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 862 long carry0; 863 long carry1; 864 long carry2; 865 long carry3; 866 long carry4; 867 long carry5; 868 long carry6; 869 long carry7; 870 long carry8; 871 long carry9; 872 873 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 874 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 875 876 carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25); 877 carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25); 878 879 carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26); 880 carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26); 881 882 carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25); 883 carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25); 884 885 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 886 carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26); 887 888 carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25); 889 890 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 891 892 fe h; 893 h[0] = cast(int) h0; 894 h[1] = cast(int) h1; 895 h[2] = cast(int) h2; 896 h[3] = cast(int) h3; 897 h[4] = cast(int) h4; 898 h[5] = cast(int) h5; 899 h[6] = cast(int) h6; 900 h[7] = cast(int) h7; 901 h[8] = cast(int) h8; 902 h[9] = cast(int) h9; 903 return h; 904 } 905 906 /** 907 Returns: h = 2 * f * f 908 Can overlap h with f. 909 910 Preconditions: 911 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. 912 913 Postconditions: 914 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. 915 916 Note: 917 See fe_mul.c for discussion of implementation strategy. 918 */ 919 private fe fe_sq2(in ref fe f) pure 920 { 921 int f0 = f[0]; 922 int f1 = f[1]; 923 int f2 = f[2]; 924 int f3 = f[3]; 925 int f4 = f[4]; 926 int f5 = f[5]; 927 int f6 = f[6]; 928 int f7 = f[7]; 929 int f8 = f[8]; 930 int f9 = f[9]; 931 int f0_2 = 2 * f0; 932 int f1_2 = 2 * f1; 933 int f2_2 = 2 * f2; 934 int f3_2 = 2 * f3; 935 int f4_2 = 2 * f4; 936 int f5_2 = 2 * f5; 937 int f6_2 = 2 * f6; 938 int f7_2 = 2 * f7; 939 int f5_38 = 38 * f5; /* 1.959375*2^30 */ 940 int f6_19 = 19 * f6; /* 1.959375*2^30 */ 941 int f7_38 = 38 * f7; /* 1.959375*2^30 */ 942 int f8_19 = 19 * f8; /* 1.959375*2^30 */ 943 int f9_38 = 38 * f9; /* 1.959375*2^30 */ 944 long f0f0 = f0 * cast(long) f0; 945 long f0f1_2 = f0_2 * cast(long) f1; 946 long f0f2_2 = f0_2 * cast(long) f2; 947 long f0f3_2 = f0_2 * cast(long) f3; 948 long f0f4_2 = f0_2 * cast(long) f4; 949 long f0f5_2 = f0_2 * cast(long) f5; 950 long f0f6_2 = f0_2 * cast(long) f6; 951 long f0f7_2 = f0_2 * cast(long) f7; 952 long f0f8_2 = f0_2 * cast(long) f8; 953 long f0f9_2 = f0_2 * cast(long) f9; 954 long f1f1_2 = f1_2 * cast(long) f1; 955 long f1f2_2 = f1_2 * cast(long) f2; 956 long f1f3_4 = f1_2 * cast(long) f3_2; 957 long f1f4_2 = f1_2 * cast(long) f4; 958 long f1f5_4 = f1_2 * cast(long) f5_2; 959 long f1f6_2 = f1_2 * cast(long) f6; 960 long f1f7_4 = f1_2 * cast(long) f7_2; 961 long f1f8_2 = f1_2 * cast(long) f8; 962 long f1f9_76 = f1_2 * cast(long) f9_38; 963 long f2f2 = f2 * cast(long) f2; 964 long f2f3_2 = f2_2 * cast(long) f3; 965 long f2f4_2 = f2_2 * cast(long) f4; 966 long f2f5_2 = f2_2 * cast(long) f5; 967 long f2f6_2 = f2_2 * cast(long) f6; 968 long f2f7_2 = f2_2 * cast(long) f7; 969 long f2f8_38 = f2_2 * cast(long) f8_19; 970 long f2f9_38 = f2 * cast(long) f9_38; 971 long f3f3_2 = f3_2 * cast(long) f3; 972 long f3f4_2 = f3_2 * cast(long) f4; 973 long f3f5_4 = f3_2 * cast(long) f5_2; 974 long f3f6_2 = f3_2 * cast(long) f6; 975 long f3f7_76 = f3_2 * cast(long) f7_38; 976 long f3f8_38 = f3_2 * cast(long) f8_19; 977 long f3f9_76 = f3_2 * cast(long) f9_38; 978 long f4f4 = f4 * cast(long) f4; 979 long f4f5_2 = f4_2 * cast(long) f5; 980 long f4f6_38 = f4_2 * cast(long) f6_19; 981 long f4f7_38 = f4 * cast(long) f7_38; 982 long f4f8_38 = f4_2 * cast(long) f8_19; 983 long f4f9_38 = f4 * cast(long) f9_38; 984 long f5f5_38 = f5 * cast(long) f5_38; 985 long f5f6_38 = f5_2 * cast(long) f6_19; 986 long f5f7_76 = f5_2 * cast(long) f7_38; 987 long f5f8_38 = f5_2 * cast(long) f8_19; 988 long f5f9_76 = f5_2 * cast(long) f9_38; 989 long f6f6_19 = f6 * cast(long) f6_19; 990 long f6f7_38 = f6 * cast(long) f7_38; 991 long f6f8_38 = f6_2 * cast(long) f8_19; 992 long f6f9_38 = f6 * cast(long) f9_38; 993 long f7f7_38 = f7 * cast(long) f7_38; 994 long f7f8_38 = f7_2 * cast(long) f8_19; 995 long f7f9_76 = f7_2 * cast(long) f9_38; 996 long f8f8_19 = f8 * cast(long) f8_19; 997 long f8f9_38 = f8 * cast(long) f9_38; 998 long f9f9_38 = f9 * cast(long) f9_38; 999 long h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; 1000 long h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; 1001 long h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; 1002 long h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; 1003 long h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; 1004 long h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; 1005 long h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; 1006 long h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; 1007 long h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; 1008 long h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; 1009 long carry0; 1010 long carry1; 1011 long carry2; 1012 long carry3; 1013 long carry4; 1014 long carry5; 1015 long carry6; 1016 long carry7; 1017 long carry8; 1018 long carry9; 1019 1020 h0 += h0; 1021 h1 += h1; 1022 h2 += h2; 1023 h3 += h3; 1024 h4 += h4; 1025 h5 += h5; 1026 h6 += h6; 1027 h7 += h7; 1028 h8 += h8; 1029 h9 += h9; 1030 1031 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 1032 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 1033 1034 carry1 = (h1 + cast(long) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25); 1035 carry5 = (h5 + cast(long) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25); 1036 1037 carry2 = (h2 + cast(long) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26); 1038 carry6 = (h6 + cast(long) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26); 1039 1040 carry3 = (h3 + cast(long) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25); 1041 carry7 = (h7 + cast(long) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25); 1042 1043 carry4 = (h4 + cast(long) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26); 1044 carry8 = (h8 + cast(long) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26); 1045 1046 carry9 = (h9 + cast(long) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25); 1047 1048 carry0 = (h0 + cast(long) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26); 1049 1050 fe h; 1051 h[0] = cast(int) h0; 1052 h[1] = cast(int) h1; 1053 h[2] = cast(int) h2; 1054 h[3] = cast(int) h3; 1055 h[4] = cast(int) h4; 1056 h[5] = cast(int) h5; 1057 h[6] = cast(int) h6; 1058 h[7] = cast(int) h7; 1059 h[8] = cast(int) h8; 1060 h[9] = cast(int) h9; 1061 return h; 1062 } 1063 1064 ///** 1065 // Returns: h = f - g 1066 // Can overlap h with f or g. 1067 // 1068 // Preconditions: 1069 // |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 1070 // |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 1071 // 1072 // Postconditions: 1073 // |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 1074 // */ 1075 //void fe_sub(ref fe h, in ref fe f, in ref fe g) 1076 //{ 1077 // h[] = f[] - g[]; 1078 //} 1079 1080 1081 /** 1082 * 1083 * Params: 1084 * s = 32 byte buffer 1085 * 1086 Preconditions: 1087 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 1088 1089 Write p=2^255-19; q=floor(h/p). 1090 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). 1091 1092 Proof: 1093 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. 1094 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. 1095 1096 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). 1097 Then 0<y<1. 1098 1099 Write r=h-pq. 1100 Have 0<=r<=p-1=2^255-20. 1101 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1. 1102 1103 Write x=r+19(2^-255)r+y. 1104 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q. 1105 1106 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1)) 1107 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. 1108 */ 1109 private ubyte[32] fe_tobytes(in ref fe h) pure 1110 { 1111 int h0 = h[0]; 1112 int h1 = h[1]; 1113 int h2 = h[2]; 1114 int h3 = h[3]; 1115 int h4 = h[4]; 1116 int h5 = h[5]; 1117 int h6 = h[6]; 1118 int h7 = h[7]; 1119 int h8 = h[8]; 1120 int h9 = h[9]; 1121 int q; 1122 int carry0; 1123 int carry1; 1124 int carry2; 1125 int carry3; 1126 int carry4; 1127 int carry5; 1128 int carry6; 1129 int carry7; 1130 int carry8; 1131 int carry9; 1132 1133 q = (19 * h9 + ((cast(int) 1) << 24)) >> 25; 1134 q = (h0 + q) >> 26; 1135 q = (h1 + q) >> 25; 1136 q = (h2 + q) >> 26; 1137 q = (h3 + q) >> 25; 1138 q = (h4 + q) >> 26; 1139 q = (h5 + q) >> 25; 1140 q = (h6 + q) >> 26; 1141 q = (h7 + q) >> 25; 1142 q = (h8 + q) >> 26; 1143 q = (h9 + q) >> 25; 1144 1145 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ 1146 h0 += 19 * q; 1147 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ 1148 1149 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; 1150 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; 1151 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; 1152 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; 1153 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; 1154 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; 1155 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; 1156 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; 1157 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; 1158 carry9 = h9 >> 25; h9 -= carry9 << 25; 1159 /* h10 = carry9 */ 1160 1161 /* 1162 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. 1163 Have h0+...+2^230 h9 between 0 and 2^255-1; 1164 evidently 2^255 h10-2^255 q = 0. 1165 Goal: Output h0+...+2^230 h9. 1166 */ 1167 1168 ubyte[32] s; 1169 s[0] = cast(ubyte) (h0 >> 0); 1170 s[1] = cast(ubyte) (h0 >> 8); 1171 s[2] = cast(ubyte) (h0 >> 16); 1172 s[3] = cast(ubyte) (((h0 >> 24) | (h1 << 2))); 1173 s[4] = cast(ubyte) (h1 >> 6); 1174 s[5] = cast(ubyte) (h1 >> 14); 1175 s[6] = cast(ubyte) (((h1 >> 22) | (h2 << 3))); 1176 s[7] = cast(ubyte) (h2 >> 5); 1177 s[8] = cast(ubyte) (h2 >> 13); 1178 s[9] = cast(ubyte) (((h2 >> 21) | (h3 << 5))); 1179 s[10] = cast(ubyte) (h3 >> 3); 1180 s[11] = cast(ubyte) (h3 >> 11); 1181 s[12] = cast(ubyte) (((h3 >> 19) | (h4 << 6))); 1182 s[13] = cast(ubyte) (h4 >> 2); 1183 s[14] = cast(ubyte) (h4 >> 10); 1184 s[15] = cast(ubyte) (h4 >> 18); 1185 s[16] = cast(ubyte) (h5 >> 0); 1186 s[17] = cast(ubyte) (h5 >> 8); 1187 s[18] = cast(ubyte) (h5 >> 16); 1188 s[19] = cast(ubyte) (((h5 >> 24) | (h6 << 1))); 1189 s[20] = cast(ubyte) (h6 >> 7); 1190 s[21] = cast(ubyte) (h6 >> 15); 1191 s[22] = cast(ubyte) (((h6 >> 23) | (h7 << 3))); 1192 s[23] = cast(ubyte) (h7 >> 5); 1193 s[24] = cast(ubyte) (h7 >> 13); 1194 s[25] = cast(ubyte) (((h7 >> 21) | (h8 << 4))); 1195 s[26] = cast(ubyte) (h8 >> 4); 1196 s[27] = cast(ubyte) (h8 >> 12); 1197 s[28] = cast(ubyte) (((h8 >> 20) | (h9 << 6))); 1198 s[29] = cast(ubyte) (h9 >> 2); 1199 s[30] = cast(ubyte) (h9 >> 10); 1200 s[31] = cast(ubyte) (h9 >> 18); 1201 1202 return s; 1203 } 1204 1205 /// Conditional swap. 1206 /// Replace (f,g) with (g,f) if b == 1; 1207 /// replace (f,g) with (f,g) if b == 0. 1208 /// Params: 1209 /// b = 0 or 1 1210 void fe_cswap(ref fe f, ref fe g, in uint b) 1211 in { 1212 assert(b == 0 || b == 1); 1213 } body 1214 { 1215 // TODO refactor 1216 immutable uint mask = -b; 1217 1218 assert(mask == 0 || mask == 0xFFFFFFFF); 1219 1220 f[] ^= mask & g[]; 1221 g[] ^= mask & f[]; 1222 f[] ^= mask & g[]; 1223 } 1224 1225 unittest { 1226 fe a; 1227 fe b; 1228 a[] = 1; 1229 b[] = 2; 1230 fe A = 1; 1231 fe B = 2; 1232 1233 fe_cswap(A, B, 0); 1234 assert(A == a); 1235 assert(B == b); 1236 fe_cswap(A, B, 1); 1237 assert(A == b); 1238 assert(B == a); 1239 }