module dcrypt.blockcipher.aesni;

///
/// This module provides a hardware accelerated implementation of AES using the Intel AES instruction set.
/// Aside from being faster than software AES this implementation is presumably less prone to cache timing attacks such as FLUSH+RELOAD.
///
/// The assembler instructions for key setup are more or less copied from linux kernel crypto (arch/x86/crypto/aesni-intel_asm.S).
///
/// This code relies on intel AES and SSE2 instruction sets. If the CPU does not support all of these, an error will be thrown.
/// Consider using checkHardwareAES() to check if aesni is supported.
///

import core.cpuid;
import dcrypt.blockcipher.blockcipher;

/// OOP API wrapper for AESNI
alias BlockCipherWrapper!AESNI AESNIEngine;

version (D_InlineAsm_X86_64) {
    // enable AESNI
    version = AESNI;
}

// TODO what if AESNI is not supported?
version(AESNI) {} else {
    // fall back to software aes implementation
    import dcrypt.blockcipher.aes;
    alias AESNI = AES; // use software AES on non x86_64 platforms

}

/// Check if CPU supports aes instructions.
///
/// Returns: true if and only if CPU supports AES acceleration
/// (both the AES and the SSE2 instruction sets are reported by core.cpuid).
/// Always false when this module was built without the inline assembler backend.
public static bool checkHardwareAES() nothrow @nogc {
    version(AESNI) {
        // logical AND: both feature flags are plain booleans
        return aes && sse2;
    } else {
        return false;
    }
}

version (AESNI) {

    ///
    /// hardware accelerated aes implementation
    /// makes use of intel AESNI
    ///
    /// This code relies on intel AES and SSE2 instruction sets. If the CPU does not support all of these, an error will be thrown.
    /// Consider using checkHardwareAES() to check if aesni is supported.
    ///
    @safe
    public struct AESNI
    {

        public enum blockSize = 16;
        public enum name = "AES-NI";

        public {

            /// Set up the cipher: selects the number of round keys from the
            /// key length and expands the user key into the working key schedule.
            ///
            /// Params:
            /// forEncryption = `false`: decrypt, `true`: encrypt
            /// userKey = Secret key. Must be 16, 24 or 32 bytes long.
            /// iv = Not used.
            ///
            /// Throws: Error if aes instruction set is not supported by CPU.
            /// Use checkHardwareAES() to avoid running into this error.
            void start(bool forEncryption, in ubyte[] userKey, in ubyte[] iv = null) nothrow @nogc
            {
                assertHardwareSupport();

                this.forEncryption = forEncryption;

                // `rounds` counts the round keys (whitening key included).
                switch(userKey.length) {
                    case 16: rounds = 11;
                        break;
                    case 24: rounds = 13;
                        break;
                    case 32: rounds = 15;
                        break;
                    default: assert(false, "Invalid user key size. (16, 24 or 32 bytes allowed)");
                }

                AES_KEY_EXPANSION(userKey, workingKey[0..rounds*16], forEncryption);

                initialized = true;
            }

            /// Encrypt or decrypt (depending on how `start` was called) one block.
            ///
            /// Params:
            /// input = at least `blockSize` bytes; only the first `blockSize` bytes are read
            /// output = at least `blockSize` bytes; only the first `blockSize` bytes are written
            ///
            /// Returns: number of bytes processed, always `blockSize`.
            uint processBlock(in ubyte[] input, ubyte[] output) nothrow @nogc
            in {
                assert(initialized, "AESNI engine not initialized");
                assert(blockSize<=input.length, "input buffer too short");
                assert(blockSize<=output.length, "output buffer too short");
            }
            body {

                // The low level routines require an input of exactly one block,
                // whereas this function accepts longer buffers. Slice both buffers
                // down to one block so that a longer (valid) buffer does not trip
                // the inner contracts.
                if (forEncryption)
                {
                    AES_ENCRYPT(workingKey[0..rounds*16], input[0..blockSize], output[0..blockSize]);
                }
                else
                {
                    AES_DECRYPT(workingKey[0..rounds*16], input[0..blockSize], output[0..blockSize]);
                }

                return blockSize;
            }

            /// Nothing to reset: the engine keeps no state besides the key schedule.
            void reset() nothrow @nogc
            {
            }
        }


        // begin of private section
    private:


        /// AES encryption (1 block)
        /// Params:
        /// key = rounds*16 byte encryption key schedule
        /// input = 16 bytes plaintext
        /// output = at least 16 bytes output buffer
        @trusted
        void AES_ENCRYPT(in ubyte[] key, in ubyte[] input, ubyte[] output) nothrow @nogc
        in {
            assert(key.length == 16*rounds, "invalid key size");
            assert(input.length == 16, "invalid input block size");
            assert(output.length >= 16, "output buffer too small");
        }
        body {

            // local copy of the member so the asm block can address it by name
            ubyte rounds = this.rounds;

            asm @nogc nothrow {

                // XMM0: round key
                // XMM1: data block

                // load plaintext into XMM1
                // (slice layout: length at +0, pointer at +8)
                mov RSI, input+8;           // pointer to plaintext
                movdqu XMM1, [RSI];         // read plaintext block


                mov RSI, key+8;             // pointer to key schedule

                // AES encryption sequence.
                // The data block is in XMM1
                // Register XMM0 holds the round keys (from 0 to rounds-1 in this order).
                // In the end, XMM1 holds the encryption result.
                movdqu XMM0, [RSI+0x00];
                pxor XMM1, XMM0;            // Whitening step (Round 0)
                add RSI, 0x10;

                // loop counter: all round keys except the whitening key and the last one
                xor RCX, RCX;
                mov CL, rounds;
                sub RCX, 2;

                // encryption rounds 1..N-1
            _encLoop:
                movdqu XMM0, [RSI];         // load round key
                aesenc XMM1, XMM0;          // encrypt
                add RSI, 0x10;              // increment round key pointer
                loop _encLoop;

                movdqu XMM0, [RSI];         // load last round key
                aesenclast XMM1, XMM0;      // last round

                // store encrypted data to buffer
                mov RDI, output+8;          // pointer to output buffer
                movdqu [RDI], XMM1;         // write processed data to buffer

                // wipe data
                pxor XMM0, XMM0;
                pxor XMM1, XMM1;
            }
        }

        /// AES decryption (1 block)
        /// Params:
        /// key = rounds*16 byte decryption key schedule
        /// input = 16 bytes ciphertext
        /// output = at least 16 bytes output buffer
        @trusted
        void AES_DECRYPT(in ubyte[] key, in ubyte[] input, ubyte[] output) nothrow @nogc
        in {
            assert(key.length == 16*rounds, "invalid key size");
            assert(input.length == 16, "invalid input block size");
            assert(output.length >= 16, "output buffer too small");
        }
        body {

            // local copy of the member so the asm block can address it by name;
            // explicitly qualified with `this.` to avoid referring to the local itself
            ubyte rounds = this.rounds;

            asm @nogc nothrow {

                // XMM0: round key
                // XMM1: data block



                // load ciphertext block into XMM1
                mov RSI, input + 8;         // get to first block of data
                movdqu XMM1, [RSI];         // get first block of data

                mov RSI, key + 8;           // get pointer to key schedule

                // RCX: round counter
                xor RCX, RCX;
                mov CL, rounds;             // load number of rounds
                dec RCX;                    // set index: rounds-1
                shl RCX, 4;                 // multiply by 0x10
                add RSI, RCX;               // set pointer to last round key
                shr RCX, 4;                 // restore RCX to rounds-1

                movdqu XMM0, [RSI];         // load first round key
                sub RSI, 0x10;              // adjust round key pointer
                pxor XMM1, XMM0;            // Whitening step (Round 0)
                dec RCX;                    // first round done

                // decryption rounds 1..N-1
            _decLoop:
                movdqu XMM0, [RSI];         // load round key
                aesdec XMM1, XMM0;          // decrypt
                sub RSI, 0x10;              // adjust round key pointer
                loop _decLoop;

                movdqu XMM0, [RSI];
                aesdeclast XMM1, XMM0;      // last round

                // store decrypted data to buffer
                mov RSI, output + 8;        // get pointer to output buffer
                movdqu [RSI], XMM1;         // write block to output buffer

                // wipe data
                pxor XMM0, XMM0;
                pxor XMM1, XMM1;
            }
        }

        ///
        /// Expand a 128,192,256 bit user key into N round keys for AES with 128 bit blocks.
        ///
        /// source: linux source code, arch/x86/crypto/aesni-intel_asm.S
        /// Params:
        ///
        /// userKey = the AES key as given by the user
        /// keySchedule = enough space for N round keys
        /// forEncryption = true: generate encryption key, false: generate decryption key
        ///
        ///
        @trusted
        private static void AES_KEY_EXPANSION(in ubyte[] userKey, ubyte[] keySchedule, bool forEncryption) nothrow @nogc
        in {
            assertHardwareSupport();
        }
        body {

            size_t rounds;
            switch(userKey.length) {
                case 16: rounds = 11;
                    break;
                case 24: rounds = 13;
                    break;
                case 32: rounds = 15;
                    break;
                default: assert(false, "Invalid user key size. (16, 24 or 32 bytes allowed)");
            }

            if(keySchedule.length != rounds*16) {
                // Never omit this check, so use assert(false).
                assert(false, "Invalid key schedule size. Should be 'rounds*16' .");
            }

            asm @nogc nothrow {

                // pointer to key schedule: RDI
                // user key length: RDX
                // pointer to user key: RSI

                mov RDI, keySchedule+8;     // pointer to key schedule
                mov RDX, userKey+0;         // length of user key
                mov RSI, userKey+8;         // pointer to user key


                movdqu XMM0, [RSI];         // user key (first 16 bytes)
                movdqu [RDI], XMM0;
                add RDI, 0x10;              // key addr
                // movl %edx, 480(KEYP)
                pxor XMM4, XMM4;            // xmm4 is assumed 0 in _key_expansion_x

                // branch depending on user key length
                cmp DL, 24;                 // len == 192 bits
                jb _enc_key128;
                je _enc_key192;

                // 256 bit
                movdqu XMM2, [RSI+0x10];    // second part of user key (bytes 16...31)
                movdqu [RDI], XMM2;
                add RDI, 0x10;
                aeskeygenassist XMM1, XMM2, 0x1;    // round 1
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x1;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x2;    // round 2
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x2;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x4;    // round 3
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x4;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x8;    // round 4
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x8;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x10;   // round 5
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x10;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x20;   // round 6
                call _key_expansion_256a;
                aeskeygenassist XMM1, XMM0, 0x20;
                call _key_expansion_256b;
                aeskeygenassist XMM1, XMM2, 0x40;   // round 7
                call _key_expansion_256a;
                jmp _end;

                // 192 bit
            _enc_key192:
                movq XMM2, [RSI+0x10];      // second part of user key (bytes 16...23)
                aeskeygenassist XMM1, XMM2, 0x1;    // round 1
                call _key_expansion_192a;
                aeskeygenassist XMM1, XMM2, 0x2;    // round 2
                call _key_expansion_192b;
                aeskeygenassist XMM1, XMM2, 0x4;    // round 3
                call _key_expansion_192a;
                aeskeygenassist XMM1, XMM2, 0x8;    // round 4
                call _key_expansion_192b;
                aeskeygenassist XMM1, XMM2, 0x10;   // round 5
                call _key_expansion_192a;
                aeskeygenassist XMM1, XMM2, 0x20;   // round 6
                call _key_expansion_192b;
                aeskeygenassist XMM1, XMM2, 0x40;   // round 7
                call _key_expansion_192a;
                aeskeygenassist XMM1, XMM2, 0x80;   // round 8
                call _key_expansion_192b;
                jmp _end;

                // 128 bit
            _enc_key128:
                aeskeygenassist XMM1, XMM0, 0x1;    // round 1
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x2;    // round 2
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x4;    // round 3
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x8;    // round 4
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x10;   // round 5
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x20;   // round 6
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x40;   // round 7
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x80;   // round 8
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x1b;   // round 9
                call _key_expansion_128;
                aeskeygenassist XMM1, XMM0, 0x36;   // round 10
                call _key_expansion_128;
                jmp _end;

                // Local subroutines. They are entered with `call` and left with `ret`;
                // each one writes the freshly derived round key(s) to [RDI] and advances RDI.

                align 4;
            _key_expansion_128:;
            _key_expansion_256a:
                pshufd XMM1, XMM1, 0b11111111;
                shufps XMM4, XMM0, 0b00010000;
                pxor XMM0, XMM4;
                shufps XMM4, XMM0, 0b10001100;
                pxor XMM0, XMM4;
                pxor XMM0, XMM1;
                movdqu [RDI], XMM0;
                add RDI, 0x10;
                ret;

                align 4;
            _key_expansion_192a:
                pshufd XMM1, XMM1, 0b01010101;
                shufps XMM4, XMM0, 0b00010000;
                pxor XMM0, XMM4;
                shufps XMM4, XMM0, 0b10001100;
                pxor XMM0, XMM4;
                pxor XMM0, XMM1;

                movdqu XMM5, XMM2;
                movdqu XMM6, XMM2;
                pslldq XMM5, 4;
                pshufd XMM3, XMM0, 0b11111111;
                pxor XMM2, XMM3;
                pxor XMM2, XMM5;

                movdqu XMM1, XMM0;
                shufps XMM6, XMM0, 0b01000100;
                movdqu [RDI], XMM6;
                shufps XMM1, XMM2, 0b01001110;
                movdqu [RDI+0x10], XMM1;
                add RDI, 0x20;
                ret;


                align 4;
            _key_expansion_192b:
                pshufd XMM1, XMM1, 0b01010101;
                shufps XMM4, XMM0, 0b00010000;
                pxor XMM0, XMM4;
                shufps XMM4, XMM0, 0b10001100;
                pxor XMM0, XMM4;
                pxor XMM0, XMM1;

                movdqu XMM5, XMM2;
                pslldq XMM5, 4;
                pshufd XMM3, XMM0, 0b11111111;
                pxor XMM2, XMM3;
                pxor XMM2, XMM5;

                movdqu [RDI], XMM0;
                add RDI, 0x10;
                ret;

                align 4;
            _key_expansion_256b:
                pshufd XMM1, XMM1, 0b10101010;
                shufps XMM4, XMM2, 0b00010000;
                pxor XMM2, XMM4;
                shufps XMM4, XMM2, 0b10001100;
                pxor XMM2, XMM4;
                pxor XMM2, XMM1;
                movdqu [RDI], XMM2;
                add RDI, 0x10;
                ret;

            _end:;

            }

            if(!forEncryption) {
                asm @nogc nothrow {

                    // This section generates the decryption key schedule by
                    // calling AESIMC on all except the first and the last round key.
                    // Note that this way, round keys will be reverse ordered in memory.
                    // TODO: Reorder round keys such that they are in order. Requires adaption of decryption function too.

                    mov RCX, rounds;        // set counter to number of rounds - 2
                    sub RCX, 2;

                    mov RDI, keySchedule+8; // pointer to key output buffer
                    add RDI, 0x10;          // dont modify first key

                _loopDecKey:
                    movdqu XMM0, [RDI];     // load
                    aesimc XMM0, XMM0;      // invert
                    movdqu [RDI], XMM0;     // store

                    add RDI, 0x10;          // increment pointer

                    loop _loopDecKey;       // loop rounds-2 times
                }
            }


            asm @nogc nothrow {
                // clear registers to ensure that no key data is at unexpected locations
                pxor XMM0, XMM0;
                pxor XMM1, XMM1;
                pxor XMM2, XMM2;
                pxor XMM3, XMM3;
                pxor XMM4, XMM4;
                pxor XMM5, XMM5;
                pxor XMM6, XMM6;
            }
        }

        private {
            ubyte[16*15] workingKey;    // space for 15 round keys
            ubyte rounds;               // number of round keys in use (11, 13 or 15)
            bool forEncryption;
            bool initialized;
        }

        /// Asserts that SSE2 and AES instructions are supported.
        /// Throws: an AssertError if CPU does not support required instructions.
        private static void assertHardwareSupport() nothrow @nogc {
            assert(aes, "AES not supported by CPU!");
            assert(sse2, "SSE2 not supported by CPU!");
        }


        //  ~this() {
        //      // wipe working key
        //      workingKey[] = 0;
        //  }


        //
        // unittests
        //


        /// Test AES128-128 encryption key schedule with test vectors from FIPS 197
        unittest {
            immutable ubyte[] key = cast(immutable ubyte[])x"000102030405060708090a0b0c0d0e0f";

            string expectedKeySchedule = x"
                000102030405060708090a0b0c0d0e0f
                d6aa74fdd2af72fadaa678f1d6ab76fe
                b692cf0b643dbdf1be9bc5006830b3fe
                b6ff744ed2c2c9bf6c590cbf0469bf41
                47f7f7bc95353e03f96c32bcfd058dfd
                3caaa3e8a99f9deb50f3af57adf622aa
                5e390f7df7a69296a7553dc10aa31f6b
                14f9701ae35fe28c440adf4d4ea9c026
                47438735a41c65b9e016baf4aebf7ad2
                549932d1f08557681093ed9cbe2c974e
                13111d7fe3944a17f307a78b4d2b30c5
            ";

            ubyte[11*16] keySchedule;
            AES_KEY_EXPANSION(key, keySchedule, true);

            assert(keySchedule == expectedKeySchedule, "128 bit AES_KEY_EXPANSION failed");
        }

        /// Test AES128-128 decryption key schedule with test vectors from FIPS 197
        unittest {
            immutable ubyte[] key = cast(immutable ubyte[])x"000102030405060708090a0b0c0d0e0f";

            // Reverse order compared to FIPS 197 test vectors.
            string expectedKeySchedule = x"
                000102030405060708090a0b0c0d0e0f
                8c56dff0825dd3f9805ad3fc8659d7fd
                a0db02992286d160a2dc029c2485d561
                c7c6e391e54032f1479c306d6319e50c
                a8a2f5044de2c7f50a7ef79869671294
                2ec410276326d7d26958204a003f32de
                72e3098d11c5de5f789dfe1578a2cccb
                8d82fc749c47222be4dadc3e9c7810f5
                1362a4638f2586486bff5a76f7874a83
                13aa29be9c8faff6f770f58000f7bf03
                13111d7fe3944a17f307a78b4d2b30c5
            ";

            ubyte[11*16] keySchedule;
            AES_KEY_EXPANSION(key, keySchedule, false);

            assert(keySchedule == expectedKeySchedule, "128 bit AES_KEY_EXPANSION (decryption) failed");
        }


        /// Test AES128-192 encryption key schedule with test vectors from FIPS 197
        unittest {
            immutable ubyte[] key = cast(immutable ubyte[])x"000102030405060708090a0b0c0d0e0f1011121314151617";
            assert(key.length == 24);
            string expectedKeySchedule = x"
                000102030405060708090a0b0c0d0e0f
                10111213141516175846f2f95c43f4fe
                544afef55847f0fa4856e2e95c43f4fe
                40f949b31cbabd4d48f043b810b7b342

                58e151ab04a2a5557effb5416245080c
                2ab54bb43a02f8f662e3a95d66410c08
                f501857297448d7ebdf1c6ca87f33e3c
                e510976183519b6934157c9ea351f1e0

                1ea0372a995309167c439e77ff12051e
                dd7e0e887e2fff68608fc842f9dcc154
                859f5f237a8d5a3dc0c02952beefd63a
                de601e7827bcdf2ca223800fd8aeda32

                a4970a331a78dc09c418c271e3a41d5d
            ";

            ubyte[13*16] keySchedule;
            AES_KEY_EXPANSION(key, keySchedule, true);

            assert(keySchedule == expectedKeySchedule, "192 bit AES_KEY_EXPANSION failed");
        }

        /// Test AES128-256 encryption key schedule with test vectors from FIPS 197
        unittest {
            immutable ubyte[] key = cast(immutable ubyte[])x"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";

            string expectedKeySchedule = x"
                000102030405060708090a0b0c0d0e0f
                101112131415161718191a1b1c1d1e1f
                a573c29fa176c498a97fce93a572c09c
                1651a8cd0244beda1a5da4c10640bade

                ae87dff00ff11b68a68ed5fb03fc1567
                6de1f1486fa54f9275f8eb5373b8518d
                c656827fc9a799176f294cec6cd5598b
                3de23a75524775e727bf9eb45407cf39

                0bdc905fc27b0948ad5245a4c1871c2f
                45f5a66017b2d387300d4d33640a820a
                7ccff71cbeb4fe5413e6bbf0d261a7df
                f01afafee7a82979d7a5644ab3afe640

                2541fe719bf500258813bbd55a721c0a
                4e5a6699a9f24fe07e572baacdf8cdea
                24fc79ccbf0979e9371ac23c6d68de36
            ";

            ubyte[15*16] keySchedule;
            AES_KEY_EXPANSION(key, keySchedule, true);

            assert(keySchedule[] == expectedKeySchedule[], "256 bit AES_KEY_EXPANSION failed");
        }
    }

    /// Test AES encryption and decryption with different key sizes.
    @safe
    unittest {
        // test vectors from http://www.inconteam.com/software-development/41-encryption/55-aes-test-vectors

        static string[] test_keys = [
            x"2b7e151628aed2a6abf7158809cf4f3c",
            x"8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b",
            x"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4"
        ];

        static string[] test_plaintexts = [
            x"6bc1bee22e409f96e93d7e117393172a",
            x"6bc1bee22e409f96e93d7e117393172a",
            x"6bc1bee22e409f96e93d7e117393172a"
        ];

        static string[] test_ciphertexts = [
            x"3ad77bb40d7a3660a89ecaf32466ef97",
            x"bd334f1d6e45f25ff712a214571fa5cc",
            x"f3eed1bdb5d2a03c064b5a7e3db181f8"

        ];

        AESNIEngine t = new AESNIEngine();

        blockCipherTest(t, test_keys, test_plaintexts, test_ciphertexts);

    }
}