mirror of the now-defunct rocklinux.org
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

5448 lines
170 KiB

  1. diff -pruN linux-2.6.6_orig/drivers/block/Kconfig linux-2.6.6/drivers/block/Kconfig
  2. --- linux-2.6.6_orig/drivers/block/Kconfig 2004-05-16 15:07:54.000000000 +0200
  3. +++ linux-2.6.6/drivers/block/Kconfig 2004-05-16 15:08:27.000000000 +0200
  4. @@ -235,14 +235,6 @@ config BLK_DEV_LOOP
  5. bits of, say, a sound file). This is also safe if the file resides
  6. on a remote file server.
  7. - There are several ways of encrypting disks. Some of these require
  8. - kernel patches. The vanilla kernel offers the cryptoloop option
  9. - and a Device Mapper target (which is superior, as it supports all
  10. - file systems). If you want to use the cryptoloop, say Y to both
  11. - LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
  12. - or later) version of util-linux. Additionally, be aware that
  13. - the cryptoloop is not safe for storing journaled filesystems.
  14. -
  15. Note that this loop device has nothing to do with the loopback
  16. device used for network connections from the machine to itself.
  17. @@ -251,19 +243,12 @@ config BLK_DEV_LOOP
  18. Most users will answer N here.
  19. -config BLK_DEV_CRYPTOLOOP
  20. - tristate "Cryptoloop Support"
  21. - select CRYPTO
  22. +config BLK_DEV_LOOP_AES
  23. + bool "AES encrypted loop device support"
  24. depends on BLK_DEV_LOOP
  25. ---help---
  26. - Say Y here if you want to be able to use the ciphers that are
  27. - provided by the CryptoAPI as loop transformation. This might be
  28. - used as hard disk encryption.
  29. -
  30. - WARNING: This device is not safe for journaled file systems like
  31. - ext3 or Reiserfs. Please use the Device Mapper crypto module
  32. - instead, which can be configured to be on-disk compatible with the
  33. - cryptoloop device.
  34. + If you want to use the AES encryption algorithm to encrypt loop
  35. + devices, say Y here. If you don't know what to do here, say N.
  36. config BLK_DEV_NBD
  37. tristate "Network block device support"
  38. diff -pruN linux-2.6.6_orig/drivers/block/loop.c linux-2.6.6/drivers/block/loop.c
  39. --- linux-2.6.6_orig/drivers/block/loop.c 2004-05-16 15:07:54.000000000 +0200
  40. +++ linux-2.6.6/drivers/block/loop.c 2004-05-16 15:08:27.000000000 +0200
  41. @@ -39,10 +39,36 @@
  42. * Support up to 256 loop devices
  43. * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  44. *
  45. + * AES transfer added. IV is now passed as (512 byte) sector number.
  46. + * Jari Ruusu, May 18 2001
  47. + *
  48. + * External encryption module locking bug fixed.
  49. + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
  50. + *
  51. + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
  52. + * Jari Ruusu, September 2 2001
  53. + *
  54. + * Ported 'pre-allocated buffers + queue rewrite' to BIO for 2.5 kernels
  55. + * Ben Slusky <sluskyb@stwing.org>, March 1 2002
  56. + * Jari Ruusu, March 27 2002
  57. + *
  58. + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
  59. + * Jari Ruusu, May 23 2002
  60. + *
  61. + * Exported hard sector size correctly, fixed file-backed-loop-on-tmpfs bug,
  62. + * plus many more enhancements and optimizations.
  63. + * Adam J. Richter <adam@yggdrasil.com>, Aug 2002
  64. + *
  65. + * Added support for removing offset from IV computations.
  66. + * Jari Ruusu, September 21 2003
  67. + *
  68. + * Added support for MD5 IV computation and multi-key operation.
  69. + * Jari Ruusu, October 8 2003
  70. + *
  71. + *
  72. * Still To Fix:
  73. * - Advisory locking is ignored here.
  74. * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  75. - *
  76. */
  77. #include <linux/config.h>
  78. @@ -51,6 +77,7 @@
  79. #include <linux/sched.h>
  80. #include <linux/fs.h>
  81. #include <linux/file.h>
  82. +#include <linux/bio.h>
  83. #include <linux/stat.h>
  84. #include <linux/errno.h>
  85. #include <linux/major.h>
  86. @@ -66,45 +93,39 @@
  87. #include <linux/suspend.h>
  88. #include <linux/writeback.h>
  89. #include <linux/buffer_head.h> /* for invalidate_bdev() */
  90. -#include <linux/completion.h>
  91. #include <asm/uaccess.h>
  92. +#include <asm/byteorder.h>
  93. +
  94. +#include "../misc/aes.h"
  95. +#include "../misc/md5.h"
  96. +
  97. +#if defined(CONFIG_COMPAT)
  98. +# include <linux/ioctl32.h>
  99. +# define IOCTL32_COMPATIBLE_PTR ((void*)0)
  100. +#endif
  101. static int max_loop = 8;
  102. -static struct loop_device *loop_dev;
  103. static struct gendisk **disks;
  104. /*
  105. * Transfer functions
  106. */
  107. -static int transfer_none(struct loop_device *lo, int cmd,
  108. - struct page *raw_page, unsigned raw_off,
  109. - struct page *loop_page, unsigned loop_off,
  110. - int size, sector_t real_block)
  111. +static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
  112. + char *loop_buf, int size, sector_t real_block)
  113. {
  114. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  115. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  116. + /* this code is only called from file backed loop */
  117. + /* and that code expects this function to be no-op */
  118. - if (cmd == READ)
  119. - memcpy(loop_buf, raw_buf, size);
  120. - else
  121. - memcpy(raw_buf, loop_buf, size);
  122. -
  123. - kunmap_atomic(raw_buf, KM_USER0);
  124. - kunmap_atomic(loop_buf, KM_USER1);
  125. cond_resched();
  126. return 0;
  127. }
  128. -static int transfer_xor(struct loop_device *lo, int cmd,
  129. - struct page *raw_page, unsigned raw_off,
  130. - struct page *loop_page, unsigned loop_off,
  131. - int size, sector_t real_block)
  132. -{
  133. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  134. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  135. - char *in, *out, *key;
  136. - int i, keysize;
  137. +static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
  138. + char *loop_buf, int size, sector_t real_block)
  139. +{
  140. + char *in, *out, *key;
  141. + int i, keysize;
  142. if (cmd == READ) {
  143. in = raw_buf;
  144. @@ -118,14 +139,11 @@ static int transfer_xor(struct loop_devi
  145. keysize = lo->lo_encrypt_key_size;
  146. for (i = 0; i < size; i++)
  147. *out++ = *in++ ^ key[(i & 511) % keysize];
  148. -
  149. - kunmap_atomic(raw_buf, KM_USER0);
  150. - kunmap_atomic(loop_buf, KM_USER1);
  151. cond_resched();
  152. return 0;
  153. }
  154. -static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
  155. +static int xor_init(struct loop_device *lo, struct loop_info64 *info)
  156. {
  157. if (info->lo_encrypt_key_size <= 0)
  158. return -EINVAL;
  159. @@ -135,335 +153,982 @@ static int xor_init(struct loop_device *
  160. static struct loop_func_table none_funcs = {
  161. .number = LO_CRYPT_NONE,
  162. .transfer = transfer_none,
  163. -};
  164. +};
  165. static struct loop_func_table xor_funcs = {
  166. .number = LO_CRYPT_XOR,
  167. .transfer = transfer_xor,
  168. - .init = xor_init
  169. -};
  170. + .init = xor_init,
  171. +};
  172. +
  173. +#if CONFIG_BLK_DEV_LOOP_AES
  174. +typedef struct {
  175. + aes_context *keyPtr[64];
  176. + unsigned keyMask;
  177. +} AESmultiKey;
  178. +
  179. +static AESmultiKey *allocMultiKey(void)
  180. +{
  181. + AESmultiKey *m;
  182. + aes_context *a;
  183. + int x, n;
  184. +
  185. + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
  186. + if(!m) return 0;
  187. + memset(m, 0, sizeof(AESmultiKey));
  188. +
  189. + n = PAGE_SIZE / sizeof(aes_context);
  190. + if(!n) n = 1;
  191. +
  192. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  193. + if(!a) {
  194. + kfree(m);
  195. + return 0;
  196. + }
  197. +
  198. + x = 0;
  199. + while((x < 64) && n) {
  200. + m->keyPtr[x] = a;
  201. + a++;
  202. + x++;
  203. + n--;
  204. + }
  205. + return m;
  206. +}
  207. +
  208. +static void clearAndFreeMultiKey(AESmultiKey *m)
  209. +{
  210. + aes_context *a;
  211. + int x, n;
  212. +
  213. + n = PAGE_SIZE / sizeof(aes_context);
  214. + if(!n) n = 1;
  215. +
  216. + x = 0;
  217. + while(x < 64) {
  218. + a = m->keyPtr[x];
  219. + if(!a) break;
  220. + memset(a, 0, sizeof(aes_context) * n);
  221. + kfree(a);
  222. + x += n;
  223. + }
  224. +
  225. + kfree(m);
  226. +}
  227. +
  228. +static int multiKeySetup(struct loop_device *lo, unsigned char *k)
  229. +{
  230. + AESmultiKey *m;
  231. + aes_context *a;
  232. + int x, y, n;
  233. + union {
  234. + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
  235. + unsigned char b[32];
  236. + } un;
  237. +
  238. + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN))
  239. + return -EPERM;
  240. +
  241. + m = (AESmultiKey *)lo->key_data;
  242. + if(!m) return -ENXIO;
  243. +
  244. + n = PAGE_SIZE / sizeof(aes_context);
  245. + if(!n) n = 1;
  246. +
  247. + x = 0;
  248. + while(x < 64) {
  249. + if(!m->keyPtr[x]) {
  250. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  251. + if(!a) return -ENOMEM;
  252. + y = x;
  253. + while((y < (x + n)) && (y < 64)) {
  254. + m->keyPtr[y] = a;
  255. + a++;
  256. + y++;
  257. + }
  258. + }
  259. + if(copy_from_user(&un.b[0], k, 32)) return -EFAULT;
  260. + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0);
  261. + k += 32;
  262. + x++;
  263. + }
  264. + m->keyMask = 0x3F; /* range 0...63 */
  265. + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
  266. + memset(&un.b[0], 0, 32);
  267. + return 0;
  268. +}
  269. +
  270. +void loop_compute_sector_iv(sector_t devSect, u_int32_t *ivout)
  271. +{
  272. + if(sizeof(sector_t) == 8) {
  273. + ivout[0] = cpu_to_le32(devSect);
  274. + ivout[1] = cpu_to_le32((u_int64_t)devSect>>32);
  275. + ivout[3] = ivout[2] = 0;
  276. + } else {
  277. + ivout[0] = cpu_to_le32(devSect);
  278. + ivout[3] = ivout[2] = ivout[1] = 0;
  279. + }
  280. +}
  281. +
/*
 * Compute the four-word IV used in multi-key mode by running the MD5
 * transform over 496 bytes at 'data' followed by the sector number.
 *
 * devSect  sector number mixed into the last 64-byte block (low 56 bits)
 * ivout    receives the four 32-bit result words; on big-endian CPUs the
 *          words are passed through cpu_to_le32() before returning
 * data     start of the 496 bytes to hash; transfer_aes() passes the
 *          address of byte 16 of a 512-byte sector (blocks 1..31)
 *
 * Eight transforms are run in total: seven over full 64-byte chunks of
 * 'data', and a final one over the remaining 48 data bytes plus four
 * hand-built words (sector number, a marker bit and the bit count 4024).
 */
void loop_compute_md5_iv(sector_t devSect, u_int32_t *ivout, u_int32_t *data)
{
	int x;
#if defined(__BIG_ENDIAN)
	int y, e;
#endif
	u_int32_t buf[16];

	/* MD5 initial chaining values (RFC 1321) */
	ivout[0] = 0x67452301;
	ivout[1] = 0xefcdab89;
	ivout[2] = 0x98badcfe;
	ivout[3] = 0x10325476;

#if defined(__BIG_ENDIAN)
	/* y counts the eight transforms down from 7; e is the number of */
	/* data words copied into buf[] for the current transform */
	y = 7;
	e = 16;
	do {
		if (!y) {
			/* last transform: only 12 data words, buf[12..15] are built here */
			e = 12;
			/* md5_transform_CPUbyteorder wants data in CPU byte order */
			/* devSect is already in CPU byte order -- no need to convert */
			if(sizeof(sector_t) == 8) {
				/* use only 56 bits of sector number */
				buf[12] = devSect;
				buf[13] = (((u_int64_t)devSect >> 32) & 0xFFFFFF) | 0x80000000;
			} else {
				/* 32 bits of sector number + 24 zero bits */
				buf[12] = devSect;
				buf[13] = 0x80000000;
			}
			/* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
			buf[14] = 4024;
			buf[15] = 0;
		}
		x = 0;
		do {
			/* copy four data words at a time through cpu_to_le32() */
			buf[x    ] = cpu_to_le32(data[0]);
			buf[x + 1] = cpu_to_le32(data[1]);
			buf[x + 2] = cpu_to_le32(data[2]);
			buf[x + 3] = cpu_to_le32(data[3]);
			x += 4;
			data += 4;
		} while (x < e);
		md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
	} while (--y >= 0);
	/* store the result words via cpu_to_le32() */
	ivout[0] = cpu_to_le32(ivout[0]);
	ivout[1] = cpu_to_le32(ivout[1]);
	ivout[2] = cpu_to_le32(ivout[2]);
	ivout[3] = cpu_to_le32(ivout[3]);
#else
	/* seven transforms straight from the caller's buffer, 16 words each */
	x = 6;
	do {
		md5_transform_CPUbyteorder(&ivout[0], data);
		data += 16;
	} while (--x >= 0);
	/* remaining 48 data bytes go into buf[0..11]; buf[12..15] are built below */
	memcpy(buf, data, 48);
	/* md5_transform_CPUbyteorder wants data in CPU byte order */
	/* devSect is already in CPU byte order -- no need to convert */
	if(sizeof(sector_t) == 8) {
		/* use only 56 bits of sector number */
		buf[12] = devSect;
		buf[13] = (((u_int64_t)devSect >> 32) & 0xFFFFFF) | 0x80000000;
	} else {
		/* 32 bits of sector number + 24 zero bits */
		buf[12] = devSect;
		buf[13] = 0x80000000;
	}
	/* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
	buf[14] = 4024;
	buf[15] = 0;
	md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
#endif
}
  355. +
/*
 * AES transfer function: CBC-decrypt (cmd == READ) raw_buf into loop_buf,
 * or CBC-encrypt loop_buf into raw_buf, one 512-byte sector at a time.
 *
 * lo        loop device; lo->key_data must point to an AESmultiKey
 * cmd       READ selects decryption, anything else encryption
 * raw_buf   ciphertext side
 * loop_buf  plaintext side
 * size      byte count; must be a non-zero multiple of 512
 * devSect   sector number of the first sector, incremented per sector
 *
 * The key for each sector is keyPtr[devSect & keyMask]. With keyMask == 0
 * (single-key) the IV comes from loop_compute_sector_iv(). With a non-zero
 * keyMask (multi-key) the IV of block 0 is loop_compute_md5_iv() over
 * plaintext blocks 1..31 of the same sector.
 *
 * iv[0..3] and iv[4..7] are used alternately as "previous ciphertext
 * block", so each loop iteration handles two 16-byte blocks.
 *
 * Returns 0 on success, -EINVAL for a bad size.
 *
 * NOTE(review): lo->key_data is dereferenced without a NULL check;
 * presumably the init hook has always run before this is called -- confirm.
 */
static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
		char *loop_buf, int size, sector_t devSect)
{
	aes_context *a;
	AESmultiKey *m;
	int x;
	unsigned y;
	u_int32_t iv[8];

	if(!size || (size & 511)) {
		return -EINVAL;
	}
	m = (AESmultiKey *)lo->key_data;
	y = m->keyMask;
	if(cmd == READ) {
		while(size) {
			a = m->keyPtr[((unsigned)devSect) & y];
			if(y) {
				/* multi-key: block 0 is decrypted last, because its IV */
				/* depends on plaintext blocks 1..31; remember ciphertext */
				/* block 0 as chaining value and start at block 1 */
				memcpy(&iv[0], raw_buf, 16);
				raw_buf += 16;
				loop_buf += 16;
			} else {
				loop_compute_sector_iv(devSect, &iv[0]);
			}
			x = 15;
			do {
				/* save ciphertext before decrypting it; it is the */
				/* chaining value for the following block */
				memcpy(&iv[4], raw_buf, 16);
				aes_decrypt(a, raw_buf, loop_buf);
				*((u_int32_t *)(&loop_buf[ 0])) ^= iv[0];
				*((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
				*((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
				*((u_int32_t *)(&loop_buf[12])) ^= iv[3];
				if(y && !x) {
					/* multi-key, last iteration: block 31 is done, so */
					/* rewind to block 0 and derive its IV from the */
					/* plaintext of blocks 1..31 */
					raw_buf -= 496;
					loop_buf -= 496;
					loop_compute_md5_iv(devSect, &iv[4], (u_int32_t *)(&loop_buf[16]));
				} else {
					raw_buf += 16;
					loop_buf += 16;
					memcpy(&iv[0], raw_buf, 16);
				}
				aes_decrypt(a, raw_buf, loop_buf);
				*((u_int32_t *)(&loop_buf[ 0])) ^= iv[4];
				*((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
				*((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
				*((u_int32_t *)(&loop_buf[12])) ^= iv[7];
				if(y && !x) {
					/* pointers are at block 0: skip to the next sector */
					raw_buf += 512;
					loop_buf += 512;
				} else {
					raw_buf += 16;
					loop_buf += 16;
				}
			} while(--x >= 0);
			cond_resched();
			size -= 512;
			devSect++;
		}
	} else {
		while(size) {
			a = m->keyPtr[((unsigned)devSect) & y];
			if(y) {
				/* on 2.4 and later kernels, real raw_buf is not doing */
				/* any writes now so it can be used as temp buffer */
				memcpy(raw_buf, loop_buf, 512);
				/* IV for block 0 is derived from plaintext blocks 1..31 */
				loop_compute_md5_iv(devSect, &iv[0], (u_int32_t *)(&raw_buf[16]));
				x = 15;
				do {
					/* encrypt in place in raw_buf, two blocks per iteration */
					iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
					iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
					iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
					iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
					aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
					memcpy(&iv[0], raw_buf, 16);
					raw_buf += 16;
					iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
					iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
					iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
					iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
					aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
					memcpy(&iv[0], raw_buf, 16);
					raw_buf += 16;
				} while(--x >= 0);
				/* loop_buf was only read by the memcpy above */
				loop_buf += 512;
			} else {
				loop_compute_sector_iv(devSect, &iv[0]);
				x = 15;
				do {
					/* plaintext is read from loop_buf, ciphertext written */
					/* to raw_buf, two blocks per iteration */
					iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
					iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
					iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
					iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
					aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
					memcpy(&iv[0], raw_buf, 16);
					loop_buf += 16;
					raw_buf += 16;
					iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
					iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
					iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
					iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
					aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
					memcpy(&iv[0], raw_buf, 16);
					loop_buf += 16;
					raw_buf += 16;
				} while(--x >= 0);
			}
			cond_resched();
			size -= 512;
			devSect++;
		}
	}
	return(0);
}
  469. +
  470. +static int keySetup_aes(struct loop_device *lo, struct loop_info64 *info)
  471. +{
  472. + AESmultiKey *m;
  473. + union {
  474. + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
  475. + unsigned char b[32];
  476. + } un;
  477. +
  478. + lo->key_data = m = allocMultiKey();
  479. + if(!m) return(-ENOMEM);
  480. + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32);
  481. + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0);
  482. + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key));
  483. + memset(&un.b[0], 0, 32);
  484. + return(0);
  485. +}
  486. +
  487. +static int keyClean_aes(struct loop_device *lo)
  488. +{
  489. + if(lo->key_data) {
  490. + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
  491. + lo->key_data = 0;
  492. + }
  493. + return(0);
  494. +}
  495. +
  496. +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg)
  497. +{
  498. + int err;
  499. +
  500. + switch (cmd) {
  501. + case LOOP_MULTI_KEY_SETUP:
  502. + err = multiKeySetup(lo, (unsigned char *)arg);
  503. + break;
  504. + default:
  505. + err = -EINVAL;
  506. + }
  507. + return err;
  508. +}
  509. +
  510. +static struct loop_func_table funcs_aes = {
  511. + number: 16, /* 16 == AES */
  512. + transfer: transfer_aes,
  513. + init: keySetup_aes,
  514. + release: keyClean_aes,
  515. + ioctl: handleIoctl_aes
  516. +};
  517. +
  518. +EXPORT_SYMBOL(loop_compute_sector_iv);
  519. +EXPORT_SYMBOL(loop_compute_md5_iv);
  520. +#endif /* CONFIG_BLK_DEV_LOOP_AES */
  521. /* xfer_funcs[0] is special - its release function is never called */
  522. static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  523. &none_funcs,
  524. - &xor_funcs
  525. + &xor_funcs,
  526. +#if CONFIG_BLK_DEV_LOOP_AES
  527. + [LO_CRYPT_AES] = &funcs_aes,
  528. +#endif
  529. };
  530. -static loff_t get_loop_size(struct loop_device *lo, struct file *file)
  531. -{
  532. - loff_t size, offset, loopsize;
  533. -
  534. - /* Compute loopsize in bytes */
  535. - size = i_size_read(file->f_mapping->host);
  536. - offset = lo->lo_offset;
  537. - loopsize = size - offset;
  538. - if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
  539. - loopsize = lo->lo_sizelimit;
  540. -
  541. - /*
  542. - * Unfortunately, if we want to do I/O on the device,
  543. - * the number of 512-byte sectors has to fit into a sector_t.
  544. - */
  545. - return loopsize >> 9;
  546. +/*
  547. + * First number of 'lo_prealloc' is the default number of RAM pages
  548. + * to pre-allocate for each device backed loop. Every (configured)
  549. + * device backed loop pre-allocates this amount of RAM pages unless
  550. + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
  551. + * overrides are defined in pairs: loop_index,number_of_pages
  552. + */
  553. +static int lo_prealloc[9] = { 125, -1, 0, -1, 0, -1, 0, -1, 0 };
  554. +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
  555. +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
  556. +
  557. +#ifdef MODULE
  558. +MODULE_PARM(lo_prealloc, "1-9i");
  559. +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
  560. +#else
  561. +static int __init lo_prealloc_setup(char *str)
  562. +{
  563. + int x, y, z;
  564. +
  565. + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
  566. + z = get_option(&str, &y);
  567. + if (z > 0)
  568. + lo_prealloc[x] = y;
  569. + if (z < 2)
  570. + break;
  571. + }
  572. + return 1;
  573. }
  574. +__setup("lo_prealloc=", lo_prealloc_setup);
  575. +#endif
  576. -static int
  577. -figure_loop_size(struct loop_device *lo)
  578. -{
  579. - loff_t size = get_loop_size(lo, lo->lo_backing_file);
  580. - sector_t x = (sector_t)size;
  581. +/*
  582. + * This is loop helper thread nice value in range
  583. + * from 0 (low priority) to -20 (high priority).
  584. + */
  585. +static int lo_nice = -1;
  586. - if ((loff_t)x != size)
  587. - return -EFBIG;
  588. +#ifdef MODULE
  589. +MODULE_PARM(lo_nice, "1i");
  590. +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
  591. +#else
  592. +static int __init lo_nice_setup(char *str)
  593. +{
  594. + int y;
  595. - set_capacity(disks[lo->lo_number], x);
  596. - return 0;
  597. + if (get_option(&str, &y) == 1)
  598. + lo_nice = y;
  599. + return 1;
  600. }
  601. +__setup("lo_nice=", lo_nice_setup);
  602. +#endif
  603. +
/*
 * Private data attached to bio->bi_private of each pre-allocated buffer
 * bio (kmalloc'ed in loop_prealloc_init(), read back in
 * loop_end_io_transfer()).
 */
struct loop_bio_extension {
	struct bio *bioext_merge;	/* merge bio; loop_end_io_transfer() reads its bi_private as the original bio */
	struct loop_device *bioext_loop;	/* loop device the buffer is queued on / returned to on completion */
	sector_t bioext_iv;	/* presumably the sector number used as IV for the transfer -- TODO confirm */
	int bioext_index;	/* NOTE(review): use not visible in this part of the file */
	int bioext_size;	/* NOTE(review): use not visible in this part of the file */
};
  611. -static inline int
  612. -lo_do_transfer(struct loop_device *lo, int cmd,
  613. - struct page *rpage, unsigned roffs,
  614. - struct page *lpage, unsigned loffs,
  615. - int size, sector_t rblock)
  616. +static struct loop_device **loop_dev_ptr_arr;
  617. +
  618. +static void loop_prealloc_cleanup(struct loop_device *lo)
  619. {
  620. - if (!lo->transfer)
  621. - return 0;
  622. + struct bio *bio;
  623. - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
  624. + while ((bio = lo->lo_bio_free0)) {
  625. + lo->lo_bio_free0 = bio->bi_next;
  626. + __free_page(bio->bi_io_vec[0].bv_page);
  627. + kfree(bio->bi_private);
  628. + bio->bi_next = NULL;
  629. + bio_put(bio);
  630. + }
  631. + while ((bio = lo->lo_bio_free1)) {
  632. + lo->lo_bio_free1 = bio->bi_next;
  633. + /* bi_flags was used for other purpose */
  634. + bio->bi_flags = 0;
  635. + /* bi_cnt was used for other purpose */
  636. + atomic_set(&bio->bi_cnt, 1);
  637. + bio->bi_next = NULL;
  638. + bio_put(bio);
  639. + }
  640. }
  641. -static int
  642. -do_lo_send(struct loop_device *lo, struct bio_vec *bvec, int bsize, loff_t pos)
  643. +static int loop_prealloc_init(struct loop_device *lo, int y)
  644. {
  645. - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
  646. - struct address_space *mapping = file->f_mapping;
  647. - struct address_space_operations *aops = mapping->a_ops;
  648. - struct page *page;
  649. - pgoff_t index;
  650. - unsigned size, offset, bv_offs;
  651. - int len;
  652. - int ret = 0;
  653. + struct bio *bio;
  654. + int x;
  655. - down(&mapping->host->i_sem);
  656. - index = pos >> PAGE_CACHE_SHIFT;
  657. - offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
  658. - bv_offs = bvec->bv_offset;
  659. - len = bvec->bv_len;
  660. - while (len > 0) {
  661. - sector_t IV;
  662. - int transfer_result;
  663. -
  664. - IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  665. -
  666. - size = PAGE_CACHE_SIZE - offset;
  667. - if (size > len)
  668. - size = len;
  669. -
  670. - page = grab_cache_page(mapping, index);
  671. - if (!page)
  672. - goto fail;
  673. - if (aops->prepare_write(file, page, offset, offset+size))
  674. - goto unlock;
  675. - transfer_result = lo_do_transfer(lo, WRITE, page, offset,
  676. - bvec->bv_page, bv_offs,
  677. - size, IV);
  678. - if (transfer_result) {
  679. - char *kaddr;
  680. -
  681. - /*
  682. - * The transfer failed, but we still write the data to
  683. - * keep prepare/commit calls balanced.
  684. - */
  685. - printk(KERN_ERR "loop: transfer error block %llu\n",
  686. - (unsigned long long)index);
  687. - kaddr = kmap_atomic(page, KM_USER0);
  688. - memset(kaddr + offset, 0, size);
  689. - kunmap_atomic(kaddr, KM_USER0);
  690. + if(!y) {
  691. + y = lo_prealloc[0];
  692. + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
  693. + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
  694. + y = lo_prealloc[x + 1];
  695. + break;
  696. + }
  697. }
  698. - flush_dcache_page(page);
  699. - if (aops->commit_write(file, page, offset, offset+size))
  700. - goto unlock;
  701. - if (transfer_result)
  702. - goto unlock;
  703. - bv_offs += size;
  704. - len -= size;
  705. - offset = 0;
  706. - index++;
  707. - pos += size;
  708. - unlock_page(page);
  709. - page_cache_release(page);
  710. }
  711. - up(&mapping->host->i_sem);
  712. -out:
  713. - return ret;
  714. + lo->lo_bio_flsh = (y * 3) / 4;
  715. -unlock:
  716. - unlock_page(page);
  717. - page_cache_release(page);
  718. -fail:
  719. - up(&mapping->host->i_sem);
  720. - ret = -1;
  721. - goto out;
  722. + for (x = 0; x < y; x++) {
  723. + bio = bio_alloc(GFP_KERNEL, 1);
  724. + if (!bio) {
  725. + fail1:
  726. + loop_prealloc_cleanup(lo);
  727. + return 1;
  728. + }
  729. + bio->bi_io_vec[0].bv_page = alloc_page(GFP_KERNEL);
  730. + if (!bio->bi_io_vec[0].bv_page) {
  731. + fail2:
  732. + bio->bi_next = NULL;
  733. + bio_put(bio);
  734. + goto fail1;
  735. + }
  736. + bio->bi_vcnt = 1;
  737. + bio->bi_private = kmalloc(sizeof(struct loop_bio_extension), GFP_KERNEL);
  738. + if (!bio->bi_private)
  739. + goto fail2;
  740. + bio->bi_next = lo->lo_bio_free0;
  741. + lo->lo_bio_free0 = bio;
  742. +
  743. + bio = bio_alloc(GFP_KERNEL, 1);
  744. + if (!bio)
  745. + goto fail1;
  746. + bio->bi_vcnt = 1;
  747. + bio->bi_next = lo->lo_bio_free1;
  748. + lo->lo_bio_free1 = bio;
  749. + }
  750. + return 0;
  751. }
  752. -static int
  753. -lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  754. +static void loop_add_queue_last(struct loop_device *lo, struct bio *bio, struct bio **q)
  755. {
  756. - struct bio_vec *bvec;
  757. - int i, ret = 0;
  758. + unsigned long flags;
  759. - bio_for_each_segment(bvec, bio, i) {
  760. - ret = do_lo_send(lo, bvec, bsize, pos);
  761. - if (ret < 0)
  762. - break;
  763. - pos += bvec->bv_len;
  764. + spin_lock_irqsave(&lo->lo_lock, flags);
  765. + if (*q) {
  766. + bio->bi_next = (*q)->bi_next;
  767. + (*q)->bi_next = bio;
  768. + } else {
  769. + bio->bi_next = bio;
  770. }
  771. - return ret;
  772. -}
  773. + *q = bio;
  774. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  775. -struct lo_read_data {
  776. - struct loop_device *lo;
  777. - struct page *page;
  778. - unsigned offset;
  779. - int bsize;
  780. -};
  781. + if (waitqueue_active(&lo->lo_bio_wait))
  782. + wake_up_interruptible(&lo->lo_bio_wait);
  783. +}
  784. -static int
  785. -lo_read_actor(read_descriptor_t *desc, struct page *page,
  786. - unsigned long offset, unsigned long size)
  787. +static void loop_add_queue_first(struct loop_device *lo, struct bio *bio, struct bio **q)
  788. {
  789. - unsigned long count = desc->count;
  790. - struct lo_read_data *p = (struct lo_read_data*)desc->buf;
  791. - struct loop_device *lo = p->lo;
  792. - sector_t IV;
  793. -
  794. - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  795. -
  796. - if (size > count)
  797. - size = count;
  798. -
  799. - if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
  800. - size = 0;
  801. - printk(KERN_ERR "loop: transfer error block %ld\n",
  802. - page->index);
  803. - desc->error = -EINVAL;
  804. + spin_lock_irq(&lo->lo_lock);
  805. + if (*q) {
  806. + bio->bi_next = (*q)->bi_next;
  807. + (*q)->bi_next = bio;
  808. + } else {
  809. + bio->bi_next = bio;
  810. + *q = bio;
  811. }
  812. -
  813. - desc->count = count - size;
  814. - desc->written += size;
  815. - p->offset += size;
  816. - return size;
  817. + spin_unlock_irq(&lo->lo_lock);
  818. }
  819. -static int
  820. -do_lo_receive(struct loop_device *lo,
  821. - struct bio_vec *bvec, int bsize, loff_t pos)
  822. +static struct bio *loop_get_bio(struct loop_device *lo, int *list_nr)
  823. {
  824. - struct lo_read_data cookie;
  825. - struct file *file;
  826. - int retval;
  827. -
  828. - cookie.lo = lo;
  829. - cookie.page = bvec->bv_page;
  830. - cookie.offset = bvec->bv_offset;
  831. - cookie.bsize = bsize;
  832. - file = lo->lo_backing_file;
  833. - retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
  834. - lo_read_actor, &cookie);
  835. - return (retval < 0)? retval: 0;
  836. + struct bio *bio = NULL, *last;
  837. +
  838. + spin_lock_irq(&lo->lo_lock);
  839. + if ((last = lo->lo_bio_que0)) {
  840. + bio = last->bi_next;
  841. + if (bio == last)
  842. + lo->lo_bio_que0 = NULL;
  843. + else
  844. + last->bi_next = bio->bi_next;
  845. + bio->bi_next = NULL;
  846. + *list_nr = 0;
  847. + } else if ((last = lo->lo_bio_que1)) {
  848. + bio = last->bi_next;
  849. + if (bio == last)
  850. + lo->lo_bio_que1 = NULL;
  851. + else
  852. + last->bi_next = bio->bi_next;
  853. + bio->bi_next = NULL;
  854. + *list_nr = 1;
  855. + } else if ((last = lo->lo_bio_que2)) {
  856. + bio = last->bi_next;
  857. + if (bio == last)
  858. + lo->lo_bio_que2 = NULL;
  859. + else
  860. + last->bi_next = bio->bi_next;
  861. + bio->bi_next = NULL;
  862. + *list_nr = 2;
  863. + }
  864. + spin_unlock_irq(&lo->lo_lock);
  865. + return bio;
  866. }
  867. -static int
  868. -lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  869. +static void loop_put_buffer(struct loop_device *lo, struct bio *b, int flist)
  870. {
  871. - struct bio_vec *bvec;
  872. - int i, ret = 0;
  873. + unsigned long flags;
  874. + int wk;
  875. - bio_for_each_segment(bvec, bio, i) {
  876. - ret = do_lo_receive(lo, bvec, bsize, pos);
  877. - if (ret < 0)
  878. - break;
  879. - pos += bvec->bv_len;
  880. + spin_lock_irqsave(&lo->lo_lock, flags);
  881. + if(!flist) {
  882. + b->bi_next = lo->lo_bio_free0;
  883. + lo->lo_bio_free0 = b;
  884. + wk = lo->lo_bio_need & 1;
  885. + } else {
  886. + b->bi_next = lo->lo_bio_free1;
  887. + lo->lo_bio_free1 = b;
  888. + wk = lo->lo_bio_need & 2;
  889. }
  890. - return ret;
  891. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  892. +
  893. + if (wk && waitqueue_active(&lo->lo_bio_wait))
  894. + wake_up_interruptible(&lo->lo_bio_wait);
  895. }
  896. -static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  897. +static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
  898. {
  899. - loff_t pos;
  900. - int ret;
  901. + struct loop_bio_extension *extension = bio->bi_private;
  902. + struct bio *merge = extension->bioext_merge;
  903. + struct loop_device *lo = extension->bioext_loop;
  904. + struct bio *origbio = merge->bi_private;
  905. - pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  906. - if (bio_rw(bio) == WRITE)
  907. - ret = lo_send(lo, bio, lo->lo_blocksize, pos);
  908. - else
  909. - ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
  910. - return ret;
  911. + if (err)
  912. + clear_bit(0, &merge->bi_flags);
  913. + if (bio->bi_size)
  914. + return 1;
  915. + if (bio_rw(bio) == WRITE) {
  916. + loop_put_buffer(lo, bio, 0);
  917. + if (!atomic_dec_and_test(&merge->bi_cnt))
  918. + return 0;
  919. + if (bio_barrier(origbio))
  920. + atomic_dec(&lo->lo_bio_barr);
  921. + origbio->bi_next = NULL;
  922. + bio_endio(origbio, origbio->bi_size, test_bit(0, &merge->bi_flags) ? 0 : -EIO);
  923. + loop_put_buffer(lo, merge, 1);
  924. + if (atomic_dec_and_test(&lo->lo_pending))
  925. + wake_up_interruptible(&lo->lo_bio_wait);
  926. + } else {
  927. + loop_add_queue_last(lo, bio, &lo->lo_bio_que0);
  928. + }
  929. + return 0;
  930. }
  931. -/*
  932. - * Add bio to back of pending list
  933. - */
  934. -static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  935. +static struct bio *loop_get_buffer(struct loop_device *lo,
  936. + struct bio *orig_bio, int from_thread, struct bio **merge_ptr)
  937. {
  938. + struct bio *bio = NULL, *merge = *merge_ptr;
  939. + struct loop_bio_extension *extension;
  940. unsigned long flags;
  941. + int len;
  942. +
  943. + /*
  944. + * If called from make_request and there are unprocessed
  945. + * barrier requests, fail the allocation so that the request is
  946. + * appended to the end of the no-merge-allocated list. This
  947. + * guarantees FIFO processing order of requests.
  948. + */
  949. + if (!from_thread && atomic_read(&lo->lo_bio_barr))
  950. + return NULL;
  951. spin_lock_irqsave(&lo->lo_lock, flags);
  952. - if (lo->lo_biotail) {
  953. - lo->lo_biotail->bi_next = bio;
  954. - lo->lo_biotail = bio;
  955. - } else
  956. - lo->lo_bio = lo->lo_biotail = bio;
  957. + if (!merge) {
  958. + merge = lo->lo_bio_free1;
  959. + if (merge) {
  960. + lo->lo_bio_free1 = merge->bi_next;
  961. + if (from_thread)
  962. + lo->lo_bio_need = 0;
  963. + } else {
  964. + if (from_thread)
  965. + lo->lo_bio_need = 2;
  966. + }
  967. + }
  968. +
  969. + /*
  970. + * If there are unprocessed barrier requests and a merge-bio was just
  971. + * allocated, do not allocate a buffer-bio yet. This causes the request
  972. + * to be moved from the head of the no-merge-allocated list to the end
  973. + * of the merge-allocated list. This guarantees FIFO processing order
  974. + * of requests.
  975. + */
  976. + if (merge && (*merge_ptr || !atomic_read(&lo->lo_bio_barr))) {
  977. + bio = lo->lo_bio_free0;
  978. + if (bio) {
  979. + lo->lo_bio_free0 = bio->bi_next;
  980. + if (from_thread)
  981. + lo->lo_bio_need = 0;
  982. + } else {
  983. + if (from_thread)
  984. + lo->lo_bio_need = 1;
  985. + }
  986. + }
  987. spin_unlock_irqrestore(&lo->lo_lock, flags);
  988. - up(&lo->lo_bh_mutex);
  989. + if (!(*merge_ptr) && merge) {
  990. + /*
  991. + * initialize "merge-bio" which is used as a
  992. + * rendezvous point among multiple vecs
  993. + */
  994. + *merge_ptr = merge;
  995. + merge->bi_sector = orig_bio->bi_sector + lo->lo_offs_sec;
  996. + set_bit(0, &merge->bi_flags);
  997. + merge->bi_idx = orig_bio->bi_idx;
  998. + atomic_set(&merge->bi_cnt, orig_bio->bi_vcnt - orig_bio->bi_idx);
  999. + merge->bi_private = orig_bio;
  1000. + }
  1001. +
  1002. + if (!bio)
  1003. + return NULL;
  1004. +
  1005. + /*
  1006. + * initialize one page "buffer-bio"
  1007. + */
  1008. + bio->bi_sector = merge->bi_sector;
  1009. + bio->bi_next = NULL;
  1010. + bio->bi_bdev = lo->lo_device;
  1011. + bio->bi_flags = 0;
  1012. + bio->bi_rw = orig_bio->bi_rw & ~(1 << BIO_RW_BARRIER);
  1013. + if (bio_barrier(orig_bio) && ((merge->bi_idx == orig_bio->bi_idx) || (merge->bi_idx == (orig_bio->bi_vcnt - 1))))
  1014. + bio->bi_rw |= (1 << BIO_RW_BARRIER);
  1015. +#if defined(BIO_RW_SYNC)
  1016. + bio->bi_rw &= ~(1 << BIO_RW_SYNC);
  1017. + if (bio_sync(orig_bio) && (merge->bi_idx == (orig_bio->bi_vcnt - 1)))
  1018. + bio->bi_rw |= (1 << BIO_RW_SYNC);
  1019. +#endif
  1020. + bio->bi_vcnt = 1;
  1021. + bio->bi_idx = 0;
  1022. + bio->bi_phys_segments = 0;
  1023. + bio->bi_hw_segments = 0;
  1024. + bio->bi_size = len = orig_bio->bi_io_vec[merge->bi_idx].bv_len;
  1025. + /* bio->bi_max_vecs not touched */
  1026. + bio->bi_io_vec[0].bv_len = len;
  1027. + bio->bi_io_vec[0].bv_offset = 0;
  1028. + bio->bi_end_io = loop_end_io_transfer;
  1029. + /* bio->bi_cnt not touched */
  1030. + /* bio->bi_private not touched */
  1031. + /* bio->bi_destructor not touched */
  1032. +
  1033. + /*
  1034. + * initialize "buffer-bio" extension. This extension is
  1035. + * permanently glued to the above "buffer-bio" via bio->bi_private
  1036. + */
  1037. + extension = bio->bi_private;
  1038. + extension->bioext_merge = merge;
  1039. + extension->bioext_loop = lo;
  1040. + extension->bioext_iv = merge->bi_sector - lo->lo_iv_remove;
  1041. + extension->bioext_index = merge->bi_idx;
  1042. + extension->bioext_size = len;
  1043. +
  1044. + /*
  1045. + * prepare "merge-bio" for next vec
  1046. + */
  1047. + merge->bi_sector += len >> 9;
  1048. + merge->bi_idx++;
  1049. +
  1050. + return bio;
  1051. }
  1052. -/*
  1053. - * Grab first pending buffer
  1054. - */
  1055. -static struct bio *loop_get_bio(struct loop_device *lo)
  1056. +static int figure_loop_size(struct loop_device *lo, struct block_device *bdev)
  1057. {
  1058. - struct bio *bio;
  1059. + loff_t size, offs;
  1060. + sector_t x;
  1061. + int err = 0;
  1062. - spin_lock_irq(&lo->lo_lock);
  1063. - if ((bio = lo->lo_bio)) {
  1064. - if (bio == lo->lo_biotail)
  1065. - lo->lo_biotail = NULL;
  1066. - lo->lo_bio = bio->bi_next;
  1067. - bio->bi_next = NULL;
  1068. + size = i_size_read(lo->lo_backing_file->f_dentry->d_inode->i_mapping->host);
  1069. + offs = lo->lo_offset;
  1070. + if (!(lo->lo_flags & LO_FLAGS_DO_BMAP))
  1071. + offs &= ~((loff_t)511);
  1072. + if ((offs > 0) && (offs < size)) {
  1073. + size -= offs;
  1074. + } else {
  1075. + if (offs)
  1076. + err = -EINVAL;
  1077. + lo->lo_offset = 0;
  1078. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1079. }
  1080. - spin_unlock_irq(&lo->lo_lock);
  1081. + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
  1082. + size = lo->lo_sizelimit;
  1083. + } else {
  1084. + if (lo->lo_sizelimit)
  1085. + err = -EINVAL;
  1086. + lo->lo_sizelimit = 0;
  1087. + }
  1088. + size >>= 9;
  1089. - return bio;
  1090. + /*
  1091. + * Unfortunately, if we want to do I/O on the device,
  1092. + * the number of 512-byte sectors has to fit into a sector_t.
  1093. + */
  1094. + x = (sector_t)size;
  1095. + if ((loff_t)x != size) {
  1096. + err = -EFBIG;
  1097. + size = 0;
  1098. + }
  1099. +
  1100. + bdev->bd_inode->i_size = size << 9; /* byte units */
  1101. + set_capacity(disks[lo->lo_number], size); /* 512 byte units */
  1102. + return err;
  1103. +}
  1104. +
  1105. +static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
  1106. + char *lbuf, int size, sector_t rblock)
  1107. +{
  1108. + if (!lo->transfer)
  1109. + return 0;
  1110. +
  1111. + return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
  1112. +}
  1113. +
  1114. +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
  1115. +{
  1116. + mm_segment_t fs;
  1117. + int x, y, z;
  1118. +
  1119. + y = 0;
  1120. + do {
  1121. + z = size - y;
  1122. + fs = get_fs();
  1123. + set_fs(get_ds());
  1124. + if (w) {
  1125. + x = file->f_op->write(file, buf + y, z, ppos);
  1126. + set_fs(fs);
  1127. + } else {
  1128. + x = file->f_op->read(file, buf + y, z, ppos);
  1129. + set_fs(fs);
  1130. + if (!x)
  1131. + return 1;
  1132. + }
  1133. + if (x < 0) {
  1134. + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
  1135. + set_current_state(TASK_INTERRUPTIBLE);
  1136. + schedule_timeout(HZ / 2);
  1137. + continue;
  1138. + }
  1139. + return 1;
  1140. + }
  1141. + y += x;
  1142. + } while (y < size);
  1143. + return 0;
  1144. +}
  1145. +
  1146. +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  1147. +{
  1148. + loff_t pos;
  1149. + struct file *file = lo->lo_backing_file;
  1150. + char *data, *buf;
  1151. + unsigned int size, len;
  1152. + sector_t IV;
  1153. + struct page *pg;
  1154. +
  1155. + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  1156. + buf = page_address(lo->lo_bio_free0->bi_io_vec[0].bv_page);
  1157. + IV = bio->bi_sector;
  1158. + if (!lo->lo_iv_remove)
  1159. + IV += lo->lo_offs_sec;
  1160. + do {
  1161. + pg = bio->bi_io_vec[bio->bi_idx].bv_page;
  1162. + len = bio->bi_io_vec[bio->bi_idx].bv_len;
  1163. + data = kmap(pg) + bio->bi_io_vec[bio->bi_idx].bv_offset;
  1164. + while (len > 0) {
  1165. + if (!lo->lo_encryption) {
  1166. + /* this code relies on the NONE transfer being a no-op */
  1167. + buf = data;
  1168. + }
  1169. + size = PAGE_SIZE;
  1170. + if (size > len)
  1171. + size = len;
  1172. + if (bio_rw(bio) == WRITE) {
  1173. + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
  1174. + printk(KERN_ERR "loop%d: write transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1175. + goto kunmap_and_out;
  1176. + }
  1177. + if (loop_file_io(file, buf, size, &pos, 1)) {
  1178. + printk(KERN_ERR "loop%d: write i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1179. + goto kunmap_and_out;
  1180. + }
  1181. + } else {
  1182. + if (loop_file_io(file, buf, size, &pos, 0)) {
  1183. + printk(KERN_ERR "loop%d: read i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1184. + goto kunmap_and_out;
  1185. + }
  1186. + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
  1187. + printk(KERN_ERR "loop%d: read transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1188. + goto kunmap_and_out;
  1189. + }
  1190. + }
  1191. + data += size;
  1192. + len -= size;
  1193. + IV += size >> 9;
  1194. + }
  1195. + kunmap(pg);
  1196. + } while (++bio->bi_idx < bio->bi_vcnt);
  1197. + return 0;
  1198. +
  1199. +kunmap_and_out:
  1200. + kunmap(pg);
  1201. + return -EIO;
  1202. +}
  1203. +
  1204. +static int loop_make_request_err(request_queue_t *q, struct bio *old_bio)
  1205. +{
  1206. + old_bio->bi_next = NULL;
  1207. + bio_io_error(old_bio, old_bio->bi_size);
  1208. + return 0;
  1209. }
  1210. -static int loop_make_request(request_queue_t *q, struct bio *old_bio)
  1211. +static int loop_make_request_real(request_queue_t *q, struct bio *old_bio)
  1212. {
  1213. + struct bio *new_bio, *merge;
  1214. struct loop_device *lo = q->queuedata;
  1215. - int rw = bio_rw(old_bio);
  1216. + struct loop_bio_extension *extension;
  1217. + int rw = bio_rw(old_bio), y;
  1218. + char *md;
  1219. + set_current_state(TASK_RUNNING);
  1220. if (!lo)
  1221. goto out;
  1222. -
  1223. - spin_lock_irq(&lo->lo_lock);
  1224. - if (lo->lo_state != Lo_bound)
  1225. - goto inactive;
  1226. + if ((rw == WRITE) && (lo->lo_flags & LO_FLAGS_READ_ONLY))
  1227. + goto out;
  1228. atomic_inc(&lo->lo_pending);
  1229. - spin_unlock_irq(&lo->lo_lock);
  1230. + /*
  1231. + * file backed, queue for loop_thread to handle
  1232. + */
  1233. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1234. + loop_add_queue_last(lo, old_bio, &lo->lo_bio_que0);
  1235. + return 0;
  1236. + }
  1237. +
  1238. + /*
  1239. + * device backed, just remap bdev & sector for NONE transfer
  1240. + */
  1241. + if (!lo->lo_encryption) {
  1242. + old_bio->bi_sector += lo->lo_offs_sec;
  1243. + old_bio->bi_bdev = lo->lo_device;
  1244. + generic_make_request(old_bio);
  1245. + if (atomic_dec_and_test(&lo->lo_pending))
  1246. + wake_up_interruptible(&lo->lo_bio_wait);
  1247. + return 0;
  1248. + }
  1249. +
  1250. + /*
  1251. + * device backed, start reads and writes now if buffer available
  1252. + */
  1253. + merge = NULL;
  1254. + if (bio_barrier(old_bio))
  1255. + atomic_inc(&lo->lo_bio_barr);
  1256. + try_next_old_bio_vec:
  1257. + new_bio = loop_get_buffer(lo, old_bio, 0, &merge);
  1258. + if (!new_bio) {
  1259. + /* just queue request and let thread handle allocs later */
  1260. + if (merge)
  1261. + loop_add_queue_last(lo, merge, &lo->lo_bio_que1);
  1262. + else
  1263. + loop_add_queue_last(lo, old_bio, &lo->lo_bio_que2);
  1264. + return 0;
  1265. + }
  1266. if (rw == WRITE) {
  1267. - if (lo->lo_flags & LO_FLAGS_READ_ONLY)
  1268. - goto err;
  1269. - } else if (rw == READA) {
  1270. - rw = READ;
  1271. - } else if (rw != READ) {
  1272. - printk(KERN_ERR "loop: unknown command (%x)\n", rw);
  1273. - goto err;
  1274. + extension = new_bio->bi_private;
  1275. + y = extension->bioext_index;
  1276. + md = kmap(old_bio->bi_io_vec[y].bv_page) + old_bio->bi_io_vec[y].bv_offset;
  1277. + if (lo_do_transfer(lo, WRITE, page_address(new_bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1278. + clear_bit(0, &merge->bi_flags);
  1279. + }
  1280. + kunmap(old_bio->bi_io_vec[y].bv_page);
  1281. }
  1282. - loop_add_bio(lo, old_bio);
  1283. +
  1284. + /* merge & old_bio may vanish during generic_make_request() */
  1285. + /* if last vec gets processed before function returns */
  1286. + y = (merge->bi_idx < old_bio->bi_vcnt) ? 1 : 0;
  1287. + generic_make_request(new_bio);
  1288. +
  1289. + /* other vecs may need processing too */
  1290. + if (y)
  1291. + goto try_next_old_bio_vec;
  1292. return 0;
  1293. -err:
  1294. - if (atomic_dec_and_test(&lo->lo_pending))
  1295. - up(&lo->lo_bh_mutex);
  1296. +
  1297. out:
  1298. + old_bio->bi_next = NULL;
  1299. bio_io_error(old_bio, old_bio->bi_size);
  1300. return 0;
  1301. -inactive:
  1302. - spin_unlock_irq(&lo->lo_lock);
  1303. - goto out;
  1304. }
  1305. -/*
  1306. - * kick off io on the underlying address space
  1307. - */
  1308. -static void loop_unplug(request_queue_t *q)
  1309. +static void loop_unplug_backingdev(request_queue_t *bq)
  1310. {
  1311. - struct loop_device *lo = q->queuedata;
  1312. -
  1313. - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
  1314. - blk_run_address_space(lo->lo_backing_file->f_mapping);
  1315. +#if defined(QUEUE_FLAG_PLUGGED)
  1316. + if(bq && bq->unplug_fn)
  1317. + bq->unplug_fn(bq);
  1318. +#else
  1319. + blk_run_queues();
  1320. +#endif
  1321. }
  1322. -struct switch_request {
  1323. - struct file *file;
  1324. - struct completion wait;
  1325. -};
  1326. -
  1327. -static void do_loop_switch(struct loop_device *, struct switch_request *);
  1328. -
  1329. -static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
  1330. +#if defined(QUEUE_FLAG_PLUGGED)
  1331. +static void loop_unplug_loopdev(request_queue_t *mq)
  1332. {
  1333. - int ret;
  1334. + struct loop_device *lo;
  1335. + struct file *f;
  1336. - if (unlikely(!bio->bi_bdev)) {
  1337. - do_loop_switch(lo, bio->bi_private);
  1338. - bio_put(bio);
  1339. - } else {
  1340. - ret = do_bio_filebacked(lo, bio);
  1341. - bio_endio(bio, bio->bi_size, ret);
  1342. - }
  1343. + clear_bit(QUEUE_FLAG_PLUGGED, &mq->queue_flags);
  1344. + lo = mq->queuedata;
  1345. + if(!lo)
  1346. + return;
  1347. + f = lo->lo_backing_file;
  1348. + if(!f)
  1349. + return;
  1350. + blk_run_address_space(f->f_mapping);
  1351. }
  1352. +#endif
  1353. /*
  1354. * worker thread that handles reads/writes to file backed loop devices,
  1355. @@ -474,20 +1139,41 @@ static inline void loop_handle_bio(struc
  1356. static int loop_thread(void *data)
  1357. {
  1358. struct loop_device *lo = data;
  1359. - struct bio *bio;
  1360. + struct bio *bio, *xbio, *merge;
  1361. + struct loop_bio_extension *extension;
  1362. + int x, y, flushcnt = 0;
  1363. + wait_queue_t waitq;
  1364. + char *md;
  1365. + request_queue_t *backingQueue;
  1366. + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
  1367. + init_waitqueue_entry(&waitq, current);
  1368. + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
  1369. daemonize("loop%d", lo->lo_number);
  1370. + if(lo->lo_device)
  1371. + backingQueue = bdev_get_queue(lo->lo_device);
  1372. + else
  1373. + backingQueue = NULL;
  1374. +
  1375. /*
  1376. * loop can be used in an encrypted device,
  1377. * hence, it mustn't be stopped at all
  1378. * because it could be indirectly used during suspension
  1379. */
  1380. +#if defined(PF_NOFREEZE)
  1381. current->flags |= PF_NOFREEZE;
  1382. +#elif defined(PF_IOTHREAD)
  1383. + current->flags |= PF_IOTHREAD;
  1384. +#endif
  1385. + current->flags |= PF_LESS_THROTTLE;
  1386. - set_user_nice(current, -20);
  1387. + if (lo_nice > 0)
  1388. + lo_nice = 0;
  1389. + if (lo_nice < -20)
  1390. + lo_nice = -20;
  1391. + set_user_nice(current, lo_nice);
  1392. - lo->lo_state = Lo_bound;
  1393. atomic_inc(&lo->lo_pending);
  1394. /*
  1395. @@ -496,23 +1182,138 @@ static int loop_thread(void *data)
  1396. up(&lo->lo_sem);
  1397. for (;;) {
  1398. - down_interruptible(&lo->lo_bh_mutex);
  1399. + add_wait_queue(&lo->lo_bio_wait, &waitq);
  1400. + for (;;) {
  1401. + set_current_state(TASK_INTERRUPTIBLE);
  1402. + if (!atomic_read(&lo->lo_pending))
  1403. + break;
  1404. +
  1405. + x = 0;
  1406. + spin_lock_irq(&lo->lo_lock);
  1407. + if (lo->lo_bio_que0) {
  1408. + /* don't sleep if device backed READ needs processing */
  1409. + /* don't sleep if file backed READ/WRITE needs processing */
  1410. + x = 1;
  1411. + } else if (lo->lo_bio_que1) {
  1412. + /* don't sleep if a buffer-bio is available */
  1413. + /* don't sleep if need-buffer-bio request is not set */
  1414. + if (lo->lo_bio_free0 || !(lo->lo_bio_need & 1))
  1415. + x = 1;
  1416. + } else if (lo->lo_bio_que2) {
  1417. + /* don't sleep if a merge-bio is available */
  1418. + /* don't sleep if need-merge-bio request is not set */
  1419. + if (lo->lo_bio_free1 || !(lo->lo_bio_need & 2))
  1420. + x = 1;
  1421. + }
  1422. + spin_unlock_irq(&lo->lo_lock);
  1423. + if (x)
  1424. + break;
  1425. +
  1426. + schedule();
  1427. + }
  1428. + set_current_state(TASK_RUNNING);
  1429. + remove_wait_queue(&lo->lo_bio_wait, &waitq);
  1430. +
  1431. /*
  1432. - * could be upped because of tear-down, not because of
  1433. + * could be woken because of tear-down, not because of
  1434. * pending work
  1435. */
  1436. if (!atomic_read(&lo->lo_pending))
  1437. break;
  1438. - bio = loop_get_bio(lo);
  1439. - if (!bio) {
  1440. - printk("loop: missing bio\n");
  1441. + bio = loop_get_bio(lo, &x);
  1442. + if (!bio)
  1443. continue;
  1444. +
  1445. + /*
  1446. + * x list tag usage(has-buffer,has-merge)
  1447. + * --- --------------- ---------------------------
  1448. + * 0 lo->lo_bio_que0 dev-r(y,y) / file-rw
  1449. + * 1 lo->lo_bio_que1 dev-rw(n,y)
  1450. + * 2 lo->lo_bio_que2 dev-rw(n,n)
  1451. + */
  1452. + if (x >= 1) {
  1453. + /* loop_make_request_real didn't allocate a buffer, do that now */
  1454. + if (x == 1) {
  1455. + merge = bio;
  1456. + bio = merge->bi_private;
  1457. + } else {
  1458. + merge = NULL;
  1459. + }
  1460. + try_next_bio_vec:
  1461. + xbio = loop_get_buffer(lo, bio, 1, &merge);
  1462. + if (!xbio) {
  1463. + loop_unplug_backingdev(backingQueue);
  1464. + flushcnt = 0;
  1465. + if (merge)
  1466. + loop_add_queue_first(lo, merge, &lo->lo_bio_que1);
  1467. + else
  1468. + loop_add_queue_first(lo, bio, &lo->lo_bio_que2);
  1469. + /* lo->lo_bio_need should be non-zero now, go back to sleep */
  1470. + continue;
  1471. + }
  1472. + if (bio_rw(bio) == WRITE) {
  1473. + extension = xbio->bi_private;
  1474. + y = extension->bioext_index;
  1475. + md = kmap(bio->bi_io_vec[y].bv_page) + bio->bi_io_vec[y].bv_offset;
  1476. + if (lo_do_transfer(lo, WRITE, page_address(xbio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1477. + clear_bit(0, &merge->bi_flags);
  1478. + }
  1479. + kunmap(bio->bi_io_vec[y].bv_page);
  1480. + }
  1481. +
  1482. + /* merge & bio may vanish during generic_make_request() */
  1483. + /* if last vec gets processed before function returns */
  1484. + y = (merge->bi_idx < bio->bi_vcnt) ? 1 : 0;
  1485. + generic_make_request(xbio);
  1486. +
  1487. + /* start I/O if there are no more requests lacking buffers */
  1488. + x = 0;
  1489. + spin_lock_irq(&lo->lo_lock);
  1490. + if (!y && !lo->lo_bio_que1 && !lo->lo_bio_que2)
  1491. + x = 1;
  1492. + spin_unlock_irq(&lo->lo_lock);
  1493. + if (x || (++flushcnt >= lo->lo_bio_flsh)) {
  1494. + loop_unplug_backingdev(backingQueue);
  1495. + flushcnt = 0;
  1496. + }
  1497. +
  1498. + /* other vecs may need processing too */
  1499. + if (y)
  1500. + goto try_next_bio_vec;
  1501. +
  1502. + /* request not completely processed yet */
  1503. + continue;
  1504. + }
  1505. +
  1506. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1507. + /* request is for file backed device */
  1508. + y = do_bio_filebacked(lo, bio);
  1509. + bio->bi_next = NULL;
  1510. + bio_endio(bio, bio->bi_size, y);
  1511. + } else {
  1512. + /* device backed read has completed, do decrypt now */
  1513. + extension = bio->bi_private;
  1514. + merge = extension->bioext_merge;
  1515. + y = extension->bioext_index;
  1516. + xbio = merge->bi_private;
  1517. + md = kmap(xbio->bi_io_vec[y].bv_page) + xbio->bi_io_vec[y].bv_offset;
  1518. + if (lo_do_transfer(lo, READ, page_address(bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1519. + clear_bit(0, &merge->bi_flags);
  1520. + }
  1521. + kunmap(xbio->bi_io_vec[y].bv_page);
  1522. + loop_put_buffer(lo, bio, 0);
  1523. + if (!atomic_dec_and_test(&merge->bi_cnt))
  1524. + continue;
  1525. + if (bio_barrier(xbio))
  1526. + atomic_dec(&lo->lo_bio_barr);
  1527. + xbio->bi_next = NULL;
  1528. + bio_endio(xbio, xbio->bi_size, test_bit(0, &merge->bi_flags) ? 0 : -EIO);
  1529. + loop_put_buffer(lo, merge, 1);
  1530. }
  1531. - loop_handle_bio(lo, bio);
  1532. /*
  1533. - * upped both for pending work and tear-down, lo_pending
  1534. + * woken both for pending work and tear-down, lo_pending
  1535. * will hit zero then
  1536. */
  1537. if (atomic_dec_and_test(&lo->lo_pending))
  1538. @@ -523,101 +1324,26 @@ static int loop_thread(void *data)
  1539. return 0;
  1540. }
  1541. -/*
  1542. - * loop_switch performs the hard work of switching a backing store.
  1543. - * First it needs to flush existing IO, it does this by sending a magic
  1544. - * BIO down the pipe. The completion of this BIO does the actual switch.
  1545. - */
  1546. -static int loop_switch(struct loop_device *lo, struct file *file)
  1547. -{
  1548. - struct switch_request w;
  1549. - struct bio *bio = bio_alloc(GFP_KERNEL, 1);
  1550. - if (!bio)
  1551. - return -ENOMEM;
  1552. - init_completion(&w.wait);
  1553. - w.file = file;
  1554. - bio->bi_private = &w;
  1555. - bio->bi_bdev = NULL;
  1556. - loop_make_request(lo->lo_queue, bio);
  1557. - wait_for_completion(&w.wait);
  1558. - return 0;
  1559. -}
  1560. -
  1561. -/*
  1562. - * Do the actual switch; called from the BIO completion routine
  1563. - */
  1564. -static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
  1565. +static void loop_set_softblksz(struct loop_device *lo, struct block_device *bdev)
  1566. {
  1567. - struct file *file = p->file;
  1568. - struct file *old_file = lo->lo_backing_file;
  1569. - struct address_space *mapping = file->f_mapping;
  1570. + int bs, x;
  1571. - mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
  1572. - lo->lo_backing_file = file;
  1573. - lo->lo_blocksize = mapping->host->i_blksize;
  1574. - lo->old_gfp_mask = mapping_gfp_mask(mapping);
  1575. - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
  1576. - complete(&p->wait);
  1577. -}
  1578. -
  1579. -
  1580. -/*
  1581. - * loop_change_fd switched the backing store of a loopback device to
  1582. - * a new file. This is useful for operating system installers to free up
  1583. - * the original file and in High Availability environments to switch to
  1584. - * an alternative location for the content in case of server meltdown.
  1585. - * This can only work if the loop device is used read-only, and if the
  1586. - * new backing store is the same size and type as the old backing store.
  1587. - */
  1588. -static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
  1589. - struct block_device *bdev, unsigned int arg)
  1590. -{
  1591. - struct file *file, *old_file;
  1592. - struct inode *inode;
  1593. - int error;
  1594. -
  1595. - error = -ENXIO;
  1596. - if (lo->lo_state != Lo_bound)
  1597. - goto out;
  1598. -
  1599. - /* the loop device has to be read-only */
  1600. - error = -EINVAL;
  1601. - if (lo->lo_flags != LO_FLAGS_READ_ONLY)
  1602. - goto out;
  1603. -
  1604. - error = -EBADF;
  1605. - file = fget(arg);
  1606. - if (!file)
  1607. - goto out;
  1608. -
  1609. - inode = file->f_mapping->host;
  1610. - old_file = lo->lo_backing_file;
  1611. -
  1612. - error = -EINVAL;
  1613. -
  1614. - if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
  1615. - goto out_putf;
  1616. -
  1617. - /* new backing store needs to support loop (eg sendfile) */
  1618. - if (!inode->i_fop->sendfile)
  1619. - goto out_putf;
  1620. -
  1621. - /* size of the new backing store needs to be the same */
  1622. - if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
  1623. - goto out_putf;
  1624. -
  1625. - /* and ... switch */
  1626. - error = loop_switch(lo, file);
  1627. - if (error)
  1628. - goto out_putf;
  1629. -
  1630. - fput(old_file);
  1631. - return 0;
  1632. -
  1633. - out_putf:
  1634. - fput(file);
  1635. - out:
  1636. - return error;
  1637. + if (lo->lo_device)
  1638. + bs = block_size(lo->lo_device);
  1639. + else
  1640. + bs = PAGE_SIZE;
  1641. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1642. + x = (int) bdev->bd_inode->i_size;
  1643. + if ((bs == 8192) && (x & 0x1E00))
  1644. + bs = 4096;
  1645. + if ((bs == 4096) && (x & 0x0E00))
  1646. + bs = 2048;
  1647. + if ((bs == 2048) && (x & 0x0600))
  1648. + bs = 1024;
  1649. + if ((bs == 1024) && (x & 0x0200))
  1650. + bs = 512;
  1651. + }
  1652. + set_blocksize(bdev, bs);
  1653. }
  1654. static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
  1655. @@ -625,124 +1351,170 @@ static int loop_set_fd(struct loop_devic
  1656. {
  1657. struct file *file;
  1658. struct inode *inode;
  1659. - struct address_space *mapping;
  1660. - unsigned lo_blocksize;
  1661. + struct block_device *lo_device = NULL;
  1662. int lo_flags = 0;
  1663. int error;
  1664. - loff_t size;
  1665. -
  1666. - /* This is safe, since we have a reference from open(). */
  1667. - __module_get(THIS_MODULE);
  1668. -
  1669. - error = -EBUSY;
  1670. - if (lo->lo_state != Lo_unbound)
  1671. - goto out;
  1672. error = -EBADF;
  1673. file = fget(arg);
  1674. if (!file)
  1675. goto out;
  1676. - mapping = file->f_mapping;
  1677. - inode = mapping->host;
  1678. + error = -EINVAL;
  1679. + inode = file->f_dentry->d_inode;
  1680. if (!(file->f_mode & FMODE_WRITE))
  1681. lo_flags |= LO_FLAGS_READ_ONLY;
  1682. - error = -EINVAL;
  1683. - if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
  1684. - struct address_space_operations *aops = mapping->a_ops;
  1685. + init_MUTEX_LOCKED(&lo->lo_sem);
  1686. + spin_lock_init(&lo->lo_lock);
  1687. + init_waitqueue_head(&lo->lo_bio_wait);
  1688. + atomic_set(&lo->lo_pending, 0);
  1689. + atomic_set(&lo->lo_bio_barr, 0);
  1690. + lo->lo_offset = lo->lo_sizelimit = 0;
  1691. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1692. + lo->lo_encryption = NULL;
  1693. + lo->lo_encrypt_key_size = 0;
  1694. + lo->transfer = NULL;
  1695. + lo->lo_crypt_name[0] = 0;
  1696. + lo->lo_file_name[0] = 0;
  1697. + lo->lo_init[1] = lo->lo_init[0] = 0;
  1698. + lo->lo_key_owner = 0;
  1699. + lo->ioctl = NULL;
  1700. + lo->key_data = NULL;
  1701. + lo->lo_bio_que2 = lo->lo_bio_que1 = lo->lo_bio_que0 = NULL;
  1702. + lo->lo_bio_free1 = lo->lo_bio_free0 = NULL;
  1703. + lo->lo_bio_flsh = lo->lo_bio_need = 0;
  1704. +
  1705. + if (S_ISBLK(inode->i_mode)) {
  1706. + lo_device = inode->i_bdev;
  1707. + if (lo_device == bdev) {
  1708. + error = -EBUSY;
  1709. + goto out_putf;
  1710. + }
  1711. + if (loop_prealloc_init(lo, 0)) {
  1712. + error = -ENOMEM;
  1713. + goto out_putf;
  1714. + }
  1715. + if (bdev_read_only(lo_device))
  1716. + lo_flags |= LO_FLAGS_READ_ONLY;
  1717. + else
  1718. + filemap_fdatawrite(inode->i_mapping);
  1719. + } else if (S_ISREG(inode->i_mode)) {
  1720. /*
  1721. * If we can't read - sorry. If we only can't write - well,
  1722. * it's going to be read-only.
  1723. */
  1724. - if (!file->f_op->sendfile)
  1725. + if (!file->f_op || !file->f_op->read)
  1726. goto out_putf;
  1727. - if (!aops->prepare_write || !aops->commit_write)
  1728. + if (!file->f_op->write)
  1729. lo_flags |= LO_FLAGS_READ_ONLY;
  1730. - lo_blocksize = inode->i_blksize;
  1731. - error = 0;
  1732. - } else {
  1733. + lo_flags |= LO_FLAGS_DO_BMAP;
  1734. + if (loop_prealloc_init(lo, 1)) {
  1735. + error = -ENOMEM;
  1736. + goto out_putf;
  1737. + }
  1738. + } else
  1739. goto out_putf;
  1740. - }
  1741. - size = get_loop_size(lo, file);
  1742. -
  1743. - if ((loff_t)(sector_t)size != size) {
  1744. - error = -EFBIG;
  1745. - goto out_putf;
  1746. - }
  1747. + get_file(file);
  1748. if (!(lo_file->f_mode & FMODE_WRITE))
  1749. lo_flags |= LO_FLAGS_READ_ONLY;
  1750. set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
  1751. - lo->lo_blocksize = lo_blocksize;
  1752. - lo->lo_device = bdev;
  1753. + lo->lo_device = lo_device;
  1754. lo->lo_flags = lo_flags;
  1755. + if(lo_flags & LO_FLAGS_READ_ONLY)
  1756. + lo->lo_flags |= 0x200000; /* export to user space */
  1757. lo->lo_backing_file = file;
  1758. - lo->transfer = NULL;
  1759. - lo->ioctl = NULL;
  1760. - lo->lo_sizelimit = 0;
  1761. - lo->old_gfp_mask = mapping_gfp_mask(mapping);
  1762. - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
  1763. -
  1764. - lo->lo_bio = lo->lo_biotail = NULL;
  1765. + if (figure_loop_size(lo, bdev)) {
  1766. + error = -EFBIG;
  1767. + goto out_cleanup;
  1768. + }
  1769. /*
  1770. * set queue make_request_fn, and add limits based on lower level
  1771. * device
  1772. */
  1773. - blk_queue_make_request(lo->lo_queue, loop_make_request);
  1774. - lo->lo_queue->queuedata = lo;
  1775. - lo->lo_queue->unplug_fn = loop_unplug;
  1776. + blk_queue_make_request(lo->lo_queue, loop_make_request_err);
  1777. + blk_queue_bounce_limit(lo->lo_queue, BLK_BOUNCE_ANY);
  1778. + blk_queue_max_segment_size(lo->lo_queue, MAX_SEGMENT_SIZE);
  1779. - set_capacity(disks[lo->lo_number], size);
  1780. - bd_set_size(bdev, size << 9);
  1781. + /*
  1782. + * we remap to a block device, make sure we correctly stack limits
  1783. + */
  1784. + if (S_ISBLK(inode->i_mode) && lo_device) {
  1785. + request_queue_t *q = bdev_get_queue(lo_device);
  1786. - set_blocksize(bdev, lo_blocksize);
  1787. + blk_queue_max_sectors(lo->lo_queue, q->max_sectors);
  1788. + blk_queue_max_phys_segments(lo->lo_queue,q->max_phys_segments);
  1789. + blk_queue_max_hw_segments(lo->lo_queue, q->max_hw_segments);
  1790. + blk_queue_max_segment_size(lo->lo_queue, q->max_segment_size);
  1791. + blk_queue_segment_boundary(lo->lo_queue, q->seg_boundary_mask);
  1792. + blk_queue_merge_bvec(lo->lo_queue, q->merge_bvec_fn);
  1793. + blk_queue_hardsect_size(lo->lo_queue, q->hardsect_size);
  1794. + }
  1795. +
  1796. + if (lo_flags & LO_FLAGS_DO_BMAP) {
  1797. + lo->old_gfp_mask = mapping_gfp_mask(inode->i_mapping);
  1798. + mapping_set_gfp_mask(inode->i_mapping, (lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)) | __GFP_HIGH);
  1799. + } else {
  1800. + lo->old_gfp_mask = -1;
  1801. + }
  1802. - kernel_thread(loop_thread, lo, CLONE_KERNEL);
  1803. + loop_set_softblksz(lo, bdev);
  1804. +
  1805. + error = kernel_thread(loop_thread, lo, CLONE_KERNEL);
  1806. + if(error < 0)
  1807. + goto out_mapping;
  1808. down(&lo->lo_sem);
  1809. + fput(file);
  1810. +#if defined(QUEUE_FLAG_PLUGGED)
  1811. + lo->lo_queue->unplug_fn = loop_unplug_loopdev;
  1812. +#endif
  1813. + lo->lo_queue->queuedata = lo;
  1814. + __module_get(THIS_MODULE);
  1815. return 0;
  1816. + out_mapping:
  1817. + if(lo->old_gfp_mask != -1)
  1818. + mapping_set_gfp_mask(inode->i_mapping, lo->old_gfp_mask);
  1819. + out_cleanup:
  1820. + loop_prealloc_cleanup(lo);
  1821. + fput(file);
  1822. out_putf:
  1823. fput(file);
  1824. out:
  1825. - /* This is safe: open() is still holding a reference. */
  1826. - module_put(THIS_MODULE);
  1827. return error;
  1828. }
  1829. -static int
  1830. -loop_release_xfer(struct loop_device *lo)
  1831. +static int loop_release_xfer(struct loop_device *lo)
  1832. {
  1833. int err = 0;
  1834. struct loop_func_table *xfer = lo->lo_encryption;
  1835. if (xfer) {
  1836. + lo->transfer = NULL;
  1837. if (xfer->release)
  1838. err = xfer->release(lo);
  1839. - lo->transfer = NULL;
  1840. lo->lo_encryption = NULL;
  1841. module_put(xfer->owner);
  1842. }
  1843. return err;
  1844. }
  1845. -static int
  1846. -loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
  1847. - const struct loop_info64 *i)
  1848. +static int loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, struct loop_info64 *i)
  1849. {
  1850. int err = 0;
  1851. if (xfer) {
  1852. struct module *owner = xfer->owner;
  1853. - if (!try_module_get(owner))
  1854. + if(!try_module_get(owner))
  1855. return -EINVAL;
  1856. if (xfer->init)
  1857. err = xfer->init(lo, i);
  1858. @@ -759,59 +1531,51 @@ static int loop_clr_fd(struct loop_devic
  1859. struct file *filp = lo->lo_backing_file;
  1860. int gfp = lo->old_gfp_mask;
  1861. - if (lo->lo_state != Lo_bound)
  1862. - return -ENXIO;
  1863. -
  1864. - if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
  1865. + if (bdev->bd_openers != 1) /* one for this fd being open */
  1866. return -EBUSY;
  1867. -
  1868. - if (filp == NULL)
  1869. + if (filp==NULL)
  1870. return -EINVAL;
  1871. - spin_lock_irq(&lo->lo_lock);
  1872. - lo->lo_state = Lo_rundown;
  1873. + lo->lo_queue->queuedata = NULL;
  1874. + lo->lo_queue->make_request_fn = loop_make_request_err;
  1875. if (atomic_dec_and_test(&lo->lo_pending))
  1876. - up(&lo->lo_bh_mutex);
  1877. - spin_unlock_irq(&lo->lo_lock);
  1878. -
  1879. + wake_up_interruptible(&lo->lo_bio_wait);
  1880. down(&lo->lo_sem);
  1881. + loop_prealloc_cleanup(lo);
  1882. lo->lo_backing_file = NULL;
  1883. -
  1884. loop_release_xfer(lo);
  1885. lo->transfer = NULL;
  1886. lo->ioctl = NULL;
  1887. lo->lo_device = NULL;
  1888. lo->lo_encryption = NULL;
  1889. - lo->lo_offset = 0;
  1890. - lo->lo_sizelimit = 0;
  1891. + lo->lo_offset = lo->lo_sizelimit = 0;
  1892. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1893. lo->lo_encrypt_key_size = 0;
  1894. lo->lo_flags = 0;
  1895. + lo->lo_init[1] = lo->lo_init[0] = 0;
  1896. + lo->lo_key_owner = 0;
  1897. + lo->key_data = NULL;
  1898. memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
  1899. memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
  1900. memset(lo->lo_file_name, 0, LO_NAME_SIZE);
  1901. invalidate_bdev(bdev, 0);
  1902. set_capacity(disks[lo->lo_number], 0);
  1903. - bd_set_size(bdev, 0);
  1904. - mapping_set_gfp_mask(filp->f_mapping, gfp);
  1905. - lo->lo_state = Lo_unbound;
  1906. + if (gfp != -1)
  1907. + mapping_set_gfp_mask(filp->f_dentry->d_inode->i_mapping, gfp);
  1908. fput(filp);
  1909. - /* This is safe: open() is still holding a reference. */
  1910. module_put(THIS_MODULE);
  1911. return 0;
  1912. }
  1913. -static int
  1914. -loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
  1915. +static int loop_set_status(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *info)
  1916. {
  1917. int err;
  1918. - struct loop_func_table *xfer;
  1919. + struct loop_func_table *xfer = NULL;
  1920. if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
  1921. !capable(CAP_SYS_ADMIN))
  1922. return -EPERM;
  1923. - if (lo->lo_state != Lo_bound)
  1924. - return -ENXIO;
  1925. if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
  1926. return -EINVAL;
  1927. @@ -819,6 +1583,22 @@ loop_set_status(struct loop_device *lo,
  1928. if (err)
  1929. return err;
  1930. + if ((loff_t)info->lo_offset < 0) {
  1931. + /* negative offset == remove offset from IV computations */
  1932. + lo->lo_offset = -(info->lo_offset);
  1933. + lo->lo_iv_remove = lo->lo_offset >> 9;
  1934. + } else {
  1935. + /* positive offset == include offset in IV computations */
  1936. + lo->lo_offset = info->lo_offset;
  1937. + lo->lo_iv_remove = 0;
  1938. + }
  1939. + lo->lo_offs_sec = lo->lo_offset >> 9;
  1940. + lo->lo_sizelimit = info->lo_sizelimit;
  1941. + err = figure_loop_size(lo, bdev);
  1942. + if (err)
  1943. + return err;
  1944. + loop_set_softblksz(lo, bdev);
  1945. +
  1946. if (info->lo_encrypt_type) {
  1947. unsigned int type = info->lo_encrypt_type;
  1948. @@ -827,31 +1607,20 @@ loop_set_status(struct loop_device *lo,
  1949. xfer = xfer_funcs[type];
  1950. if (xfer == NULL)
  1951. return -EINVAL;
  1952. - } else
  1953. - xfer = NULL;
  1954. -
  1955. + }
  1956. err = loop_init_xfer(lo, xfer, info);
  1957. if (err)
  1958. return err;
  1959. - if (lo->lo_offset != info->lo_offset ||
  1960. - lo->lo_sizelimit != info->lo_sizelimit) {
  1961. - lo->lo_offset = info->lo_offset;
  1962. - lo->lo_sizelimit = info->lo_sizelimit;
  1963. - if (figure_loop_size(lo))
  1964. - return -EFBIG;
  1965. - }
  1966. -
  1967. - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  1968. - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  1969. - lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  1970. - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  1971. -
  1972. if (!xfer)
  1973. xfer = &none_funcs;
  1974. lo->transfer = xfer->transfer;
  1975. lo->ioctl = xfer->ioctl;
  1976. -
  1977. +
  1978. + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  1979. + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  1980. + lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  1981. + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  1982. lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
  1983. lo->lo_init[0] = info->lo_init[0];
  1984. lo->lo_init[1] = info->lo_init[1];
  1985. @@ -861,18 +1630,16 @@ loop_set_status(struct loop_device *lo,
  1986. lo->lo_key_owner = current->uid;
  1987. }
  1988. + lo->lo_queue->make_request_fn = loop_make_request_real;
  1989. return 0;
  1990. }
  1991. -static int
  1992. -loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  1993. +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  1994. {
  1995. struct file *file = lo->lo_backing_file;
  1996. struct kstat stat;
  1997. int error;
  1998. - if (lo->lo_state != Lo_bound)
  1999. - return -ENXIO;
  2000. error = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
  2001. if (error)
  2002. return error;
  2003. @@ -881,17 +1648,18 @@ loop_get_status(struct loop_device *lo,
  2004. info->lo_device = huge_encode_dev(stat.dev);
  2005. info->lo_inode = stat.ino;
  2006. info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
  2007. - info->lo_offset = lo->lo_offset;
  2008. + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
  2009. info->lo_sizelimit = lo->lo_sizelimit;
  2010. info->lo_flags = lo->lo_flags;
  2011. memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
  2012. memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
  2013. - info->lo_encrypt_type =
  2014. - lo->lo_encryption ? lo->lo_encryption->number : 0;
  2015. + info->lo_encrypt_type = lo->lo_encryption ? lo->lo_encryption->number : 0;
  2016. if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
  2017. info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
  2018. memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
  2019. lo->lo_encrypt_key_size);
  2020. + info->lo_init[0] = lo->lo_init[0];
  2021. + info->lo_init[1] = lo->lo_init[1];
  2022. }
  2023. return 0;
  2024. }
  2025. @@ -905,7 +1673,6 @@ loop_info64_from_old(const struct loop_i
  2026. info64->lo_inode = info->lo_inode;
  2027. info64->lo_rdevice = info->lo_rdevice;
  2028. info64->lo_offset = info->lo_offset;
  2029. - info64->lo_sizelimit = 0;
  2030. info64->lo_encrypt_type = info->lo_encrypt_type;
  2031. info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
  2032. info64->lo_flags = info->lo_flags;
  2033. @@ -919,7 +1686,7 @@ loop_info64_from_old(const struct loop_i
  2034. }
  2035. static int
  2036. -loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
  2037. +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
  2038. {
  2039. memset(info, 0, sizeof(*info));
  2040. info->lo_number = info64->lo_number;
  2041. @@ -942,14 +1709,15 @@ loop_info64_to_old(const struct loop_inf
  2042. if (info->lo_device != info64->lo_device ||
  2043. info->lo_rdevice != info64->lo_rdevice ||
  2044. info->lo_inode != info64->lo_inode ||
  2045. - info->lo_offset != info64->lo_offset)
  2046. + info->lo_offset != info64->lo_offset ||
  2047. + info64->lo_sizelimit)
  2048. return -EOVERFLOW;
  2049. return 0;
  2050. }
  2051. static int
  2052. -loop_set_status_old(struct loop_device *lo, const struct loop_info *arg)
  2053. +loop_set_status_old(struct loop_device *lo, struct block_device *bdev, const struct loop_info *arg)
  2054. {
  2055. struct loop_info info;
  2056. struct loop_info64 info64;
  2057. @@ -957,17 +1725,18 @@ loop_set_status_old(struct loop_device *
  2058. if (copy_from_user(&info, arg, sizeof (struct loop_info)))
  2059. return -EFAULT;
  2060. loop_info64_from_old(&info, &info64);
  2061. - return loop_set_status(lo, &info64);
  2062. + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
  2063. + return loop_set_status(lo, bdev, &info64);
  2064. }
  2065. static int
  2066. -loop_set_status64(struct loop_device *lo, const struct loop_info64 *arg)
  2067. +loop_set_status64(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *arg)
  2068. {
  2069. struct loop_info64 info64;
  2070. if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
  2071. return -EFAULT;
  2072. - return loop_set_status(lo, &info64);
  2073. + return loop_set_status(lo, bdev, &info64);
  2074. }
  2075. static int
  2076. @@ -1003,31 +1772,45 @@ loop_get_status64(struct loop_device *lo
  2077. return err;
  2078. }
  2079. -static int lo_ioctl(struct inode * inode, struct file * file,
  2080. - unsigned int cmd, unsigned long arg)
  2081. +static int lo_ioctl(struct inode *inode, struct file * file, unsigned int cmd, unsigned long arg)
  2082. {
  2083. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2084. + struct block_device *bdev = inode->i_bdev;
  2085. + struct loop_device *lo = bdev->bd_disk->private_data;
  2086. int err;
  2087. - down(&lo->lo_ctl_mutex);
  2088. + down(&bdev->bd_sem);
  2089. +
  2090. + /*
  2091. + * LOOP_SET_FD can only be called when no device is attached.
  2092. + * All other ioctls can only be called when a device is attached.
  2093. + */
  2094. + if (bdev->bd_disk->queue->queuedata != NULL) {
  2095. + if (cmd == LOOP_SET_FD) {
  2096. + err = -EBUSY;
  2097. + goto out_err;
  2098. + }
  2099. + } else {
  2100. + if (cmd != LOOP_SET_FD) {
  2101. + err = -ENXIO;
  2102. + goto out_err;
  2103. + }
  2104. + }
  2105. +
  2106. switch (cmd) {
  2107. case LOOP_SET_FD:
  2108. - err = loop_set_fd(lo, file, inode->i_bdev, arg);
  2109. - break;
  2110. - case LOOP_CHANGE_FD:
  2111. - err = loop_change_fd(lo, file, inode->i_bdev, arg);
  2112. + err = loop_set_fd(lo, file, bdev, arg);
  2113. break;
  2114. case LOOP_CLR_FD:
  2115. - err = loop_clr_fd(lo, inode->i_bdev);
  2116. + err = loop_clr_fd(lo, bdev);
  2117. break;
  2118. case LOOP_SET_STATUS:
  2119. - err = loop_set_status_old(lo, (struct loop_info *) arg);
  2120. + err = loop_set_status_old(lo, bdev, (struct loop_info *) arg);
  2121. break;
  2122. case LOOP_GET_STATUS:
  2123. err = loop_get_status_old(lo, (struct loop_info *) arg);
  2124. break;
  2125. case LOOP_SET_STATUS64:
  2126. - err = loop_set_status64(lo, (struct loop_info64 *) arg);
  2127. + err = loop_set_status64(lo, bdev, (struct loop_info64 *) arg);
  2128. break;
  2129. case LOOP_GET_STATUS64:
  2130. err = loop_get_status64(lo, (struct loop_info64 *) arg);
  2131. @@ -1035,29 +1818,19 @@ static int lo_ioctl(struct inode * inode
  2132. default:
  2133. err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
  2134. }
  2135. - up(&lo->lo_ctl_mutex);
  2136. +out_err:
  2137. + up(&bdev->bd_sem);
  2138. return err;
  2139. }
  2140. static int lo_open(struct inode *inode, struct file *file)
  2141. {
  2142. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2143. -
  2144. - down(&lo->lo_ctl_mutex);
  2145. - lo->lo_refcnt++;
  2146. - up(&lo->lo_ctl_mutex);
  2147. -
  2148. return 0;
  2149. }
  2150. static int lo_release(struct inode *inode, struct file *file)
  2151. {
  2152. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2153. -
  2154. - down(&lo->lo_ctl_mutex);
  2155. - --lo->lo_refcnt;
  2156. - up(&lo->lo_ctl_mutex);
  2157. -
  2158. + sync_blockdev(inode->i_bdev);
  2159. return 0;
  2160. }
  2161. @@ -1091,21 +1864,18 @@ int loop_unregister_transfer(int number)
  2162. unsigned int n = number;
  2163. struct loop_device *lo;
  2164. struct loop_func_table *xfer;
  2165. + int x;
  2166. if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
  2167. return -EINVAL;
  2168. -
  2169. xfer_funcs[n] = NULL;
  2170. -
  2171. - for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
  2172. - down(&lo->lo_ctl_mutex);
  2173. -
  2174. + for (x = 0; x < max_loop; x++) {
  2175. + lo = loop_dev_ptr_arr[x];
  2176. + if (!lo)
  2177. + continue;
  2178. if (lo->lo_encryption == xfer)
  2179. loop_release_xfer(lo);
  2180. -
  2181. - up(&lo->lo_ctl_mutex);
  2182. }
  2183. -
  2184. return 0;
  2185. }
  2186. @@ -1116,7 +1886,7 @@ int __init loop_init(void)
  2187. {
  2188. int i;
  2189. - if (max_loop < 1 || max_loop > 256) {
  2190. + if ((max_loop < 1) || (max_loop > 256)) {
  2191. printk(KERN_WARNING "loop: invalid max_loop (must be between"
  2192. " 1 and 256), using default (8)\n");
  2193. max_loop = 8;
  2194. @@ -1125,62 +1895,81 @@ int __init loop_init(void)
  2195. if (register_blkdev(LOOP_MAJOR, "loop"))
  2196. return -EIO;
  2197. - loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
  2198. - if (!loop_dev)
  2199. + loop_dev_ptr_arr = kmalloc(max_loop * sizeof(struct loop_device *), GFP_KERNEL);
  2200. + if (!loop_dev_ptr_arr)
  2201. goto out_mem1;
  2202. - memset(loop_dev, 0, max_loop * sizeof(struct loop_device));
  2203. disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
  2204. if (!disks)
  2205. goto out_mem2;
  2206. for (i = 0; i < max_loop; i++) {
  2207. + loop_dev_ptr_arr[i] = kmalloc(sizeof(struct loop_device), GFP_KERNEL);
  2208. + if (!loop_dev_ptr_arr[i])
  2209. + goto out_mem3;
  2210. + }
  2211. +
  2212. + for (i = 0; i < max_loop; i++) {
  2213. disks[i] = alloc_disk(1);
  2214. if (!disks[i])
  2215. - goto out_mem3;
  2216. + goto out_mem4;
  2217. + }
  2218. +
  2219. + for (i = 0; i < max_loop; i++) {
  2220. + disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
  2221. + if (!disks[i]->queue)
  2222. + goto out_mem5;
  2223. + disks[i]->queue->queuedata = NULL;
  2224. + blk_queue_make_request(disks[i]->queue, loop_make_request_err);
  2225. + }
  2226. +
  2227. + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
  2228. + if (!lo_prealloc[i])
  2229. + continue;
  2230. + if (lo_prealloc[i] < LO_PREALLOC_MIN)
  2231. + lo_prealloc[i] = LO_PREALLOC_MIN;
  2232. + if (lo_prealloc[i] > LO_PREALLOC_MAX)
  2233. + lo_prealloc[i] = LO_PREALLOC_MAX;
  2234. }
  2235. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2236. + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR);
  2237. +#endif
  2238. +
  2239. devfs_mk_dir("loop");
  2240. for (i = 0; i < max_loop; i++) {
  2241. - struct loop_device *lo = &loop_dev[i];
  2242. + struct loop_device *lo = loop_dev_ptr_arr[i];
  2243. struct gendisk *disk = disks[i];
  2244. -
  2245. - memset(lo, 0, sizeof(*lo));
  2246. - lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
  2247. - if (!lo->lo_queue)
  2248. - goto out_mem4;
  2249. - init_MUTEX(&lo->lo_ctl_mutex);
  2250. - init_MUTEX_LOCKED(&lo->lo_sem);
  2251. - init_MUTEX_LOCKED(&lo->lo_bh_mutex);
  2252. + memset(lo, 0, sizeof(struct loop_device));
  2253. lo->lo_number = i;
  2254. - spin_lock_init(&lo->lo_lock);
  2255. + lo->lo_queue = disk->queue;
  2256. disk->major = LOOP_MAJOR;
  2257. disk->first_minor = i;
  2258. disk->fops = &lo_fops;
  2259. sprintf(disk->disk_name, "loop%d", i);
  2260. sprintf(disk->devfs_name, "loop/%d", i);
  2261. disk->private_data = lo;
  2262. - disk->queue = lo->lo_queue;
  2263. + add_disk(disk);
  2264. }
  2265. - /* We cannot fail after we call this, so another loop!*/
  2266. - for (i = 0; i < max_loop; i++)
  2267. - add_disk(disks[i]);
  2268. printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
  2269. return 0;
  2270. +out_mem5:
  2271. + while (i--)
  2272. + blk_put_queue(disks[i]->queue);
  2273. + i = max_loop;
  2274. out_mem4:
  2275. while (i--)
  2276. - blk_put_queue(loop_dev[i].lo_queue);
  2277. - devfs_remove("loop");
  2278. + put_disk(disks[i]);
  2279. i = max_loop;
  2280. out_mem3:
  2281. while (i--)
  2282. - put_disk(disks[i]);
  2283. + kfree(loop_dev_ptr_arr[i]);
  2284. kfree(disks);
  2285. out_mem2:
  2286. - kfree(loop_dev);
  2287. + kfree(loop_dev_ptr_arr);
  2288. out_mem1:
  2289. unregister_blkdev(LOOP_MAJOR, "loop");
  2290. printk(KERN_ERR "loop: ran out of memory\n");
  2291. @@ -1193,15 +1982,18 @@ void loop_exit(void)
  2292. for (i = 0; i < max_loop; i++) {
  2293. del_gendisk(disks[i]);
  2294. - blk_put_queue(loop_dev[i].lo_queue);
  2295. put_disk(disks[i]);
  2296. + blk_put_queue(loop_dev_ptr_arr[i]->lo_queue);
  2297. + kfree(loop_dev_ptr_arr[i]);
  2298. }
  2299. devfs_remove("loop");
  2300. - if (unregister_blkdev(LOOP_MAJOR, "loop"))
  2301. - printk(KERN_WARNING "loop: cannot unregister blkdev\n");
  2302. -
  2303. + unregister_blkdev(LOOP_MAJOR, "loop");
  2304. kfree(disks);
  2305. - kfree(loop_dev);
  2306. + kfree(loop_dev_ptr_arr);
  2307. +
  2308. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2309. + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP);
  2310. +#endif
  2311. }
  2312. module_init(loop_init);
  2313. diff -pruN linux-2.6.6_orig/drivers/misc/Makefile linux-2.6.6/drivers/misc/Makefile
  2314. --- linux-2.6.6_orig/drivers/misc/Makefile 2004-05-16 15:07:54.000000000 +0200
  2315. +++ linux-2.6.6/drivers/misc/Makefile 2004-05-16 15:08:27.000000000 +0200
  2316. @@ -4,3 +4,23 @@
  2317. obj- := misc.o # Dummy rule to force built-in.o to be made
  2318. obj-$(CONFIG_IBM_ASM) += ibmasm/
  2319. +
  2320. +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
  2321. +AES_PENTIUM_ASM=n
  2322. +ifeq ($(CONFIG_X86),y)
  2323. +ifneq ($(CONFIG_X86_64),y)
  2324. +ifneq ($(CONFIG_M386),y)
  2325. +ifneq ($(CONFIG_M486),y)
  2326. + AES_PENTIUM_ASM=y
  2327. +endif
  2328. +endif
  2329. +endif
  2330. +endif
  2331. +ifeq ($(AES_PENTIUM_ASM),y)
  2332. + obj-y += aes-i586.o md5-i586.o crypto-ksym.o
  2333. + AFLAGS_aes-i586.o := -DUSE_UNDERLINE=1
  2334. +else
  2335. + obj-y += aes.o md5.o crypto-ksym.o
  2336. + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1
  2337. +endif
  2338. +endif
  2339. diff -pruN linux-2.6.6_orig/drivers/misc/aes-i586.S linux-2.6.6/drivers/misc/aes-i586.S
  2340. --- linux-2.6.6_orig/drivers/misc/aes-i586.S 1970-01-01 01:00:00.000000000 +0100
  2341. +++ linux-2.6.6/drivers/misc/aes-i586.S 2004-05-16 15:08:27.000000000 +0200
  2342. @@ -0,0 +1,922 @@
  2343. +//
  2344. +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
  2345. +// All rights reserved.
  2346. +//
  2347. +// TERMS
  2348. +//
  2349. +// Redistribution and use in source and binary forms, with or without
  2350. +// modification, are permitted subject to the following conditions:
  2351. +//
  2352. +// 1. Redistributions of source code must retain the above copyright
  2353. +// notice, this list of conditions and the following disclaimer.
  2354. +//
  2355. +// 2. Redistributions in binary form must reproduce the above copyright
  2356. +// notice, this list of conditions and the following disclaimer in the
  2357. +// documentation and/or other materials provided with the distribution.
  2358. +//
  2359. +// 3. The copyright holder's name must not be used to endorse or promote
  2360. +// any products derived from this software without his specific prior
  2361. +// written permission.
  2362. +//
  2363. +// This software is provided 'as is' with no express or implied warranties
  2364. +// of correctness or fitness for purpose.
  2365. +
  2366. +// Modified by Jari Ruusu, December 24 2001
  2367. +// - Converted syntax to GNU CPP/assembler syntax
  2368. +// - C programming interface converted back to "old" API
  2369. +// - Minor portability cleanups and speed optimizations
  2370. +
  2371. +// Modified by Jari Ruusu, April 11 2002
  2372. +// - Added above copyright and terms to resulting object code so that
  2373. +// binary distributions can avoid legal trouble
  2374. +
  2375. +// An AES (Rijndael) implementation for the Pentium. This version only
  2376. +// implements the standard AES block length (128 bits, 16 bytes). This code
  2377. +// does not preserve the eax, ecx or edx registers or the arithmetic status
  2378. +// flags. However, the ebx, esi, edi, and ebp registers are preserved across
  2379. +// calls.
  2380. +
  2381. +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
  2382. +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2383. +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2384. +
  2385. +#if defined(USE_UNDERLINE)
  2386. +# define aes_set_key _aes_set_key
  2387. +# define aes_encrypt _aes_encrypt
  2388. +# define aes_decrypt _aes_decrypt
  2389. +#endif
  2390. +#if !defined(ALIGN32BYTES)
  2391. +# define ALIGN32BYTES 32
  2392. +#endif
  2393. +
  2394. + .file "aes-i586.S"
  2395. + .globl aes_set_key
  2396. + .globl aes_encrypt
  2397. + .globl aes_decrypt
  2398. +
  2399. + .text
  2400. +copyright:
  2401. + .ascii " \000"
  2402. + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
  2403. + .ascii "All rights reserved.\000"
  2404. + .ascii " \000"
  2405. + .ascii "TERMS\000"
  2406. + .ascii " \000"
  2407. + .ascii " Redistribution and use in source and binary forms, with or without\000"
  2408. + .ascii " modification, are permitted subject to the following conditions:\000"
  2409. + .ascii " \000"
  2410. + .ascii " 1. Redistributions of source code must retain the above copyright\000"
  2411. + .ascii " notice, this list of conditions and the following disclaimer.\000"
  2412. + .ascii " \000"
  2413. + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
  2414. + .ascii " notice, this list of conditions and the following disclaimer in the\000"
  2415. + .ascii " documentation and/or other materials provided with the distribution.\000"
  2416. + .ascii " \000"
  2417. + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
  2418. + .ascii " any products derived from this software without his specific prior\000"
  2419. + .ascii " written permission.\000"
  2420. + .ascii " \000"
  2421. + .ascii " This software is provided 'as is' with no express or implied warranties\000"
  2422. + .ascii " of correctness or fitness for purpose.\000"
  2423. + .ascii " \000"
  2424. +
  2425. +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  2426. +
  2427. +// offsets to parameters with one register pushed onto stack
  2428. +
  2429. +#define ctx 8 // AES context structure
  2430. +#define in_blk 12 // input byte array address parameter
  2431. +#define out_blk 16 // output byte array address parameter
  2432. +
  2433. +// offsets in context structure
  2434. +
  2435. +#define nkey 0 // key length, size 4
  2436. +#define nrnd 4 // number of rounds, size 4
  2437. +#define ekey 8 // encryption key schedule base address, size 256
  2438. +#define dkey 264 // decryption key schedule base address, size 256
  2439. +
  2440. +// This macro performs a forward encryption cycle. It is entered with
  2441. +// the first previous round column values in %eax, %ebx, %esi and %edi and
  2442. +// exits with the final values in the same registers.
  2443. +
  2444. +#define fwd_rnd(p1,p2) \
  2445. + mov %ebx,(%esp) ;\
  2446. + movzbl %al,%edx ;\
  2447. + mov %eax,%ecx ;\
  2448. + mov p2(%ebp),%eax ;\
  2449. + mov %edi,4(%esp) ;\
  2450. + mov p2+12(%ebp),%edi ;\
  2451. + xor p1(,%edx,4),%eax ;\
  2452. + movzbl %ch,%edx ;\
  2453. + shr $16,%ecx ;\
  2454. + mov p2+4(%ebp),%ebx ;\
  2455. + xor p1+tlen(,%edx,4),%edi ;\
  2456. + movzbl %cl,%edx ;\
  2457. + movzbl %ch,%ecx ;\
  2458. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  2459. + mov %esi,%ecx ;\
  2460. + mov p1+2*tlen(,%edx,4),%esi ;\
  2461. + movzbl %cl,%edx ;\
  2462. + xor p1(,%edx,4),%esi ;\
  2463. + movzbl %ch,%edx ;\
  2464. + shr $16,%ecx ;\
  2465. + xor p1+tlen(,%edx,4),%ebx ;\
  2466. + movzbl %cl,%edx ;\
  2467. + movzbl %ch,%ecx ;\
  2468. + xor p1+2*tlen(,%edx,4),%eax ;\
  2469. + mov (%esp),%edx ;\
  2470. + xor p1+3*tlen(,%ecx,4),%edi ;\
  2471. + movzbl %dl,%ecx ;\
  2472. + xor p2+8(%ebp),%esi ;\
  2473. + xor p1(,%ecx,4),%ebx ;\
  2474. + movzbl %dh,%ecx ;\
  2475. + shr $16,%edx ;\
  2476. + xor p1+tlen(,%ecx,4),%eax ;\
  2477. + movzbl %dl,%ecx ;\
  2478. + movzbl %dh,%edx ;\
  2479. + xor p1+2*tlen(,%ecx,4),%edi ;\
  2480. + mov 4(%esp),%ecx ;\
  2481. + xor p1+3*tlen(,%edx,4),%esi ;\
  2482. + movzbl %cl,%edx ;\
  2483. + xor p1(,%edx,4),%edi ;\
  2484. + movzbl %ch,%edx ;\
  2485. + shr $16,%ecx ;\
  2486. + xor p1+tlen(,%edx,4),%esi ;\
  2487. + movzbl %cl,%edx ;\
  2488. + movzbl %ch,%ecx ;\
  2489. + xor p1+2*tlen(,%edx,4),%ebx ;\
  2490. + xor p1+3*tlen(,%ecx,4),%eax
  2491. +
  2492. +// This macro performs an inverse encryption cycle. It is entered with
  2493. +// the first previous round column values in %eax, %ebx, %esi and %edi and
  2494. +// exits with the final values in the same registers.
  2495. +
  2496. +#define inv_rnd(p1,p2) \
  2497. + movzbl %al,%edx ;\
  2498. + mov %ebx,(%esp) ;\
  2499. + mov %eax,%ecx ;\
  2500. + mov p2(%ebp),%eax ;\
  2501. + mov %edi,4(%esp) ;\
  2502. + mov p2+4(%ebp),%ebx ;\
  2503. + xor p1(,%edx,4),%eax ;\
  2504. + movzbl %ch,%edx ;\
  2505. + shr $16,%ecx ;\
  2506. + mov p2+12(%ebp),%edi ;\
  2507. + xor p1+tlen(,%edx,4),%ebx ;\
  2508. + movzbl %cl,%edx ;\
  2509. + movzbl %ch,%ecx ;\
  2510. + xor p1+3*tlen(,%ecx,4),%edi ;\
  2511. + mov %esi,%ecx ;\
  2512. + mov p1+2*tlen(,%edx,4),%esi ;\
  2513. + movzbl %cl,%edx ;\
  2514. + xor p1(,%edx,4),%esi ;\
  2515. + movzbl %ch,%edx ;\
  2516. + shr $16,%ecx ;\
  2517. + xor p1+tlen(,%edx,4),%edi ;\
  2518. + movzbl %cl,%edx ;\
  2519. + movzbl %ch,%ecx ;\
  2520. + xor p1+2*tlen(,%edx,4),%eax ;\
  2521. + mov (%esp),%edx ;\
  2522. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  2523. + movzbl %dl,%ecx ;\
  2524. + xor p2+8(%ebp),%esi ;\
  2525. + xor p1(,%ecx,4),%ebx ;\
  2526. + movzbl %dh,%ecx ;\
  2527. + shr $16,%edx ;\
  2528. + xor p1+tlen(,%ecx,4),%esi ;\
  2529. + movzbl %dl,%ecx ;\
  2530. + movzbl %dh,%edx ;\
  2531. + xor p1+2*tlen(,%ecx,4),%edi ;\
  2532. + mov 4(%esp),%ecx ;\
  2533. + xor p1+3*tlen(,%edx,4),%eax ;\
  2534. + movzbl %cl,%edx ;\
  2535. + xor p1(,%edx,4),%edi ;\
  2536. + movzbl %ch,%edx ;\
  2537. + shr $16,%ecx ;\
  2538. + xor p1+tlen(,%edx,4),%eax ;\
  2539. + movzbl %cl,%edx ;\
  2540. + movzbl %ch,%ecx ;\
  2541. + xor p1+2*tlen(,%edx,4),%ebx ;\
  2542. + xor p1+3*tlen(,%ecx,4),%esi
  2543. +
  2544. +// AES (Rijndael) Encryption Subroutine
  2545. +
  2546. + .text
  2547. + .align ALIGN32BYTES
  2548. +aes_encrypt:
  2549. + push %ebp // save caller's %ebp; the ctx/in_blk/out_blk offsets assume exactly this one push
  2550. + mov ctx(%esp),%ebp // pointer to context
  2551. + mov in_blk(%esp),%ecx // pointer to input block
  2552. + push %ebx // save %ebx, %esi, %edi (popped again before ret)
  2553. + push %esi
  2554. + push %edi
  2555. + mov nrnd(%ebp),%edx // number of rounds
  2556. + lea ekey+16(%ebp),%ebp // key pointer
  2557. +
  2558. +// input four columns and xor in first round key
  2559. +
  2560. + mov (%ecx),%eax
  2561. + mov 4(%ecx),%ebx
  2562. + mov 8(%ecx),%esi
  2563. + mov 12(%ecx),%edi
  2564. + xor -16(%ebp),%eax // %ebp is ekey+16, so -16..-4 address the first 16 bytes of ekey
  2565. + xor -12(%ebp),%ebx
  2566. + xor -8(%ebp),%esi
  2567. + xor -4(%ebp),%edi
  2568. +
  2569. + sub $8,%esp // space for register saves on stack
  2570. +
  2571. + sub $10,%edx
  2572. + je aes_15 // nrnd == 10: enter at the 10-round label
  2573. + add $32,%ebp // advance key pointer by two 16-byte round-key slots
  2574. + sub $2,%edx
  2575. + je aes_13 // nrnd == 12: enter at the 12-round label
  2576. + add $32,%ebp // otherwise fall through to the full 14-round sequence
  2577. +
  2578. + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
  2579. + fwd_rnd(aes_ft_tab,-48)
  2580. +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
  2581. + fwd_rnd(aes_ft_tab,-16)
  2582. +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
  2583. + fwd_rnd(aes_ft_tab,16)
  2584. + fwd_rnd(aes_ft_tab,32)
  2585. + fwd_rnd(aes_ft_tab,48)
  2586. + fwd_rnd(aes_ft_tab,64)
  2587. + fwd_rnd(aes_ft_tab,80)
  2588. + fwd_rnd(aes_ft_tab,96)
  2589. + fwd_rnd(aes_ft_tab,112)
  2590. + fwd_rnd(aes_ft_tab,128)
  2591. + fwd_rnd(aes_fl_tab,144) // last round uses a different table
  2592. +
  2593. +// move final values to the output array.
  2594. +
  2595. + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes on top of the one-push offset
  2596. + add $8,%esp // release the scratch space
  2597. + mov %eax,(%ebp)
  2598. + mov %ebx,4(%ebp)
  2599. + mov %esi,8(%ebp)
  2600. + mov %edi,12(%ebp)
  2601. + pop %edi // restore saved registers in reverse push order
  2602. + pop %esi
  2603. + pop %ebx
  2604. + pop %ebp
  2605. + ret
  2606. +
  2607. +
  2608. +// AES (Rijndael) Decryption Subroutine
  2609. +
  2610. + .align ALIGN32BYTES
  2611. +aes_decrypt:
  2612. + push %ebp // save caller's %ebp; the ctx/in_blk/out_blk offsets assume exactly this one push
  2613. + mov ctx(%esp),%ebp // pointer to context
  2614. + mov in_blk(%esp),%ecx // pointer to input block
  2615. + push %ebx // save %ebx, %esi, %edi (popped again before ret)
  2616. + push %esi
  2617. + push %edi
  2618. + mov nrnd(%ebp),%edx // number of rounds
  2619. + lea dkey+16(%ebp),%ebp // key pointer
  2620. +
  2621. +// input four columns and xor in first round key
  2622. +
  2623. + mov (%ecx),%eax
  2624. + mov 4(%ecx),%ebx
  2625. + mov 8(%ecx),%esi
  2626. + mov 12(%ecx),%edi
  2627. + xor -16(%ebp),%eax // %ebp is dkey+16, so -16..-4 address the first 16 bytes of dkey
  2628. + xor -12(%ebp),%ebx
  2629. + xor -8(%ebp),%esi
  2630. + xor -4(%ebp),%edi
  2631. +
  2632. + sub $8,%esp // space for register saves on stack
  2633. +
  2634. + sub $10,%edx
  2635. + je aes_25 // nrnd == 10: enter at the 10-round label
  2636. + add $32,%ebp // advance key pointer by two 16-byte round-key slots
  2637. + sub $2,%edx
  2638. + je aes_23 // nrnd == 12: enter at the 12-round label
  2639. + add $32,%ebp // otherwise fall through to the full 14-round sequence
  2640. +
  2641. + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
  2642. + inv_rnd(aes_it_tab,-48)
  2643. +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
  2644. + inv_rnd(aes_it_tab,-16)
  2645. +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
  2646. + inv_rnd(aes_it_tab,16)
  2647. + inv_rnd(aes_it_tab,32)
  2648. + inv_rnd(aes_it_tab,48)
  2649. + inv_rnd(aes_it_tab,64)
  2650. + inv_rnd(aes_it_tab,80)
  2651. + inv_rnd(aes_it_tab,96)
  2652. + inv_rnd(aes_it_tab,112)
  2653. + inv_rnd(aes_it_tab,128)
  2654. + inv_rnd(aes_il_tab,144) // last round uses a different table
  2655. +
  2656. +// move final values to the output array.
  2657. +
  2658. + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes on top of the one-push offset
  2659. + add $8,%esp // release the scratch space
  2660. + mov %eax,(%ebp)
  2661. + mov %ebx,4(%ebp)
  2662. + mov %esi,8(%ebp)
  2663. + mov %edi,12(%ebp)
  2664. + pop %edi // restore saved registers in reverse push order
  2665. + pop %esi
  2666. + pop %ebx
  2667. + pop %ebp
  2668. + ret
  2669. +
  2670. +// AES (Rijndael) Key Schedule Subroutine
  2671. +
  2672. +// input/output parameters
  2673. +
  2674. +#define aes_cx 12 // AES context
  2675. +#define in_key 16 // key input array address
  2676. +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
  2677. +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
  2678. +
  2679. +// offsets for locals
  2680. +
  2681. +#define cnt -4
  2682. +#define kpf -8
  2683. +#define slen 8
  2684. +
  2685. +// mix_col(p1): column mixing operation on the 32-bit word in %ebx that
  2686. +// gives a 32-bit result in %eax. Each of the 4 bytes of the input column
  2687. +// indexes one of 4 tables of 256 32-bit words (p1, p1+tlen, p1+2*tlen,
  2688. +// p1+3*tlen); the four entries are xored together to form the output.
  2689. +// Uses %ecx as scratch and leaves %ebx rotated by 16 bits.
  2690. +#define mix_col(p1) \
  2691. + movzbl %bl,%ecx ;\
  2692. + mov p1(,%ecx,4),%eax ;\
  2693. + movzbl %bh,%ecx ;\
  2694. + ror $16,%ebx ;\
  2695. + xor p1+tlen(,%ecx,4),%eax ;\
  2696. + movzbl %bl,%ecx ;\
  2697. + xor p1+2*tlen(,%ecx,4),%eax ;\
  2698. + movzbl %bh,%ecx ;\
  2699. + xor p1+3*tlen(,%ecx,4),%eax
  2700. +
  2701. +// Key Schedule Macros: %esi,%ebp,%edx,%ebx carry the four newest key words
  2702. +// ksc4(p1): 4-word key step; xors aes_fl_tab lookup of rotated %ebx and rcon[p1] in, stores 4 words at 16*p1(%edi)
  2703. +#define ksc4(p1) \
  2704. + rol $24,%ebx ;\
  2705. + mix_col(aes_fl_tab) ;\
  2706. + ror $8,%ebx ;\
  2707. + xor 4*p1+aes_rcon_tab,%eax ;\
  2708. + xor %eax,%esi ;\
  2709. + xor %esi,%ebp ;\
  2710. + mov %esi,16*p1(%edi) ;\
  2711. + mov %ebp,16*p1+4(%edi) ;\
  2712. + xor %ebp,%edx ;\
  2713. + xor %edx,%ebx ;\
  2714. + mov %edx,16*p1+8(%edi) ;\
  2715. + mov %ebx,16*p1+12(%edi)
  2716. +// ksc6(p1): 6-word key step; the first two words chain through memory at 24*p1-24(%edi), the last four through the registers
  2717. +#define ksc6(p1) \
  2718. + rol $24,%ebx ;\
  2719. + mix_col(aes_fl_tab) ;\
  2720. + ror $8,%ebx ;\
  2721. + xor 4*p1+aes_rcon_tab,%eax ;\
  2722. + xor 24*p1-24(%edi),%eax ;\
  2723. + mov %eax,24*p1(%edi) ;\
  2724. + xor 24*p1-20(%edi),%eax ;\
  2725. + mov %eax,24*p1+4(%edi) ;\
  2726. + xor %eax,%esi ;\
  2727. + xor %esi,%ebp ;\
  2728. + mov %esi,24*p1+8(%edi) ;\
  2729. + mov %ebp,24*p1+12(%edi) ;\
  2730. + xor %ebp,%edx ;\
  2731. + xor %edx,%ebx ;\
  2732. + mov %edx,24*p1+16(%edi) ;\
  2733. + mov %ebx,24*p1+20(%edi)
  2734. +// ksc8(p1): 8-word key step; four words chain through memory, then a second aes_fl_tab lookup (no rotate, no rcon) feeds the four register words
  2735. +#define ksc8(p1) \
  2736. + rol $24,%ebx ;\
  2737. + mix_col(aes_fl_tab) ;\
  2738. + ror $8,%ebx ;\
  2739. + xor 4*p1+aes_rcon_tab,%eax ;\
  2740. + xor 32*p1-32(%edi),%eax ;\
  2741. + mov %eax,32*p1(%edi) ;\
  2742. + xor 32*p1-28(%edi),%eax ;\
  2743. + mov %eax,32*p1+4(%edi) ;\
  2744. + xor 32*p1-24(%edi),%eax ;\
  2745. + mov %eax,32*p1+8(%edi) ;\
  2746. + xor 32*p1-20(%edi),%eax ;\
  2747. + mov %eax,32*p1+12(%edi) ;\
  2748. + push %ebx ;\
  2749. + mov %eax,%ebx ;\
  2750. + mix_col(aes_fl_tab) ;\
  2751. + pop %ebx ;\
  2752. + xor %eax,%esi ;\
  2753. + xor %esi,%ebp ;\
  2754. + mov %esi,32*p1+16(%edi) ;\
  2755. + mov %ebp,32*p1+20(%edi) ;\
  2756. + xor %ebp,%edx ;\
  2757. + xor %edx,%ebx ;\
  2758. + mov %edx,32*p1+24(%edi) ;\
  2759. + mov %ebx,32*p1+28(%edi)
  2760. +// aes_set_key: stack arguments at aes_cx/in_key/key_ln/ed_flg(%ebp); fills ekey and, when ed_flg is 0, dkey in the context
  2761. + .align ALIGN32BYTES
  2762. +aes_set_key:
  2763. + pushfl // EFLAGS is restored by popfl on exit (cld below changes DF)
  2764. + push %ebp
  2765. + mov %esp,%ebp
  2766. + sub $slen,%esp // reserve slen bytes for locals (cnt lives at -4(%ebp))
  2767. + push %ebx // ebx, esi and edi are restored at aes_39
  2768. + push %esi
  2769. + push %edi
  2770. +
  2771. + mov aes_cx(%ebp),%edx // edx -> AES context
  2772. +
  2773. + mov key_ln(%ebp),%ecx // key length
  2774. + cmpl $128,%ecx // values below 128 are taken as a byte count
  2775. + jb aes_30
  2776. + shr $3,%ecx // otherwise convert bits to bytes
  2777. +aes_30: cmpl $32,%ecx
  2778. + je aes_32
  2779. + cmpl $24,%ecx
  2780. + je aes_32
  2781. + mov $16,%ecx // any length other than 24 or 32 bytes falls back to 16
  2782. +aes_32: shr $2,%ecx // bytes -> 32-bit words (4, 6 or 8)
  2783. + mov %ecx,nkey(%edx)
  2784. +
  2785. + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
  2786. + mov %eax,nrnd(%edx)
  2787. +
  2788. + mov in_key(%ebp),%esi // key input array
  2789. + lea ekey(%edx),%edi // key position in AES context
  2790. + cld // string instructions below must step esi/edi upwards
  2791. + push %ebp // ebp is reused for a key word; popped again at aes_37
  2792. + mov %ecx,%eax // save key length in eax
  2793. + rep ; movsl // words in the key schedule
  2794. + mov -4(%esi),%ebx // put some values in registers
  2795. + mov -8(%esi),%edx // to allow faster code
  2796. + mov -12(%esi),%ebp // (the last four words of the input key,
  2797. + mov -16(%esi),%esi // oldest in esi, newest in ebx)
  2798. +
  2799. + cmpl $4,%eax // jump on key size
  2800. + je aes_36
  2801. + cmpl $6,%eax
  2802. + je aes_35
  2803. +// 8-word key: seven ksc8 steps, edi points just past the copied key
  2804. + ksc8(0)
  2805. + ksc8(1)
  2806. + ksc8(2)
  2807. + ksc8(3)
  2808. + ksc8(4)
  2809. + ksc8(5)
  2810. + ksc8(6)
  2811. + jmp aes_37
  2812. +aes_35: ksc6(0) // 6-word key: eight ksc6 steps
  2813. + ksc6(1)
  2814. + ksc6(2)
  2815. + ksc6(3)
  2816. + ksc6(4)
  2817. + ksc6(5)
  2818. + ksc6(6)
  2819. + ksc6(7)
  2820. + jmp aes_37
  2821. +aes_36: ksc4(0) // 4-word key: ten ksc4 steps
  2822. + ksc4(1)
  2823. + ksc4(2)
  2824. + ksc4(3)
  2825. + ksc4(4)
  2826. + ksc4(5)
  2827. + ksc4(6)
  2828. + ksc4(7)
  2829. + ksc4(8)
  2830. + ksc4(9)
  2831. +aes_37: pop %ebp // frame pointer back, arguments reachable again
  2832. + mov aes_cx(%ebp),%edx // edx -> AES context
  2833. + cmpl $0,ed_flg(%ebp)
  2834. + jne aes_39 // non-zero flag: encryption schedule only
  2835. +
  2836. +// compile decryption key schedule from encryption schedule - reverse
  2837. +// order and do mix_column operation on round keys except first and last
  2838. +
  2839. + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
  2840. + shl $2,%eax // eax = 4 * nrnd (words)
  2841. + lea dkey(%edx,%eax,4),%edi // edi -> dkey + 16 * nrnd bytes
  2842. + lea ekey(%edx),%esi // kf = cx->e_key
  2843. +
  2844. + movsl // copy first round key (unmodified)
  2845. + movsl
  2846. + movsl
  2847. + movsl
  2848. + sub $32,%edi // step back to the previous 16-byte dkey slot
  2849. + movl $1,cnt(%ebp) // round keys 1 .. nrnd-1 go through aes_im_tab
  2850. +aes_38: // do mix column on each column of
  2851. + lodsl // each round key
  2852. + mov %eax,%ebx
  2853. + mix_col(aes_im_tab)
  2854. + stosl
  2855. + lodsl
  2856. + mov %eax,%ebx
  2857. + mix_col(aes_im_tab)
  2858. + stosl
  2859. + lodsl
  2860. + mov %eax,%ebx
  2861. + mix_col(aes_im_tab)
  2862. + stosl
  2863. + lodsl
  2864. + mov %eax,%ebx
  2865. + mix_col(aes_im_tab)
  2866. + stosl
  2867. + sub $32,%edi // next destination is one round key lower
  2868. +
  2869. + incl cnt(%ebp)
  2870. + mov cnt(%ebp),%eax
  2871. + cmp nrnd(%edx),%eax
  2872. + jb aes_38 // loop while cnt < nrnd
  2873. +
  2874. + movsl // copy last round key (unmodified)
  2875. + movsl
  2876. + movsl
  2877. + movsl
  2878. +aes_39: pop %edi
  2879. + pop %esi
  2880. + pop %ebx
  2881. + mov %ebp,%esp // drop the locals
  2882. + pop %ebp
  2883. + popfl
  2884. + ret
  2885. +
  2886. +
  2887. +// finite field multiplies by {02}, {04} and {08}, reduced with polynomial 0x11b
  2888. +// (arguments are not parenthesized; the table macros in this file pass byte literals)
  2889. +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
  2890. +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
  2891. +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
  2892. +
  2893. +// finite field multiplies required in table generation ({03},{09},{0b},{0d},{0e})
  2894. +
  2895. +#define f3(x) (f2(x) ^ x)
  2896. +#define f9(x) (f8(x) ^ x)
  2897. +#define fb(x) (f8(x) ^ f2(x) ^ x)
  2898. +#define fd(x) (f8(x) ^ f4(x) ^ x)
  2899. +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
  2900. +
  2901. +// These defines generate the forward table entries (u1..u3 are u0 rotated left by 8, 16, 24 bits)
  2902. +
  2903. +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
  2904. +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
  2905. +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
  2906. +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
  2907. +
  2908. +// These defines generate the inverse table entries (v1..v3 are v0 rotated left by 8, 16, 24 bits)
  2909. +
  2910. +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
  2911. +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
  2912. +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
  2913. +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
  2914. +
  2915. +// These defines generate entries for the last round tables (byte x placed in byte lane 0..3)
  2916. +
  2917. +#define w0(x) (x)
  2918. +#define w1(x) (x << 8)
  2919. +#define w2(x) (x << 16)
  2920. +#define w3(x) (x << 24)
  2921. +
  2922. +// macros to generate inverse mix column tables (needed for the key schedule):
  2923. +// im_dataN(p1) emits .long p1(n) for the 32 values n = 32*N .. 32*N+31
  2924. +#define im_data0(p1) \
  2925. + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
  2926. + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
  2927. + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
  2928. + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
  2929. +#define im_data1(p1) \
  2930. + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
  2931. + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
  2932. + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
  2933. + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
  2934. +#define im_data2(p1) \
  2935. + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
  2936. + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
  2937. + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
  2938. + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
  2939. +#define im_data3(p1) \
  2940. + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
  2941. + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
  2942. + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
  2943. + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
  2944. +#define im_data4(p1) \
  2945. + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
  2946. + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
  2947. + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
  2948. + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
  2949. +#define im_data5(p1) \
  2950. + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
  2951. + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
  2952. + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
  2953. + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
  2954. +#define im_data6(p1) \
  2955. + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
  2956. + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
  2957. + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
  2958. + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
  2959. +#define im_data7(p1) \
  2960. + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
  2961. + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
  2962. + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
  2963. + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
  2964. +
  2965. +// S-box data - 256 entries, split over sb_data0..sb_data7 (32 entries each);
  2966. +// p1 is the entry generator macro (u0..u3 or w0..w3) applied to every S-box byte
  2967. +#define sb_data0(p1) \
  2968. + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
  2969. + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
  2970. + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
  2971. + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
  2972. +#define sb_data1(p1) \
  2973. + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
  2974. + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
  2975. + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
  2976. + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
  2977. +#define sb_data2(p1) \
  2978. + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
  2979. + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
  2980. + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
  2981. + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
  2982. +#define sb_data3(p1) \
  2983. + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
  2984. + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
  2985. + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
  2986. + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
  2987. +#define sb_data4(p1) \
  2988. + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
  2989. + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
  2990. + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
  2991. + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
  2992. +#define sb_data5(p1) \
  2993. + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
  2994. + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
  2995. + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
  2996. + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
  2997. +#define sb_data6(p1) \
  2998. + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
  2999. + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
  3000. + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
  3001. + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
  3002. +#define sb_data7(p1) \
  3003. + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
  3004. + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
  3005. + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
  3006. + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
  3007. +
  3008. +// Inverse S-box data - 256 entries, split over ib_data0..ib_data7 (32 entries each);
  3009. +// p1 is the entry generator macro (v0..v3 or w0..w3) applied to every inverse S-box byte
  3010. +#define ib_data0(p1) \
  3011. + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
  3012. + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
  3013. + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
  3014. + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
  3015. +#define ib_data1(p1) \
  3016. + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
  3017. + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
  3018. + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
  3019. + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
  3020. +#define ib_data2(p1) \
  3021. + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
  3022. + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
  3023. + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
  3024. + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
  3025. +#define ib_data3(p1) \
  3026. + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
  3027. + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
  3028. + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
  3029. + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
  3030. +#define ib_data4(p1) \
  3031. + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
  3032. + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
  3033. + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
  3034. + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
  3035. +#define ib_data5(p1) \
  3036. + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
  3037. + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
  3038. + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
  3039. + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
  3040. +#define ib_data6(p1) \
  3041. + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
  3042. + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
  3043. + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
  3044. + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
  3045. +#define ib_data7(p1) \
  3046. + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
  3047. + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
  3048. + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
  3049. + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
  3050. +
  3051. +// The rcon_table (needed for the key schedule)
  3052. +//
  3053. +// Here is original Dr Brian Gladman's source code:
  3054. +// _rcon_tab:
  3055. +// %assign x 1
  3056. +// %rep 29
  3057. +// dd x
  3058. +// %assign x f2(x)
  3059. +// %endrep
  3060. +//
  3061. +// Here is precomputed output (it's more portable this way):
  3062. +// 29 32-bit entries; the ksc macros read entry p1 at 4*p1+aes_rcon_tab
  3063. + .align ALIGN32BYTES
  3064. +aes_rcon_tab:
  3065. + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
  3066. + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
  3067. + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
  3068. + .long 0xb3,0x7d,0xfa,0xef,0xc5
  3069. +
  3070. +// The forward xor tables: four consecutive 256-entry tables, every S-box
  3071. +// byte expanded by u0, u1, u2 and u3 respectively
  3072. + .align ALIGN32BYTES
  3073. +aes_ft_tab:
  3074. + sb_data0(u0)
  3075. + sb_data1(u0)
  3076. + sb_data2(u0)
  3077. + sb_data3(u0)
  3078. + sb_data4(u0)
  3079. + sb_data5(u0)
  3080. + sb_data6(u0)
  3081. + sb_data7(u0)
  3082. +
  3083. + sb_data0(u1)
  3084. + sb_data1(u1)
  3085. + sb_data2(u1)
  3086. + sb_data3(u1)
  3087. + sb_data4(u1)
  3088. + sb_data5(u1)
  3089. + sb_data6(u1)
  3090. + sb_data7(u1)
  3091. +
  3092. + sb_data0(u2)
  3093. + sb_data1(u2)
  3094. + sb_data2(u2)
  3095. + sb_data3(u2)
  3096. + sb_data4(u2)
  3097. + sb_data5(u2)
  3098. + sb_data6(u2)
  3099. + sb_data7(u2)
  3100. +
  3101. + sb_data0(u3)
  3102. + sb_data1(u3)
  3103. + sb_data2(u3)
  3104. + sb_data3(u3)
  3105. + sb_data4(u3)
  3106. + sb_data5(u3)
  3107. + sb_data6(u3)
  3108. + sb_data7(u3)
  3109. +// Four 256-entry tables holding the bare S-box byte in byte lane 0..3 (w0..w3); the ksc macros look words up here
  3110. + .align ALIGN32BYTES
  3111. +aes_fl_tab:
  3112. + sb_data0(w0)
  3113. + sb_data1(w0)
  3114. + sb_data2(w0)
  3115. + sb_data3(w0)
  3116. + sb_data4(w0)
  3117. + sb_data5(w0)
  3118. + sb_data6(w0)
  3119. + sb_data7(w0)
  3120. +
  3121. + sb_data0(w1)
  3122. + sb_data1(w1)
  3123. + sb_data2(w1)
  3124. + sb_data3(w1)
  3125. + sb_data4(w1)
  3126. + sb_data5(w1)
  3127. + sb_data6(w1)
  3128. + sb_data7(w1)
  3129. +
  3130. + sb_data0(w2)
  3131. + sb_data1(w2)
  3132. + sb_data2(w2)
  3133. + sb_data3(w2)
  3134. + sb_data4(w2)
  3135. + sb_data5(w2)
  3136. + sb_data6(w2)
  3137. + sb_data7(w2)
  3138. +
  3139. + sb_data0(w3)
  3140. + sb_data1(w3)
  3141. + sb_data2(w3)
  3142. + sb_data3(w3)
  3143. + sb_data4(w3)
  3144. + sb_data5(w3)
  3145. + sb_data6(w3)
  3146. + sb_data7(w3)
  3147. +
  3148. +// The inverse xor tables: four consecutive 256-entry tables, every inverse
  3149. +// S-box byte expanded by v0, v1, v2 and v3 respectively
  3150. + .align ALIGN32BYTES
  3151. +aes_it_tab:
  3152. + ib_data0(v0)
  3153. + ib_data1(v0)
  3154. + ib_data2(v0)
  3155. + ib_data3(v0)
  3156. + ib_data4(v0)
  3157. + ib_data5(v0)
  3158. + ib_data6(v0)
  3159. + ib_data7(v0)
  3160. +
  3161. + ib_data0(v1)
  3162. + ib_data1(v1)
  3163. + ib_data2(v1)
  3164. + ib_data3(v1)
  3165. + ib_data4(v1)
  3166. + ib_data5(v1)
  3167. + ib_data6(v1)
  3168. + ib_data7(v1)
  3169. +
  3170. + ib_data0(v2)
  3171. + ib_data1(v2)
  3172. + ib_data2(v2)
  3173. + ib_data3(v2)
  3174. + ib_data4(v2)
  3175. + ib_data5(v2)
  3176. + ib_data6(v2)
  3177. + ib_data7(v2)
  3178. +
  3179. + ib_data0(v3)
  3180. + ib_data1(v3)
  3181. + ib_data2(v3)
  3182. + ib_data3(v3)
  3183. + ib_data4(v3)
  3184. + ib_data5(v3)
  3185. + ib_data6(v3)
  3186. + ib_data7(v3)
  3187. +// Four 256-entry tables holding the bare inverse S-box byte in byte lane 0..3 (w0..w3); used for the last inverse round
  3188. + .align ALIGN32BYTES
  3189. +aes_il_tab:
  3190. + ib_data0(w0)
  3191. + ib_data1(w0)
  3192. + ib_data2(w0)
  3193. + ib_data3(w0)
  3194. + ib_data4(w0)
  3195. + ib_data5(w0)
  3196. + ib_data6(w0)
  3197. + ib_data7(w0)
  3198. +
  3199. + ib_data0(w1)
  3200. + ib_data1(w1)
  3201. + ib_data2(w1)
  3202. + ib_data3(w1)
  3203. + ib_data4(w1)
  3204. + ib_data5(w1)
  3205. + ib_data6(w1)
  3206. + ib_data7(w1)
  3207. +
  3208. + ib_data0(w2)
  3209. + ib_data1(w2)
  3210. + ib_data2(w2)
  3211. + ib_data3(w2)
  3212. + ib_data4(w2)
  3213. + ib_data5(w2)
  3214. + ib_data6(w2)
  3215. + ib_data7(w2)
  3216. +
  3217. + ib_data0(w3)
  3218. + ib_data1(w3)
  3219. + ib_data2(w3)
  3220. + ib_data3(w3)
  3221. + ib_data4(w3)
  3222. + ib_data5(w3)
  3223. + ib_data6(w3)
  3224. + ib_data7(w3)
  3225. +
  3226. +// The inverse mix column tables: v0..v3 applied to every byte value
  3227. +// 0x00..0xff; aes_set_key uses them to build the decryption key schedule
  3228. + .align ALIGN32BYTES
  3229. +aes_im_tab:
  3230. + im_data0(v0)
  3231. + im_data1(v0)
  3232. + im_data2(v0)
  3233. + im_data3(v0)
  3234. + im_data4(v0)
  3235. + im_data5(v0)
  3236. + im_data6(v0)
  3237. + im_data7(v0)
  3238. +
  3239. + im_data0(v1)
  3240. + im_data1(v1)
  3241. + im_data2(v1)
  3242. + im_data3(v1)
  3243. + im_data4(v1)
  3244. + im_data5(v1)
  3245. + im_data6(v1)
  3246. + im_data7(v1)
  3247. +
  3248. + im_data0(v2)
  3249. + im_data1(v2)
  3250. + im_data2(v2)
  3251. + im_data3(v2)
  3252. + im_data4(v2)
  3253. + im_data5(v2)
  3254. + im_data6(v2)
  3255. + im_data7(v2)
  3256. +
  3257. + im_data0(v3)
  3258. + im_data1(v3)
  3259. + im_data2(v3)
  3260. + im_data3(v3)
  3261. + im_data4(v3)
  3262. + im_data5(v3)
  3263. + im_data6(v3)
  3264. + im_data7(v3)
  3265. diff -pruN linux-2.6.6_orig/drivers/misc/aes.c linux-2.6.6/drivers/misc/aes.c
  3266. --- linux-2.6.6_orig/drivers/misc/aes.c 1970-01-01 01:00:00.000000000 +0100
  3267. +++ linux-2.6.6/drivers/misc/aes.c 2004-05-16 15:08:27.000000000 +0200
  3268. @@ -0,0 +1,1479 @@
  3269. +// I retain copyright in this code but I encourage its free use provided
  3270. +// that I don't carry any responsibility for the results. I am especially
  3271. +// happy to see it used in free and open source software. If you do use
  3272. +// it I would appreciate an acknowledgement of its origin in the code or
  3273. +// the product that results and I would also appreciate knowing a little
  3274. +// about the use to which it is being put. I am grateful to Frank Yellin
  3275. +// for some ideas that are used in this implementation.
  3276. +//
  3277. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  3278. +//
  3279. +// This is an implementation of the AES encryption algorithm (Rijndael)
  3280. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  3281. +// to provide both fixed and dynamic block and key lengths and can also
  3282. +// run with either big or little endian internal byte order (see aes.h).
  3283. +// It inputs block and key lengths in bytes with the legal values being
  3284. +// 16, 24 and 32.
  3285. +
  3286. +/*
  3287. + * Modified by Jari Ruusu, May 1 2001
  3288. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  3289. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  3290. + * - Major name space cleanup: Names visible to outside now begin
  3291. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  3292. + * - Removed C++ and DLL support as part of name space cleanup.
  3293. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  3294. + * - Merged precomputed constant tables to aes.c file.
  3295. + * - Removed data alignment restrictions for portability reasons.
  3296. + * - Made block and key lengths accept bit count (128/192/256)
  3297. + * as well as byte count (16/24/32).
  3298. + * - Removed all error checks. This change also eliminated the need
  3299. + * to preinitialize the context struct to zero.
  3300. + * - Removed some totally unused constants.
  3301. + */
  3302. +/*
  3303. + * Modified by Jari Ruusu, April 21 2004
  3304. + * - Added back code that avoids byte swaps on big endian boxes.
  3305. + */
  3306. +
  3307. +#include "aes.h"
  3308. +
  3309. +// CONFIGURATION OPTIONS (see also aes.h)
  3310. +//
  3311. +// 1. Define UNROLL for full loop unrolling in encryption and decryption.
  3312. +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
  3313. +// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
  3314. +// 4. Define FF_TABLES to use tables for field multiplies and inverses.
  3315. +// Do not enable this without understanding stack space requirements.
  3316. +// 5. Define ARRAYS to use arrays to hold the local state block. If this
  3317. +// is not defined, individually declared 32-bit words are used.
  3318. +// 6. Define FAST_VARIABLE if a high speed variable block implementation
  3319. +// is needed (essentially three separate fixed block size code sequences)
  3320. +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
  3321. +// version using 1 table (2 kbytes of table space) or 4 tables (8
  3322. +// kbytes of table space) for higher speed.
  3323. +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
  3324. +// increase by using tables for the last rounds but with more table
  3325. +// space (2 or 8 kbytes extra).
  3326. +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
  3327. +// slower version is provided.
  3328. +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
  3329. +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
  3330. +// Options selected for this build:
  3331. +#define UNROLL // replaced by PARTIAL_UNROLL further down when AES_BLOCK_SIZE is not defined
  3332. +//#define PARTIAL_UNROLL
  3333. +
  3334. +#define FIXED_TABLES
  3335. +//#define FF_TABLES
  3336. +//#define ARRAYS
  3337. +#define FAST_VARIABLE
  3338. +
  3339. +//#define ONE_TABLE
  3340. +#define FOUR_TABLES
  3341. +
  3342. +//#define ONE_LR_TABLE
  3343. +#define FOUR_LR_TABLES
  3344. +
  3345. +//#define ONE_IM_TABLE
  3346. +#define FOUR_IM_TABLES
  3347. +// Reject mutually exclusive option pairs and illegal block sizes at compile time
  3348. +#if defined(UNROLL) && defined (PARTIAL_UNROLL)
  3349. +#error both UNROLL and PARTIAL_UNROLL are defined
  3350. +#endif
  3351. +
  3352. +#if defined(ONE_TABLE) && defined (FOUR_TABLES)
  3353. +#error both ONE_TABLE and FOUR_TABLES are defined
  3354. +#endif
  3355. +
  3356. +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
  3357. +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
  3358. +#endif
  3359. +
  3360. +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
  3361. +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
  3362. +#endif
  3363. +
  3364. +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
  3365. +#error an illegal block size has been specified
  3366. +#endif
  3367. +// Pick INTERNAL_BYTE_ORDER unless it is already defined: known CPU macros first, then configure results, kernel headers, or <sys/param.h>
  3368. +/* INTERNAL_BYTE_ORDER: 0=unknown, 1=little endian, 2=big endian */
  3369. +#if defined(INTERNAL_BYTE_ORDER)
  3370. +#elif defined(__i386__)||defined(__i386)||defined(__x86_64__)||defined(__x86_64)||defined(__amd64__)||defined(__amd64)||defined(__AMD64__)||defined(__AMD64)
  3371. +# define INTERNAL_BYTE_ORDER 1
  3372. +# undef DATA_ALWAYS_ALIGNED
  3373. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  3374. +#elif defined(__ppc__)||defined(__ppc)||defined(__PPC__)||defined(__PPC)||defined(__powerpc__)||defined(__powerpc)||defined(__POWERPC__)||defined(__POWERPC)||defined(__PowerPC__)||defined(__PowerPC)||defined(__ppc64__)||defined(__ppc64)||defined(__PPC64__)||defined(__PPC64)||defined(__powerpc64__)||defined(__powerpc64)||defined(__s390__)||defined(__s390)
  3375. +# define INTERNAL_BYTE_ORDER 2
  3376. +# undef DATA_ALWAYS_ALIGNED
  3377. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  3378. +#elif defined(__alpha__)||defined(__alpha)||defined(__ia64__)||defined(__ia64)
  3379. +# define INTERNAL_BYTE_ORDER 1
  3380. +#elif defined(__hppa__)||defined(__hppa)||defined(__HPPA__)||defined(__HPPA)||defined(__parisc__)||defined(__parisc)||defined(__sparc__)||defined(__sparc)||defined(__sparc_v9__)||defined(__sparc_v9)||defined(__sparc64__)||defined(__sparc64)||defined(__mc68000__)||defined(__mc68000)
  3381. +# define INTERNAL_BYTE_ORDER 2
  3382. +#elif defined(CONFIGURE_DETECTS_BYTE_ORDER)
  3383. +# if defined(WORDS_BIGENDIAN)
  3384. +# define INTERNAL_BYTE_ORDER 2
  3385. +# else
  3386. +# define INTERNAL_BYTE_ORDER 1
  3387. +# endif
  3388. +#elif defined(__linux__) && defined(__KERNEL__)
  3389. +# include <asm/byteorder.h>
  3390. +# if defined(__BIG_ENDIAN)
  3391. +# define INTERNAL_BYTE_ORDER 2
  3392. +# else
  3393. +# define INTERNAL_BYTE_ORDER 1
  3394. +# endif
  3395. +#else
  3396. +# include <sys/param.h>
  3397. +# if (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN))
  3398. +# define INTERNAL_BYTE_ORDER 1
  3399. +# elif defined(WORDS_BIGENDIAN) || defined(__BIG_ENDIAN__) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN))
  3400. +# define INTERNAL_BYTE_ORDER 2
  3401. +# else
  3402. +# define INTERNAL_BYTE_ORDER 0
  3403. +# endif
  3404. +#endif
  3405. +// word_in(x) / word_out(x,v): read / write one 32-bit word at address x
  3406. +#if defined(DATA_ALWAYS_ALIGNED) && (INTERNAL_BYTE_ORDER > 0)
  3407. +# define word_in(x) *(u_int32_t*)(x)
  3408. +# define word_out(x,v) *(u_int32_t*)(x) = (v)
  3409. +#elif defined(__linux__) && defined(__KERNEL__)
  3410. +# include <asm/unaligned.h>
  3411. +# define word_in(x) get_unaligned((u_int32_t*)(x))
  3412. +# define word_out(x,v) put_unaligned((v),(u_int32_t*)(x))
  3413. +#else
  3414. +/* unknown endianness and/or unable to handle unaligned data: force little endian internal order and access memory one byte at a time */
  3415. +# undef INTERNAL_BYTE_ORDER
  3416. +# define INTERNAL_BYTE_ORDER 1
  3417. +# define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
  3418. +# define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24)
  3419. +#endif
  3420. +
  3421. +// upr(x,n): rotates bytes within words by n positions, moving bytes
  3422. +// to higher index positions with wrap around into low positions (n = 1..3)
  3423. +// ups(x,n): moves bytes by n positions to higher index positions in
  3424. +// words but without wrap around
  3425. +// bval(x,n): extracts the byte at index n from a word
  3426. +// bytes2word(b0,b1,b2,b3): builds a word with b0 at byte index 0 ... b3 at index 3
  3427. +#if (INTERNAL_BYTE_ORDER < 2)
  3428. +/* little endian */
  3429. +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
  3430. +#define ups(x,n) ((x) << 8 * (n))
  3431. +#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
  3432. +#define bytes2word(b0, b1, b2, b3) \
  3433. + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
  3434. +#else
  3435. +/* big endian */
  3436. +#define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n))))
  3437. +#define ups(x,n) ((x) >> 8 * (n))
  3438. +#define bval(x,n) ((unsigned char)((x) >> (24 - 8 * (n))))
  3439. +#define bytes2word(b0, b1, b2, b3) \
  3440. + ((u_int32_t)(b0) << 24 | (u_int32_t)(b1) << 16 | (u_int32_t)(b2) << 8 | (b3))
  3441. +#endif
  3442. +
  3443. +// Disable at least some poor combinations of options
  3444. +
  3445. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) // no round tables selected
  3446. +#define FIXED_TABLES
  3447. +#undef UNROLL
  3448. +#undef ONE_LR_TABLE
  3449. +#undef FOUR_LR_TABLES
  3450. +#undef ONE_IM_TABLE
  3451. +#undef FOUR_IM_TABLES
  3452. +#elif !defined(FOUR_TABLES) // ONE_TABLE only: replace FOUR_* table options with ONE_*
  3453. +#ifdef FOUR_LR_TABLES
  3454. +#undef FOUR_LR_TABLES
  3455. +#define ONE_LR_TABLE
  3456. +#endif
  3457. +#ifdef FOUR_IM_TABLES
  3458. +#undef FOUR_IM_TABLES
  3459. +#define ONE_IM_TABLE
  3460. +#endif
  3461. +#elif !defined(AES_BLOCK_SIZE) // FOUR_TABLES without a compile-time block size
  3462. +#if defined(UNROLL)
  3463. +#define PARTIAL_UNROLL
  3464. +#undef UNROLL
  3465. +#endif
  3466. +#endif
  3467. +
  3468. +// the finite field modular polynomial and elements
  3469. +// (ff_poly is xored in whenever a doubling overflows the ff_hi bit)
  3470. +#define ff_poly 0x011b
  3471. +#define ff_hi 0x80
  3472. +
  3473. +// multiply four bytes in GF(2^8) by 'x' {02} in parallel
  3474. +// (m1 = top bit of each byte, m2 = low seven bits of each byte, m3 = low byte of ff_poly)
  3475. +#define m1 0x80808080
  3476. +#define m2 0x7f7f7f7f
  3477. +#define m3 0x0000001b
  3478. +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
  3479. +
  3480. +// The following defines provide alternative definitions of FFmulX that might
  3481. +// give improved performance if a fast 32-bit multiply is not available. Note
  3482. +// that a temporary variable u needs to be defined where FFmulX is used.
  3483. +
  3484. +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
  3485. +// #define m4 0x1b1b1b1b
  3486. +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
  3487. +
  3488. +// perform column mix operation on four bytes in parallel
  3489. +// (a word named f2 must be in scope where this is used; it receives FFmulX(x))
  3490. +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr(x,2) ^ upr(x,1))
  3491. +
  3492. +#if defined(FIXED_TABLES)
  3493. +
  3494. +// the S-Box table
  3495. +
  3496. +static const unsigned char s_box[256] =
  3497. +{
  3498. + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
  3499. + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  3500. + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
  3501. + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  3502. + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
  3503. + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  3504. + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
  3505. + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  3506. + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
  3507. + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  3508. + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
  3509. + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  3510. + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
  3511. + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  3512. + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
  3513. + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  3514. + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
  3515. + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  3516. + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
  3517. + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  3518. + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
  3519. + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  3520. + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
  3521. + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  3522. + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
  3523. + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  3524. + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
  3525. + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  3526. + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
  3527. + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  3528. + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
  3529. + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  3530. +};
  3531. +
  3532. +// the inverse S-Box table
  3533. +
  3534. +static const unsigned char inv_s_box[256] =
  3535. +{
  3536. + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
  3537. + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  3538. + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
  3539. + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  3540. + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
  3541. + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  3542. + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
  3543. + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  3544. + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
  3545. + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  3546. + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
  3547. + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  3548. + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
  3549. + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  3550. + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
  3551. + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  3552. + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
  3553. + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  3554. + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
  3555. + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  3556. + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
  3557. + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  3558. + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
  3559. + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  3560. + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
  3561. + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  3562. + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
  3563. + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  3564. + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
  3565. + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  3566. + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
  3567. + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  3568. +};
  3569. +
  3570. +// used to ensure table is generated in the right format
  3571. +// depending on the internal byte order required
  3572. +// w0(p): pastes hex byte p into the word position that bval(x,0) reads
  3573. +#if (INTERNAL_BYTE_ORDER < 2)
  3574. +/* little endian */
  3575. +#define w0(p) 0x000000##p
  3576. +#else
  3577. +/* big endian */
  3578. +#define w0(p) 0x##p##000000
  3579. +#endif
  3580. +
  3581. +// Number of elements required in this table for different
  3582. +// block and key lengths is:
  3583. +//
  3584. +//          Nk =   4   6   8
  3585. +//          ----------------
  3586. +//  Nb = 4  |     10   8   7
  3587. +//       6  |     19  12  11
  3588. +//       8  |     29  19  14
  3589. +//
  3590. +// this table can be a table of bytes if the key schedule
  3591. +// code is adjusted accordingly
  3592. +// (entries are 01 repeatedly doubled in the field, as gen_tabs() computes them)
  3593. +static const u_int32_t rcon_tab[29] =
  3594. +{
  3595. + w0(01), w0(02), w0(04), w0(08),
  3596. + w0(10), w0(20), w0(40), w0(80),
  3597. + w0(1b), w0(36), w0(6c), w0(d8),
  3598. + w0(ab), w0(4d), w0(9a), w0(2f),
  3599. + w0(5e), w0(bc), w0(63), w0(c6),
  3600. + w0(97), w0(35), w0(6a), w0(d4),
  3601. + w0(b3), w0(7d), w0(fa), w0(ef),
  3602. + w0(c5)
  3603. +};
  3604. +
  3605. +#undef w0
  3606. +
  3607. +// used to ensure table is generated in the right format
  3608. +// depending on the internal byte order required:
  3609. +// rN() = four pasted hex bytes rotated up N byte positions, wN() = one byte at index N
  3610. +#if (INTERNAL_BYTE_ORDER < 2)
  3611. +/* little endian */
  3612. +#define r0(p,q,r,s) 0x##p##q##r##s
  3613. +#define r1(p,q,r,s) 0x##q##r##s##p
  3614. +#define r2(p,q,r,s) 0x##r##s##p##q
  3615. +#define r3(p,q,r,s) 0x##s##p##q##r
  3616. +#define w0(p) 0x000000##p
  3617. +#define w1(p) 0x0000##p##00
  3618. +#define w2(p) 0x00##p##0000
  3619. +#define w3(p) 0x##p##000000
  3620. +#else
  3621. +/* big endian */
  3622. +#define r0(p,q,r,s) 0x##s##r##q##p
  3623. +#define r1(p,q,r,s) 0x##p##s##r##q
  3624. +#define r2(p,q,r,s) 0x##q##p##s##r
  3625. +#define r3(p,q,r,s) 0x##r##q##p##s
  3626. +#define w0(p) 0x##p##000000
  3627. +#define w1(p) 0x00##p##0000
  3628. +#define w2(p) 0x0000##p##00
  3629. +#define w3(p) 0x000000##p
  3630. +#endif
  3631. +
  3632. +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
  3633. +
  3634. +// data for forward tables (other than last round)
  3635. +
  3636. +#define f_table \
  3637. + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
  3638. + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
  3639. + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
  3640. + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
  3641. + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
  3642. + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
  3643. + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
  3644. + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
  3645. + r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
  3646. + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
  3647. + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
  3648. + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
  3649. + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
  3650. + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
  3651. + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
  3652. + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
  3653. + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
  3654. + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
  3655. + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
  3656. + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
  3657. + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
  3658. + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
  3659. + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
  3660. + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
  3661. + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
  3662. + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
  3663. + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
  3664. + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
  3665. + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
  3666. + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
  3667. + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
  3668. + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
  3669. + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
  3670. + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
  3671. + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
  3672. + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
  3673. + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
  3674. + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
  3675. + r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
  3676. + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
  3677. + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
  3678. + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
  3679. + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
  3680. + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
  3681. + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
  3682. + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
  3683. + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
  3684. + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
  3685. + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
  3686. + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
  3687. + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
  3688. + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
  3689. + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
  3690. + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
  3691. + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
  3692. + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
  3693. + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
  3694. + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
  3695. + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
  3696. + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
  3697. + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
  3698. + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
  3699. + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
  3700. + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
  3701. +
  3702. +// data for inverse tables (other than last round)
  3703. +
  3704. +#define i_table \
  3705. + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
  3706. + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
  3707. + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
  3708. + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
  3709. + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
  3710. + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
  3711. + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
  3712. + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
  3713. + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
  3714. + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
  3715. + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
  3716. + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
  3717. + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
  3718. + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
  3719. + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
  3720. + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
  3721. + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
  3722. + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
  3723. + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
  3724. + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
  3725. + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
  3726. + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
  3727. + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
  3728. + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
  3729. + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
  3730. + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
  3731. + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
  3732. + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
  3733. + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
  3734. + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
  3735. + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
  3736. + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
  3737. + r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
  3738. + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
  3739. + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
  3740. + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
  3741. + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
  3742. + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
  3743. + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
  3744. + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
  3745. + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
  3746. + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
  3747. + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
  3748. + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
  3749. + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
  3750. + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
  3751. + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
  3752. + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
  3753. + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
  3754. + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
  3755. + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
  3756. + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
  3757. + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
  3758. + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
  3759. + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
  3760. + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
  3761. + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
  3762. + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
  3763. + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
  3764. + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
  3765. + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
  3766. + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
  3767. + r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
  3768. + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
  3769. +
  3770. +// generate the required tables in the desired endian format
  3771. +// (f_table / i_table are expanded once per table with r rebound to r0..r3)
  3772. +#undef r
  3773. +#define r r0
  3774. +// forward round table(s): ft_tab[n] holds the f_table words built with rn
  3775. +#if defined(ONE_TABLE)
  3776. +static const u_int32_t ft_tab[256] =
  3777. + { f_table };
  3778. +#elif defined(FOUR_TABLES)
  3779. +static const u_int32_t ft_tab[4][256] =
  3780. +{ { f_table },
  3781. +#undef r
  3782. +#define r r1
  3783. + { f_table },
  3784. +#undef r
  3785. +#define r r2
  3786. + { f_table },
  3787. +#undef r
  3788. +#define r r3
  3789. + { f_table }
  3790. +};
  3791. +#endif
  3792. +// inverse round table(s): built the same way from i_table
  3793. +#undef r
  3794. +#define r r0
  3795. +#if defined(ONE_TABLE)
  3796. +static const u_int32_t it_tab[256] =
  3797. + { i_table };
  3798. +#elif defined(FOUR_TABLES)
  3799. +static const u_int32_t it_tab[4][256] =
  3800. +{ { i_table },
  3801. +#undef r
  3802. +#define r r1
  3803. + { i_table },
  3804. +#undef r
  3805. +#define r r2
  3806. + { i_table },
  3807. +#undef r
  3808. +#define r r3
  3809. + { i_table }
  3810. +};
  3811. +#endif
  3812. +
  3813. +#endif
  3814. +
  3815. +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
  3816. +
  3817. +// data for inverse tables (last round)
  3818. +
  3819. +#define li_table \
  3820. + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
  3821. + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
  3822. + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
  3823. + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
  3824. + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
  3825. + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
  3826. + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
  3827. + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
  3828. + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
  3829. + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
  3830. + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
  3831. + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
  3832. + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
  3833. + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
  3834. + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
  3835. + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
  3836. + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
  3837. + w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
  3838. + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
  3839. + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
  3840. + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
  3841. + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
  3842. + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
  3843. + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
  3844. + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
  3845. + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
  3846. + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
  3847. + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
  3848. + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
  3849. + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
  3850. + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
  3851. + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
  3852. +
  3853. +// generate the required tables in the desired endian format
  3854. +// (fl_tab reuses f_table: r() keeps only the second byte q of each entry)
  3855. +#undef r
  3856. +#define r(p,q,r,s) w0(q)
  3857. +#if defined(ONE_LR_TABLE)
  3858. +static const u_int32_t fl_tab[256] =
  3859. + { f_table };
  3860. +#elif defined(FOUR_LR_TABLES)
  3861. +static const u_int32_t fl_tab[4][256] =
  3862. +{ { f_table },
  3863. +#undef r
  3864. +#define r(p,q,r,s) w1(q)
  3865. + { f_table },
  3866. +#undef r
  3867. +#define r(p,q,r,s) w2(q)
  3868. + { f_table },
  3869. +#undef r
  3870. +#define r(p,q,r,s) w3(q)
  3871. + { f_table }
  3872. +};
  3873. +#endif
  3874. +// il_tab: the li_table bytes, placed at byte index 0 (or 0..3 for four tables)
  3875. +#undef w
  3876. +#define w w0
  3877. +#if defined(ONE_LR_TABLE)
  3878. +static const u_int32_t il_tab[256] =
  3879. + { li_table };
  3880. +#elif defined(FOUR_LR_TABLES)
  3881. +static const u_int32_t il_tab[4][256] =
  3882. +{ { li_table },
  3883. +#undef w
  3884. +#define w w1
  3885. + { li_table },
  3886. +#undef w
  3887. +#define w w2
  3888. + { li_table },
  3889. +#undef w
  3890. +#define w w3
  3891. + { li_table }
  3892. +};
  3893. +#endif
  3894. +
  3895. +#endif
  3896. +
  3897. +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
  3898. +
  3899. +#define m_table \
  3900. + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
  3901. + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
  3902. + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
  3903. + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
  3904. + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
  3905. + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
  3906. + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
  3907. + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
  3908. + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
  3909. + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
  3910. + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
  3911. + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
  3912. + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
  3913. + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
  3914. + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
  3915. + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
  3916. + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
  3917. + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
  3918. + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
  3919. + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
  3920. + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
  3921. + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
  3922. + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
  3923. + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
  3924. + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
  3925. + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
  3926. + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
  3927. + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
  3928. + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
  3929. + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
  3930. + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
  3931. + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
  3932. + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
  3933. + r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
  3934. + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
  3935. + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
  3936. + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
  3937. + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
  3938. + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
  3939. + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
  3940. + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
  3941. + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
  3942. + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
  3943. + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
  3944. + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
  3945. + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
  3946. + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
  3947. + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
  3948. + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
  3949. + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
  3950. + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
  3951. + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
  3952. + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
  3953. + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
  3954. + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
  3955. + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
  3956. + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
  3957. + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
  3958. + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
  3959. + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
  3960. + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
  3961. + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
  3962. + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
  3963. + r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
  3964. +
  3965. +#undef r
  3966. +#define r r0
  3967. +// im_tab: the m_table words; with four tables, im_tab[n] is built with rn
  3968. +#if defined(ONE_IM_TABLE)
  3969. +static const u_int32_t im_tab[256] =
  3970. + { m_table };
  3971. +#elif defined(FOUR_IM_TABLES)
  3972. +static const u_int32_t im_tab[4][256] =
  3973. +{ { m_table },
  3974. +#undef r
  3975. +#define r r1
  3976. + { m_table },
  3977. +#undef r
  3978. +#define r r2
  3979. + { m_table },
  3980. +#undef r
  3981. +#define r r3
  3982. + { m_table }
  3983. +};
  3984. +#endif
  3985. +
  3986. +#endif
  3987. +
  3988. +#else
  3989. +
  3990. +static int tab_gen = 0; // NOTE(review): presumably set once gen_tabs() has run; its users are outside this section
  3991. +
  3992. +static unsigned char s_box[256]; // the S box
  3993. +static unsigned char inv_s_box[256]; // the inverse S box
  3994. +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
  3995. +// the tables below are not initialised here; gen_tabs() fills them in
  3996. +#if defined(ONE_TABLE)
  3997. +static u_int32_t ft_tab[256];
  3998. +static u_int32_t it_tab[256];
  3999. +#elif defined(FOUR_TABLES)
  4000. +static u_int32_t ft_tab[4][256];
  4001. +static u_int32_t it_tab[4][256];
  4002. +#endif
  4003. +
  4004. +#if defined(ONE_LR_TABLE)
  4005. +static u_int32_t fl_tab[256];
  4006. +static u_int32_t il_tab[256];
  4007. +#elif defined(FOUR_LR_TABLES)
  4008. +static u_int32_t fl_tab[4][256];
  4009. +static u_int32_t il_tab[4][256];
  4010. +#endif
  4011. +
  4012. +#if defined(ONE_IM_TABLE)
  4013. +static u_int32_t im_tab[256];
  4014. +#elif defined(FOUR_IM_TABLES)
  4015. +static u_int32_t im_tab[4][256];
  4016. +#endif
  4017. +
  4018. +// Generate the tables for the dynamic table option
  4019. +
  4020. +#if !defined(FF_TABLES)
  4021. +
  4022. +// It will generally be sensible to use tables to compute finite
  4023. +// field multiplies and inverses but where memory is scarce this
  4024. +// code might sometimes be better.
  4025. +
  4026. +// return 2 ^ (n - 1) where n is the bit number of the highest bit
  4027. +// set in x with x in the range 1 < x < 0x00000200. This form is
  4028. +// used so that locals within FFinv can be bytes rather than words
  4029. +// (the result is 0 when x is 0 or 1)
  4030. +static unsigned char hibit(const u_int32_t x)
  4031. +{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); // x halved, top bit already spread one place down
  4032. +// spread the top bit over every lower position, then add one and halve
  4033. + r |= (r >> 2);
  4034. + r |= (r >> 4);
  4035. + return (r + 1) >> 1;
  4036. +}
  4037. +
  4038. +// return the inverse of the finite field element x
  4039. +// (x = 0 and x = 1 are returned unchanged)
  4040. +static unsigned char FFinv(const unsigned char x)
  4041. +{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
  4042. +// p2/n2 start as the low byte of 0x011b and hibit(0x011b); v1/v2 track the multipliers applied to p1/p2
  4043. + if(x < 2) return x;
  4044. +// NOTE(review): the loop looks like an extended Euclidean reduction of p1 against p2 - confirm
  4045. + for(;;)
  4046. + {
  4047. + if(!n1) return v1; // hibit() is 0 only for 0 or 1, so p1 is fully reduced here
  4048. +
  4049. + while(n2 >= n1) // n2 / n1 is the factor that lines p1 up under p2
  4050. + {
  4051. + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
  4052. + }
  4053. +
  4054. + if(!n2) return v2; // same test with the roles of p1 and p2 swapped
  4055. +
  4056. + while(n1 >= n2)
  4057. + {
  4058. + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
  4059. + }
  4060. + }
  4061. +}
  4062. +
  4063. +// define the finite field multiplies required for Rijndael
  4064. +// (each is built from x and repeated doubling via FFmul02; x is evaluated more than once)
  4065. +#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0))
  4066. +#define FFmul03(x) ((x) ^ FFmul02(x))
  4067. +#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x))))
  4068. +#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x))))
  4069. +#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x))))
  4070. +#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x)))
  4071. +
  4072. +#else
  4073. +
  4074. +#define FFinv(x) ((x) ? pow[255 - log[x]]: 0)
  4075. +// table forms: pow[] and log[] are the local arrays set up in gen_tabs(); the added constant is presumably log[] of the multiplier (0x01 for 03, the generator)
  4076. +#define FFmul02(x) ((x) ? pow[log[x] + 0x19] : 0)
  4077. +#define FFmul03(x) ((x) ? pow[log[x] + 0x01] : 0)
  4078. +#define FFmul09(x) ((x) ? pow[log[x] + 0xc7] : 0)
  4079. +#define FFmul0b(x) ((x) ? pow[log[x] + 0x68] : 0)
  4080. +#define FFmul0d(x) ((x) ? pow[log[x] + 0xee] : 0)
  4081. +#define FFmul0e(x) ((x) ? pow[log[x] + 0xdf] : 0)
  4082. +
  4083. +#endif
  4084. +
  4085. +// The forward and inverse affine transformations used in the S-box
  4086. +// (both assign to a u_int32_t named w that must be in scope; w^(w>>8) folds bits shifted past bit 7 back into the low byte)
  4087. +#define fwd_affine(x) \
  4088. + (w = (u_int32_t)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))
  4089. +
  4090. +#define inv_affine(x) \
  4091. + (w = (u_int32_t)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
  4092. +// Fill every run-time table: round constants, both S-boxes and whichever round tables are configured
  4093. +static void gen_tabs(void)
  4094. +{ u_int32_t i, w;
  4095. +// w is also the scratch word that fwd_affine() / inv_affine() assign to
  4096. +#if defined(FF_TABLES)
  4097. +
  4098. + unsigned char pow[512], log[256];
  4099. +
  4100. + // log and power tables for GF(2^8) finite field with
  4101. + // 0x011b as modular polynomial - the simplest primitive
  4102. + // root is 0x03, used here to generate the tables
  4103. +// (each power is stored twice, 255 entries apart, so pow[log[x] + c] needs no wrap-around)
  4104. + i = 0; w = 1;
  4105. + do
  4106. + {
  4107. + pow[i] = (unsigned char)w;
  4108. + pow[i + 255] = (unsigned char)w;
  4109. + log[w] = (unsigned char)i++;
  4110. + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); // w = w times 03 in the field
  4111. + }
  4112. + while (w != 1);
  4113. +
  4114. +#endif
  4115. +// round constants: 01 repeatedly doubled in the field, stored at byte index 0
  4116. + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
  4117. + {
  4118. + rcon_tab[i] = bytes2word(w, 0, 0, 0);
  4119. + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
  4120. + }
  4121. +// one pass over all byte values fills the S-boxes and every configured table
  4122. + for(i = 0; i < 256; ++i)
  4123. + { unsigned char b;
  4124. +// forward direction: field inverse followed by the forward affine map
  4125. + s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
  4126. +// last-round forward entry: the bare S-box byte
  4127. + w = bytes2word(b, 0, 0, 0);
  4128. +#if defined(ONE_LR_TABLE)
  4129. + fl_tab[i] = w;
  4130. +#elif defined(FOUR_LR_TABLES)
  4131. + fl_tab[0][i] = w;
  4132. + fl_tab[1][i] = upr(w,1);
  4133. + fl_tab[2][i] = upr(w,2);
  4134. + fl_tab[3][i] = upr(w,3);
  4135. +#endif
  4136. + w = bytes2word(FFmul02(b), b, b, FFmul03(b)); // S-box byte times (02, 01, 01, 03)
  4137. +#if defined(ONE_TABLE)
  4138. + ft_tab[i] = w;
  4139. +#elif defined(FOUR_TABLES)
  4140. + ft_tab[0][i] = w;
  4141. + ft_tab[1][i] = upr(w,1);
  4142. + ft_tab[2][i] = upr(w,2);
  4143. + ft_tab[3][i] = upr(w,3);
  4144. +#endif
  4145. + inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); // inverse direction: inverse affine map, then field inverse
  4146. +// last-round inverse entry: the bare inverse S-box byte
  4147. + w = bytes2word(b, 0, 0, 0);
  4148. +#if defined(ONE_LR_TABLE)
  4149. + il_tab[i] = w;
  4150. +#elif defined(FOUR_LR_TABLES)
  4151. + il_tab[0][i] = w;
  4152. + il_tab[1][i] = upr(w,1);
  4153. + il_tab[2][i] = upr(w,2);
  4154. + il_tab[3][i] = upr(w,3);
  4155. +#endif
  4156. + w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); // inverse S-box byte times (0e, 09, 0d, 0b)
  4157. +#if defined(ONE_TABLE)
  4158. + it_tab[i] = w;
  4159. +#elif defined(FOUR_TABLES)
  4160. + it_tab[0][i] = w;
  4161. + it_tab[1][i] = upr(w,1);
  4162. + it_tab[2][i] = upr(w,2);
  4163. + it_tab[3][i] = upr(w,3);
  4164. +#endif
  4165. +#if defined(ONE_IM_TABLE)
  4166. + im_tab[b] = w; // indexed by b itself, so entry b holds b times (0e, 09, 0d, 0b)
  4167. +#elif defined(FOUR_IM_TABLES)
  4168. + im_tab[0][b] = w;
  4169. + im_tab[1][b] = upr(w,1);
  4170. + im_tab[2][b] = upr(w,2);
  4171. + im_tab[3][b] = upr(w,3);
  4172. +#endif
  4173. +
  4174. + }
  4175. +}
  4176. +
  4177. +#endif
  4178. +
  4179. +#define no_table(x,box,vf,rf,c) bytes2word( \
  4180. + box[bval(vf(x,0,c),rf(0,c))], \
  4181. + box[bval(vf(x,1,c),rf(1,c))], \
  4182. + box[bval(vf(x,2,c),rf(2,c))], \
  4183. + box[bval(vf(x,3,c),rf(3,c))]) // per row r: vf picks the source word, rf the byte index given to bval (defined outside this view), box maps that byte
  4184. + // one_table: same selection, but the table yields whole words; op(word, r) adjusts the entries for rows 1..3
  4185. +#define one_table(x,op,tab,vf,rf,c) \
  4186. + ( tab[bval(vf(x,0,c),rf(0,c))] \
  4187. + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
  4188. + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
  4189. + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
  4190. + // four_tables: one table per row (tab[r]), so no per-row op is applied after the lookup
  4191. +#define four_tables(x,tab,vf,rf,c) \
  4192. + ( tab[0][bval(vf(x,0,c),rf(0,c))] \
  4193. + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
  4194. + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
  4195. + ^ tab[3][bval(vf(x,3,c),rf(3,c))])
  4196. + // selector helpers for the case where all four rows read from the same word x
  4197. +#define vf1(x,r,c) (x) // source word is always x
  4198. +#define rf1(r,c) (r) // row r uses byte index r
  4199. +#define rf2(r,c) ((r-c)&3) // row r uses byte index (r - c) mod 4
  4200. +
  4201. +#if defined(FOUR_LR_TABLES) // ls_box(x,c): S-box lookup on x where row r reads byte index (r - c) & 3 (via rf2)
  4202. +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
  4203. +#elif defined(ONE_LR_TABLE)
  4204. +#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
  4205. +#else
  4206. +#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
  4207. +#endif
  4208. + // inv_mcol(x): word x with every column byte combined through the (0e, 09, 0d, 0b) multipliers
  4209. +#if defined(FOUR_IM_TABLES)
  4210. +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
  4211. +#elif defined(ONE_IM_TABLE)
  4212. +#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
  4213. +#else // no im_tab: build it from FFmulX steps; the caller must declare u_int32_t f2, f4, f8, f9
  4214. +#define inv_mcol(x) \
  4215. + (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
  4216. + f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
  4217. +#endif
  4218. +
  4219. +// Subroutine to set the block size (if variable). Legal values are 16, 24
  4220. +// and 32 bytes (or 128, 192 and 256 bits); any other value selects 16 bytes.
  4221. +
  4222. +#if defined(AES_BLOCK_SIZE)
  4223. +#define nc (AES_BLOCK_SIZE / 4)
  4224. +#else
  4225. +#define nc (cx->aes_Ncol)
  4226. +
  4227. +void aes_set_blk(aes_context *cx, int n_bytes)
  4228. +{
  4229. +    // Select the number of state columns (nc) for this context.
  4230. +    // n_bytes may be given in bytes (16/24/32) or bits (128/192/256);
  4231. +    // every other value falls back to the 16 byte block (4 columns).
  4232. +
  4233. +#if !defined(FIXED_TABLES)
  4234. +    // build the lookup tables on first use
  4235. +    if(!tab_gen)
  4236. +    {
  4237. +        gen_tabs();
  4238. +        tab_gen = 1;
  4239. +    }
  4240. +#endif
  4241. +
  4242. +    if(n_bytes == 32 || n_bytes == 256)
  4243. +        nc = 8;
  4244. +    else if(n_bytes == 24 || n_bytes == 192)
  4245. +        nc = 6;
  4246. +    else
  4247. +        nc = 4;
  4248. +}
  4249. +
  4250. +#endif
  4251. +
  4252. +// Initialise the key schedule from the user supplied key. The key
  4253. +// length may be given in bytes (16, 24 or 32) or in bits (128, 192
  4254. +// or 256), giving Nk values of 4, 6 and 8 respectively; any other
  4255. +// value is treated as a 16 byte key.
  4256. +
  4257. +#define mx(t,f) (*t++ = inv_mcol(*f),f++) // store inv_mcol(*f) at *t; f is advanced separately because inv_mcol may expand its argument more than once
  4258. +#define cp(t,f) *t++ = *f++ // copy one word and advance both pointers
  4259. + // cpy/mix apply cp/mx to one round key, i.e. nc consecutive words
  4260. +#if AES_BLOCK_SIZE == 16
  4261. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
  4262. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
  4263. +#elif AES_BLOCK_SIZE == 24
  4264. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
  4265. + cp(d,s); cp(d,s)
  4266. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
  4267. + mx(d,s); mx(d,s)
  4268. +#elif AES_BLOCK_SIZE == 32
  4269. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
  4270. + cp(d,s); cp(d,s); cp(d,s); cp(d,s)
  4271. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
  4272. + mx(d,s); mx(d,s); mx(d,s); mx(d,s)
  4273. +#else // variable block size: nc is read from the context at run time
  4274. + // the switch cases fall through on purpose (8 -> 6 -> 4) so that exactly nc words are processed
  4275. +#define cpy(d,s) \
  4276. +switch(nc) \
  4277. +{ case 8: cp(d,s); cp(d,s); \
  4278. + case 6: cp(d,s); cp(d,s); \
  4279. + case 4: cp(d,s); cp(d,s); \
  4280. + cp(d,s); cp(d,s); \
  4281. +}
  4282. +
  4283. +#define mix(d,s) \
  4284. +switch(nc) \
  4285. +{ case 8: mx(d,s); mx(d,s); \
  4286. + case 6: mx(d,s); mx(d,s); \
  4287. + case 4: mx(d,s); mx(d,s); \
  4288. + mx(d,s); mx(d,s); \
  4289. +}
  4290. +
  4291. +#endif
  4292. +
  4293. +void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f)
  4294. +{ u_int32_t *kf, *kt, rci;
  4295. + // cx: context to fill; in_key: raw key bytes; n_bytes: key length in bytes or bits; f: non-zero skips the decryption schedule
  4296. +#if !defined(FIXED_TABLES)
  4297. + if(!tab_gen) { gen_tabs(); tab_gen = 1; }
  4298. +#endif
  4299. + // map the requested length to Nk (key length in 32-bit words); unrecognised values are treated as 16 bytes
  4300. + switch(n_bytes) {
  4301. + case 32: /* bytes */
  4302. + case 256: /* bits */
  4303. + cx->aes_Nkey = 8;
  4304. + break;
  4305. + case 24: /* bytes */
  4306. + case 192: /* bits */
  4307. + cx->aes_Nkey = 6;
  4308. + break;
  4309. + case 16: /* bytes */
  4310. + case 128: /* bits */
  4311. + default:
  4312. + cx->aes_Nkey = 4;
  4313. + break;
  4314. + }
  4315. + // number of rounds = max(Nk, nc) + 6
  4316. + cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6;
  4317. +
  4318. + cx->aes_e_key[0] = word_in(in_key );
  4319. + cx->aes_e_key[1] = word_in(in_key + 4);
  4320. + cx->aes_e_key[2] = word_in(in_key + 8);
  4321. + cx->aes_e_key[3] = word_in(in_key + 12);
  4322. + // kf walks the schedule Nk words per step; expansion stops once kf reaches kt
  4323. + kf = cx->aes_e_key;
  4324. + kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey;
  4325. + rci = 0;
  4326. +
  4327. + switch(cx->aes_Nkey)
  4328. + {
  4329. + case 4: do
  4330. + { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
  4331. + kf[5] = kf[1] ^ kf[4];
  4332. + kf[6] = kf[2] ^ kf[5];
  4333. + kf[7] = kf[3] ^ kf[6];
  4334. + kf += 4;
  4335. + }
  4336. + while(kf < kt);
  4337. + break;
  4338. +
  4339. + case 6: cx->aes_e_key[4] = word_in(in_key + 16);
  4340. + cx->aes_e_key[5] = word_in(in_key + 20);
  4341. + do
  4342. + { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
  4343. + kf[ 7] = kf[1] ^ kf[ 6];
  4344. + kf[ 8] = kf[2] ^ kf[ 7];
  4345. + kf[ 9] = kf[3] ^ kf[ 8];
  4346. + kf[10] = kf[4] ^ kf[ 9];
  4347. + kf[11] = kf[5] ^ kf[10];
  4348. + kf += 6;
  4349. + }
  4350. + while(kf < kt);
  4351. + break;
  4352. +
  4353. + case 8: cx->aes_e_key[4] = word_in(in_key + 16);
  4354. + cx->aes_e_key[5] = word_in(in_key + 20);
  4355. + cx->aes_e_key[6] = word_in(in_key + 24);
  4356. + cx->aes_e_key[7] = word_in(in_key + 28);
  4357. + do
  4358. + { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
  4359. + kf[ 9] = kf[1] ^ kf[ 8];
  4360. + kf[10] = kf[2] ^ kf[ 9];
  4361. + kf[11] = kf[3] ^ kf[10];
  4362. + kf[12] = kf[4] ^ ls_box(kf[11],0); // 8-word keys add a second S-box step mid-way, with no byte rotation (c = 0)
  4363. + kf[13] = kf[5] ^ kf[12];
  4364. + kf[14] = kf[6] ^ kf[13];
  4365. + kf[15] = kf[7] ^ kf[14];
  4366. + kf += 8;
  4367. + }
  4368. + while (kf < kt);
  4369. + break;
  4370. + }
  4371. + // decryption schedule: the encryption round keys stored in reverse round order
  4372. + if(!f)
  4373. + { u_int32_t i;
  4374. +
  4375. + kt = cx->aes_d_key + nc * cx->aes_Nrnd;
  4376. + kf = cx->aes_e_key;
  4377. + // first encryption round key goes last, unchanged; kt then steps back to the previous slot
  4378. + cpy(kt, kf); kt -= 2 * nc;
  4379. + // middle round keys go through inv_mcol only when table driven rounds are compiled in
  4380. + for(i = 1; i < cx->aes_Nrnd; ++i)
  4381. + {
  4382. +#if defined(ONE_TABLE) || defined(FOUR_TABLES)
  4383. +#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
  4384. + u_int32_t f2, f4, f8, f9;
  4385. +#endif
  4386. + mix(kt, kf);
  4387. +#else
  4388. + cpy(kt, kf);
  4389. +#endif
  4390. + kt -= 2 * nc;
  4391. + }
  4392. + // last encryption round key goes first, unchanged
  4393. + cpy(kt, kf);
  4394. + }
  4395. +}
  4396. +
  4397. +// y = output word, x = input word, r = row, c = column
  4398. +// for r = 0, 1, 2 and 3 = column accessed for row r
  4399. +
  4400. +#if defined(ARRAYS)
  4401. +#define s(x,c) x[c] // state kept in arrays: word c of state x
  4402. +#else
  4403. +#define s(x,c) x##c // state kept in separate scalars: pastes the name, e.g. s(b0,2) is b02
  4404. +#endif
  4405. +
  4406. +// I am grateful to Frank Yellin for the following constructions
  4407. +// which, given the column (c) of the output state variable that
  4408. +// is being computed, return the input state variables which are
  4409. +// needed for each row (r) of the state
  4410. +
  4411. +// For the fixed block size options, compilers reduce these two
  4412. +// expressions to fixed variable references. For variable block
  4413. +// size code conditional clauses will sometimes be returned
  4414. +
  4415. +#define unused 77 // Sunset Strip
  4416. + // fwd_var(x,r,c): input word for row r of output column c = column (c + r) mod nc; for nc==8 rows 2 and 3 shift by 3 and 4 instead
  4417. +#define fwd_var(x,r,c) \
  4418. + ( r==0 ? \
  4419. + ( c==0 ? s(x,0) \
  4420. + : c==1 ? s(x,1) \
  4421. + : c==2 ? s(x,2) \
  4422. + : c==3 ? s(x,3) \
  4423. + : c==4 ? s(x,4) \
  4424. + : c==5 ? s(x,5) \
  4425. + : c==6 ? s(x,6) \
  4426. + : s(x,7)) \
  4427. + : r==1 ? \
  4428. + ( c==0 ? s(x,1) \
  4429. + : c==1 ? s(x,2) \
  4430. + : c==2 ? s(x,3) \
  4431. + : c==3 ? nc==4 ? s(x,0) : s(x,4) \
  4432. + : c==4 ? s(x,5) \
  4433. + : c==5 ? nc==8 ? s(x,6) : s(x,0) \
  4434. + : c==6 ? s(x,7) \
  4435. + : s(x,0)) \
  4436. + : r==2 ? \
  4437. + ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
  4438. + : c==1 ? nc==8 ? s(x,4) : s(x,3) \
  4439. + : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  4440. + : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  4441. + : c==4 ? nc==8 ? s(x,7) : s(x,0) \
  4442. + : c==5 ? nc==8 ? s(x,0) : s(x,1) \
  4443. + : c==6 ? s(x,1) \
  4444. + : s(x,2)) \
  4445. + : \
  4446. + ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
  4447. + : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  4448. + : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  4449. + : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
  4450. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  4451. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  4452. + : c==6 ? s(x,2) \
  4453. + : s(x,3)))
  4454. + // inv_var(x,r,c): the reverse selection, column (c - r) mod nc, with the same nc==8 exception for rows 2 and 3
  4455. +#define inv_var(x,r,c) \
  4456. + ( r==0 ? \
  4457. + ( c==0 ? s(x,0) \
  4458. + : c==1 ? s(x,1) \
  4459. + : c==2 ? s(x,2) \
  4460. + : c==3 ? s(x,3) \
  4461. + : c==4 ? s(x,4) \
  4462. + : c==5 ? s(x,5) \
  4463. + : c==6 ? s(x,6) \
  4464. + : s(x,7)) \
  4465. + : r==1 ? \
  4466. + ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
  4467. + : c==1 ? s(x,0) \
  4468. + : c==2 ? s(x,1) \
  4469. + : c==3 ? s(x,2) \
  4470. + : c==4 ? s(x,3) \
  4471. + : c==5 ? s(x,4) \
  4472. + : c==6 ? s(x,5) \
  4473. + : s(x,6)) \
  4474. + : r==2 ? \
  4475. + ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  4476. + : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  4477. + : c==2 ? nc==8 ? s(x,7) : s(x,0) \
  4478. + : c==3 ? nc==8 ? s(x,0) : s(x,1) \
  4479. + : c==4 ? nc==8 ? s(x,1) : s(x,2) \
  4480. + : c==5 ? nc==8 ? s(x,2) : s(x,3) \
  4481. + : c==6 ? s(x,3) \
  4482. + : s(x,4)) \
  4483. + : \
  4484. + ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
  4485. + : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  4486. + : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  4487. + : c==3 ? nc==8 ? s(x,7) : s(x,0) \
  4488. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  4489. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  4490. + : c==6 ? s(x,2) \
  4491. + : s(x,3)))
  4492. +
  4493. +#define si(y,x,k,c) s(y,c) = word_in((x) + 4 * c) ^ (k)[c] // load word c of input block x into state y, xored with key word c
  4494. +#define so(y,x,c) word_out((y) + 4 * c, s(x,c)) // store word c of state x into output block y
  4495. +
  4496. +#if defined(FOUR_TABLES) // fwd_rnd/inv_rnd: compute output column c of state y from state x and round key k
  4497. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
  4498. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
  4499. +#elif defined(ONE_TABLE)
  4500. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
  4501. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
  4502. +#else // no round tables: S-box lookup followed by an explicit column mix
  4503. +#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
  4504. +#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
  4505. +#endif // the table-less inv_rnd adds the key before inv_mcol; aes_set_key copies d_key unmixed in that configuration
  4506. + // last round variants: same byte selection, but no column mix is applied
  4507. +#if defined(FOUR_LR_TABLES)
  4508. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
  4509. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
  4510. +#elif defined(ONE_LR_TABLE)
  4511. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
  4512. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
  4513. +#else
  4514. +#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
  4515. +#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
  4516. +#endif
  4517. +
  4518. +#if AES_BLOCK_SIZE == 16
  4519. +
  4520. +#if defined(ARRAYS)
  4521. +#define locals(y,x) x[4],y[4]
  4522. +#else
  4523. +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
  4524. +// the following defines prevent the compiler requiring the declaration
  4525. +// of generated but unused variables in the fwd_var and inv_var macros
  4526. +#define b04 unused
  4527. +#define b05 unused
  4528. +#define b06 unused
  4529. +#define b07 unused
  4530. +#define b14 unused
  4531. +#define b15 unused
  4532. +#define b16 unused
  4533. +#define b17 unused
  4534. +#endif
  4535. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  4536. + s(y,2) = s(x,2); s(y,3) = s(x,3);
  4537. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
  4538. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
  4539. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
  4540. +
  4541. +#elif AES_BLOCK_SIZE == 24
  4542. +
  4543. +#if defined(ARRAYS)
  4544. +#define locals(y,x) x[6],y[6]
  4545. +#else
  4546. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
  4547. + y##0,y##1,y##2,y##3,y##4,y##5
  4548. +#define b06 unused
  4549. +#define b07 unused
  4550. +#define b16 unused
  4551. +#define b17 unused
  4552. +#endif
  4553. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  4554. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  4555. + s(y,4) = s(x,4); s(y,5) = s(x,5);
  4556. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
  4557. + si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
  4558. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
  4559. + so(y,x,3); so(y,x,4); so(y,x,5)
  4560. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
  4561. + rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
  4562. +#else
  4563. +
  4564. +#if defined(ARRAYS)
  4565. +#define locals(y,x) x[8],y[8]
  4566. +#else
  4567. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
  4568. + y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
  4569. +#endif
  4570. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  4571. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  4572. + s(y,4) = s(x,4); s(y,5) = s(x,5); \
  4573. + s(y,6) = s(x,6); s(y,7) = s(x,7);
  4574. +
  4575. +#if AES_BLOCK_SIZE == 32
  4576. +
  4577. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
  4578. + si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
  4579. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
  4580. + so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
  4581. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
  4582. + rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
  4583. +#else
  4584. +
  4585. +#define state_in(y,x,k) \
  4586. +switch(nc) \
  4587. +{ case 8: si(y,x,k,7); si(y,x,k,6); \
  4588. + case 6: si(y,x,k,5); si(y,x,k,4); \
  4589. + case 4: si(y,x,k,3); si(y,x,k,2); \
  4590. + si(y,x,k,1); si(y,x,k,0); \
  4591. +}
  4592. +
  4593. +#define state_out(y,x) \
  4594. +switch(nc) \
  4595. +{ case 8: so(y,x,7); so(y,x,6); \
  4596. + case 6: so(y,x,5); so(y,x,4); \
  4597. + case 4: so(y,x,3); so(y,x,2); \
  4598. + so(y,x,1); so(y,x,0); \
  4599. +}
  4600. +
  4601. +#if defined(FAST_VARIABLE)
  4602. +
  4603. +#define round(rm,y,x,k) \
  4604. +switch(nc) \
  4605. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  4606. + rm(y,x,k,5); rm(y,x,k,4); \
  4607. + rm(y,x,k,3); rm(y,x,k,2); \
  4608. + rm(y,x,k,1); rm(y,x,k,0); \
  4609. + break; \
  4610. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  4611. + rm(y,x,k,3); rm(y,x,k,2); \
  4612. + rm(y,x,k,1); rm(y,x,k,0); \
  4613. + break; \
  4614. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  4615. + rm(y,x,k,1); rm(y,x,k,0); \
  4616. + break; \
  4617. +}
  4618. +#else
  4619. +
  4620. +#define round(rm,y,x,k) \
  4621. +switch(nc) \
  4622. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  4623. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  4624. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  4625. + rm(y,x,k,1); rm(y,x,k,0); \
  4626. +}
  4627. +
  4628. +#endif
  4629. +
  4630. +#endif
  4631. +#endif
  4632. +
  4633. +void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  4634. +{ u_int32_t locals(b0, b1);
  4635. + const u_int32_t *kp = cx->aes_e_key;
  4636. + // Encrypts one block from in_blk into out_blk using the schedule in cx->aes_e_key; b0/b1 are the two working states.
  4637. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  4638. + u_int32_t f2; // NOTE(review): presumably scratch for fwd_mcol, which is defined outside this view
  4639. +#endif
  4640. + // load the input block into b0, xoring in the first nc key words
  4641. + state_in(b0, in_blk, kp); kp += nc;
  4642. +
  4643. +#if defined(UNROLL)
  4644. + // cases fall through on purpose: 14 and 12 round schedules run their extra round pairs first
  4645. + switch(cx->aes_Nrnd)
  4646. + {
  4647. + case 14: round(fwd_rnd, b1, b0, kp );
  4648. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  4649. + case 12: round(fwd_rnd, b1, b0, kp );
  4650. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  4651. + case 10: round(fwd_rnd, b1, b0, kp );
  4652. + round(fwd_rnd, b0, b1, kp + nc);
  4653. + round(fwd_rnd, b1, b0, kp + 2 * nc);
  4654. + round(fwd_rnd, b0, b1, kp + 3 * nc);
  4655. + round(fwd_rnd, b1, b0, kp + 4 * nc);
  4656. + round(fwd_rnd, b0, b1, kp + 5 * nc);
  4657. + round(fwd_rnd, b1, b0, kp + 6 * nc);
  4658. + round(fwd_rnd, b0, b1, kp + 7 * nc);
  4659. + round(fwd_rnd, b1, b0, kp + 8 * nc);
  4660. + round(fwd_lrnd, b0, b1, kp + 9 * nc);
  4661. + }
  4662. +
  4663. +#elif defined(PARTIAL_UNROLL)
  4664. + { u_int32_t rnd;
  4665. + // two rounds per pass, alternating b0 -> b1 -> b0, then the final normal round and the last round
  4666. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
  4667. + {
  4668. + round(fwd_rnd, b1, b0, kp);
  4669. + round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
  4670. + }
  4671. +
  4672. + round(fwd_rnd, b1, b0, kp);
  4673. + round(fwd_lrnd, b0, b1, kp + nc);
  4674. + }
  4675. +#else
  4676. + { u_int32_t rnd;
  4677. + // one round per pass into b1, copied back to b0 each time; the last round reads b1 directly
  4678. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
  4679. + {
  4680. + round(fwd_rnd, b1, b0, kp);
  4681. + l_copy(b0, b1); kp += nc;
  4682. + }
  4683. +
  4684. + round(fwd_lrnd, b0, b1, kp);
  4685. + }
  4686. +#endif
  4687. + // write the final state in b0 to the output block
  4688. + state_out(out_blk, b0);
  4689. +}
  4690. +
  4691. +void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  4692. +{ u_int32_t locals(b0, b1);
  4693. + const u_int32_t *kp = cx->aes_d_key;
  4694. + // Decrypts one block from in_blk into out_blk using the schedule in cx->aes_d_key; b0/b1 are the two working states.
  4695. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  4696. + u_int32_t f2, f4, f8, f9; // scratch variables required by the table-less inv_mcol macro
  4697. +#endif
  4698. + // load the input block into b0, xoring in the first nc words of the decryption schedule
  4699. + state_in(b0, in_blk, kp); kp += nc;
  4700. +
  4701. +#if defined(UNROLL)
  4702. + // cases fall through on purpose: 14 and 12 round schedules run their extra round pairs first
  4703. + switch(cx->aes_Nrnd)
  4704. + {
  4705. + case 14: round(inv_rnd, b1, b0, kp );
  4706. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  4707. + case 12: round(inv_rnd, b1, b0, kp );
  4708. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  4709. + case 10: round(inv_rnd, b1, b0, kp );
  4710. + round(inv_rnd, b0, b1, kp + nc);
  4711. + round(inv_rnd, b1, b0, kp + 2 * nc);
  4712. + round(inv_rnd, b0, b1, kp + 3 * nc);
  4713. + round(inv_rnd, b1, b0, kp + 4 * nc);
  4714. + round(inv_rnd, b0, b1, kp + 5 * nc);
  4715. + round(inv_rnd, b1, b0, kp + 6 * nc);
  4716. + round(inv_rnd, b0, b1, kp + 7 * nc);
  4717. + round(inv_rnd, b1, b0, kp + 8 * nc);
  4718. + round(inv_lrnd, b0, b1, kp + 9 * nc);
  4719. + }
  4720. +
  4721. +#elif defined(PARTIAL_UNROLL)
  4722. + { u_int32_t rnd;
  4723. + // two rounds per pass, alternating b0 -> b1 -> b0, then the final normal round and the last round
  4724. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
  4725. + {
  4726. + round(inv_rnd, b1, b0, kp);
  4727. + round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
  4728. + }
  4729. +
  4730. + round(inv_rnd, b1, b0, kp);
  4731. + round(inv_lrnd, b0, b1, kp + nc);
  4732. + }
  4733. +#else
  4734. + { u_int32_t rnd;
  4735. + // one round per pass into b1, copied back to b0 each time; the last round reads b1 directly
  4736. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
  4737. + {
  4738. + round(inv_rnd, b1, b0, kp);
  4739. + l_copy(b0, b1); kp += nc;
  4740. + }
  4741. +
  4742. + round(inv_lrnd, b0, b1, kp);
  4743. + }
  4744. +#endif
  4745. + // write the final state in b0 to the output block
  4746. + state_out(out_blk, b0);
  4747. +}
  4748. diff -pruN linux-2.6.6_orig/drivers/misc/aes.h linux-2.6.6/drivers/misc/aes.h
  4749. --- linux-2.6.6_orig/drivers/misc/aes.h 1970-01-01 01:00:00.000000000 +0100
  4750. +++ linux-2.6.6/drivers/misc/aes.h 2004-05-16 15:08:27.000000000 +0200
  4751. @@ -0,0 +1,113 @@
  4752. +// I retain copyright in this code but I encourage its free use provided
  4753. +// that I don't carry any responsibility for the results. I am especially
  4754. +// happy to see it used in free and open source software. If you do use
  4755. +// it I would appreciate an acknowledgement of its origin in the code or
  4756. +// the product that results and I would also appreciate knowing a little
  4757. +// about the use to which it is being put. I am grateful to Frank Yellin
  4758. +// for some ideas that are used in this implementation.
  4759. +//
  4760. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  4761. +//
  4762. +// This is an implementation of the AES encryption algorithm (Rijndael)
  4763. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  4764. +// to provide both fixed and dynamic block and key lengths and can also
  4765. +// run with either big or little endian internal byte order (see aes.h).
  4766. +// It inputs block and key lengths in bytes with the legal values being
  4767. +// 16, 24 and 32.
  4768. +
  4769. +/*
  4770. + * Modified by Jari Ruusu, May 1 2001
  4771. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  4772. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  4773. + * - Major name space cleanup: Names visible to outside now begin
  4774. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  4775. + * - Removed C++ and DLL support as part of name space cleanup.
  4776. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  4777. + * - Merged precomputed constant tables to aes.c file.
  4778. + * - Removed data alignment restrictions for portability reasons.
  4779. + * - Made block and key lengths accept bit count (128/192/256)
  4780. + * as well byte count (16/24/32).
  4781. + * - Removed all error checks. This change also eliminated the need
  4782. + * to preinitialize the context struct to zero.
  4783. + * - Removed some totally unused constants.
  4784. + */
  4785. +
  4786. +#ifndef _AES_H
  4787. +#define _AES_H
  4788. +
  4789. +#include <linux/types.h>
  4790. +#include <linux/linkage.h>
  4791. +#include <linux/config.h>
  4792. +#include <linux/module.h>
  4793. +
  4794. +// CONFIGURATION OPTIONS (see also aes.c)
  4795. +//
  4796. +// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or
  4797. +// leave this undefined for dynamically variable block size (this will
  4798. +// result in much slower code).
  4799. +// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). If
  4800. +// left undefined a slower version providing variable block length is compiled
  4801. +
  4802. +#define AES_BLOCK_SIZE 16
  4803. +
  4804. +// The number of key schedule words for different block and key lengths
  4805. +// allowing for method of computation which requires the length to be a
  4806. +// multiple of the key length
  4807. +//
  4808. +//        Nk =   4    6    8
  4809. +//             -------------
  4810. +// Nb =    4 |  60   60   64
  4811. +//         6 |  96   90   96
  4812. +//         8 | 120  120  120
  4813. +
  4814. +#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32)
  4815. +#define AES_KS_LENGTH 120 // words in each key schedule array
  4816. +#define AES_RC_LENGTH 29 // entries in the round constant table
  4817. +#else // 16 or 24 byte blocks: 64 or 96 schedule words, 10 or 19 round constants
  4818. +#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE)
  4819. +#define AES_RC_LENGTH ((9 * AES_BLOCK_SIZE) / 8 - 8)
  4820. +#endif
  4821. +
  4822. +typedef struct // per-key cipher state: filled in by aes_set_key, read by aes_encrypt/aes_decrypt
  4823. +{
  4824. + u_int32_t aes_Nkey; // the number of words in the key input block
  4825. + u_int32_t aes_Nrnd; // the number of cipher rounds
  4826. + u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule
  4827. + u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule
  4828. +#if !defined(AES_BLOCK_SIZE)
  4829. + u_int32_t aes_Ncol; // the number of columns in the cipher state
  4830. +#endif
  4831. +} aes_context;
  4832. +
  4833. +// avoid global name conflict with mainline kernel
  4834. +#define aes_set_key _aes_set_key
  4835. +#define aes_encrypt _aes_encrypt
  4836. +#define aes_decrypt _aes_decrypt
  4837. +
  4838. +// THE CIPHER INTERFACE
  4839. +
  4840. +#if !defined(AES_BLOCK_SIZE)
  4841. +extern void aes_set_blk(aes_context *, const int);
  4842. +#endif
  4843. +
  4844. +#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) && !defined(CONFIG_M386) && !defined(CONFIG_M486)
  4845. + asmlinkage
  4846. +#endif
  4847. +extern void aes_set_key(aes_context *, const unsigned char [], const int, const int);
  4848. +
  4849. +#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) && !defined(CONFIG_M386) && !defined(CONFIG_M486)
  4850. + asmlinkage
  4851. +#endif
  4852. +extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []);
  4853. +
  4854. +#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) && !defined(CONFIG_M386) && !defined(CONFIG_M486)
  4855. + asmlinkage
  4856. +#endif
  4857. +extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []);
  4858. +
  4859. +// The block length inputs to aes_set_blk and aes_set_key are in numbers
  4860. +// of bytes or bits. The calls to subroutines must be made in the above
  4861. +// order but multiple calls can be made without repeating earlier calls
  4862. +// if their parameters have not changed.
  4863. +
  4864. +#endif // _AES_H
  4865. diff -pruN linux-2.6.6_orig/drivers/misc/crypto-ksym.c linux-2.6.6/drivers/misc/crypto-ksym.c
  4866. --- linux-2.6.6_orig/drivers/misc/crypto-ksym.c 1970-01-01 01:00:00.000000000 +0100
  4867. +++ linux-2.6.6/drivers/misc/crypto-ksym.c 2004-05-16 15:08:27.000000000 +0200
  4868. @@ -0,0 +1,7 @@
  4869. +#include <linux/module.h>
  4870. +#include "aes.h" // also #defines aes_set_key/aes_encrypt/aes_decrypt to their _aes_* names
  4871. +#include "md5.h"
  4872. +EXPORT_SYMBOL(aes_set_key); // NOTE(review): presumably exported under the remapped _aes_* name - confirm against EXPORT_SYMBOL's expansion
  4873. +EXPORT_SYMBOL(aes_encrypt);
  4874. +EXPORT_SYMBOL(aes_decrypt);
  4875. +EXPORT_SYMBOL(md5_transform_CPUbyteorder);
  4876. diff -pruN linux-2.6.6_orig/drivers/misc/md5-i586.S linux-2.6.6/drivers/misc/md5-i586.S
  4877. --- linux-2.6.6_orig/drivers/misc/md5-i586.S 1970-01-01 01:00:00.000000000 +0100
  4878. +++ linux-2.6.6/drivers/misc/md5-i586.S 2004-05-16 15:08:27.000000000 +0200
  4879. @@ -0,0 +1,201 @@
  4880. +//
  4881. +// md5-i586.S
  4882. +//
  4883. +// Written by Jari Ruusu, October 1 2003
  4884. +//
  4885. +// Copyright 2003 by Jari Ruusu.
  4886. +// Redistribution of this file is permitted under the GNU Public License.
  4887. +//
  4888. +
  4889. +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t *in)
  4890. +
  4891. +#if defined(USE_UNDERLINE)
  4892. +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder
  4893. +#endif
  4894. +#if !defined(ALIGN32BYTES)
  4895. +# define ALIGN32BYTES 32
  4896. +#endif
  4897. +
  4898. + .file "md5-i586.S"
  4899. + .globl md5_transform_CPUbyteorder
  4900. + .text
  4901. + .align ALIGN32BYTES
  4902. +
  4903. +md5_transform_CPUbyteorder:
  4904. + push %ebp
  4905. + mov 4+4(%esp),%eax // pointer to 'hash' input
  4906. + mov 8+4(%esp),%ebp // pointer to 'in' array
  4907. + push %ebx
  4908. + push %esi
  4909. + push %edi
  4910. +
  4911. + mov (%eax),%esi
  4912. + mov 4(%eax),%edi
  4913. + mov 8(%eax),%ecx
  4914. + mov 12(%eax),%eax
  4915. + mov (%ebp),%ebx
  4916. + mov %eax,%edx
  4917. + xor %ecx,%eax
  4918. +
  4919. +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \
  4920. + add $p4c,p1w ;\
  4921. + and p2x,%eax ;\
  4922. + add %ebx,p1w ;\
  4923. + xor p3z,%eax ;\
  4924. + mov p6Nin*4(%ebp),%ebx ;\
  4925. + add %eax,p1w ;\
  4926. + mov p7Nz,%eax ;\
  4927. + rol $p5s,p1w ;\
  4928. + xor p8Ny,%eax ;\
  4929. + add p2x,p1w
  4930. +
  4931. + REPEAT1(%esi,%edi,%edx,0xd76aa478, 7, 1,%ecx,%edi)
  4932. + REPEAT1(%edx,%esi,%ecx,0xe8c7b756,12, 2,%edi,%esi)
  4933. + REPEAT1(%ecx,%edx,%edi,0x242070db,17, 3,%esi,%edx)
  4934. + REPEAT1(%edi,%ecx,%esi,0xc1bdceee,22, 4,%edx,%ecx)
  4935. + REPEAT1(%esi,%edi,%edx,0xf57c0faf, 7, 5,%ecx,%edi)
  4936. + REPEAT1(%edx,%esi,%ecx,0x4787c62a,12, 6,%edi,%esi)
  4937. + REPEAT1(%ecx,%edx,%edi,0xa8304613,17, 7,%esi,%edx)
  4938. + REPEAT1(%edi,%ecx,%esi,0xfd469501,22, 8,%edx,%ecx)
  4939. + REPEAT1(%esi,%edi,%edx,0x698098d8, 7, 9,%ecx,%edi)
  4940. + REPEAT1(%edx,%esi,%ecx,0x8b44f7af,12,10,%edi,%esi)
  4941. + REPEAT1(%ecx,%edx,%edi,0xffff5bb1,17,11,%esi,%edx)
  4942. + REPEAT1(%edi,%ecx,%esi,0x895cd7be,22,12,%edx,%ecx)
  4943. + REPEAT1(%esi,%edi,%edx,0x6b901122, 7,13,%ecx,%edi)
  4944. + REPEAT1(%edx,%esi,%ecx,0xfd987193,12,14,%edi,%esi)
  4945. + REPEAT1(%ecx,%edx,%edi,0xa679438e,17,15,%esi,%edx)
  4946. +
  4947. + add $0x49b40821,%edi
  4948. + and %ecx,%eax
  4949. + add %ebx,%edi
  4950. + xor %esi,%eax
  4951. + mov 1*4(%ebp),%ebx
  4952. + add %eax,%edi
  4953. + mov %ecx,%eax
  4954. + rol $22,%edi
  4955. + add %ecx,%edi
  4956. +
  4957. +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \
  4958. + xor p2x,%eax ;\
  4959. + add $p5c,p1w ;\
  4960. + and p4z,%eax ;\
  4961. + add %ebx,p1w ;\
  4962. + xor p3y,%eax ;\
  4963. + mov p7Nin*4(%ebp),%ebx ;\
  4964. + add %eax,p1w ;\
  4965. + mov p8Ny,%eax ;\
  4966. + rol $p6s,p1w ;\
  4967. + add p2x,p1w
  4968. +
  4969. + REPEAT2(%esi,%edi,%ecx,%edx,0xf61e2562, 5, 6,%edi)
  4970. + REPEAT2(%edx,%esi,%edi,%ecx,0xc040b340, 9,11,%esi)
  4971. + REPEAT2(%ecx,%edx,%esi,%edi,0x265e5a51,14, 0,%edx)
  4972. + REPEAT2(%edi,%ecx,%edx,%esi,0xe9b6c7aa,20, 5,%ecx)
  4973. + REPEAT2(%esi,%edi,%ecx,%edx,0xd62f105d, 5,10,%edi)
  4974. + REPEAT2(%edx,%esi,%edi,%ecx,0x02441453, 9,15,%esi)
  4975. + REPEAT2(%ecx,%edx,%esi,%edi,0xd8a1e681,14, 4,%edx)
  4976. + REPEAT2(%edi,%ecx,%edx,%esi,0xe7d3fbc8,20, 9,%ecx)
  4977. + REPEAT2(%esi,%edi,%ecx,%edx,0x21e1cde6, 5,14,%edi)
  4978. + REPEAT2(%edx,%esi,%edi,%ecx,0xc33707d6, 9, 3,%esi)
  4979. + REPEAT2(%ecx,%edx,%esi,%edi,0xf4d50d87,14, 8,%edx)
  4980. + REPEAT2(%edi,%ecx,%edx,%esi,0x455a14ed,20,13,%ecx)
  4981. + REPEAT2(%esi,%edi,%ecx,%edx,0xa9e3e905, 5, 2,%edi)
  4982. + REPEAT2(%edx,%esi,%edi,%ecx,0xfcefa3f8, 9, 7,%esi)
  4983. + REPEAT2(%ecx,%edx,%esi,%edi,0x676f02d9,14,12,%edx)
  4984. +
  4985. + xor %ecx,%eax
  4986. + add $0x8d2a4c8a,%edi
  4987. + and %esi,%eax
  4988. + add %ebx,%edi
  4989. + xor %edx,%eax
  4990. + mov 5*4(%ebp),%ebx
  4991. + add %eax,%edi
  4992. + mov %ecx,%eax
  4993. + rol $20,%edi
  4994. + xor %edx,%eax
  4995. + add %ecx,%edi
  4996. + /* REPEAT3: one round-3 step. Entry: %eax = y ^ z, %ebx = current input word. Computes w = rol(w + c + %ebx + (x ^ y ^ z), s) + x, then preloads %ebx with the word at p5Nin*4(%ebp) and %eax with p6Ny ^ p7Nz for the following step. */
  4997. +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \
  4998. + add $p3c,p1w ;\
  4999. + xor p2x,%eax ;\
  5000. + add %ebx,p1w ;\
  5001. + mov p5Nin*4(%ebp),%ebx ;\
  5002. + add %eax,p1w ;\
  5003. + mov p6Ny,%eax ;\
  5004. + rol $p4s,p1w ;\
  5005. + xor p7Nz,%eax ;\
  5006. + add p2x,p1w
  5007. +
  5008. + REPEAT3(%esi,%edi,0xfffa3942, 4, 8,%edi,%ecx)
  5009. + REPEAT3(%edx,%esi,0x8771f681,11,11,%esi,%edi)
  5010. + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%esi)
  5011. + REPEAT3(%edi,%ecx,0xfde5380c,23, 1,%ecx,%edx)
  5012. + REPEAT3(%esi,%edi,0xa4beea44, 4, 4,%edi,%ecx)
  5013. + REPEAT3(%edx,%esi,0x4bdecfa9,11, 7,%esi,%edi)
  5014. + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%esi)
  5015. + REPEAT3(%edi,%ecx,0xbebfbc70,23,13,%ecx,%edx)
  5016. + REPEAT3(%esi,%edi,0x289b7ec6, 4, 0,%edi,%ecx)
  5017. + REPEAT3(%edx,%esi,0xeaa127fa,11, 3,%esi,%edi)
  5018. + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%esi)
  5019. + REPEAT3(%edi,%ecx,0x04881d05,23, 9,%ecx,%edx)
  5020. + REPEAT3(%esi,%edi,0xd9d4d039, 4,12,%edi,%ecx)
  5021. + REPEAT3(%edx,%esi,0xe6db99e5,11,15,%esi,%edi)
  5022. + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%esi)
  5023. +
  5024. + add $0xc4ac5665,%edi
  5025. + xor %ecx,%eax
  5026. + add %ebx,%edi
  5027. + mov (%ebp),%ebx
  5028. + add %eax,%edi
  5029. + mov %edx,%eax
  5030. + rol $23,%edi
  5031. + not %eax
  5032. + add %ecx,%edi
  5033. + /* REPEAT4: one round-4 step. Entry: %eax = ~z, %ebx = current input word. Computes w = rol(w + c + %ebx + (y ^ (x | ~z)), s) + x, then preloads %ebx with the word at p6Nin*4(%ebp) and %eax with ~p7Nz (the inverted z of the following step). */
  5034. +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \
  5035. + add $p4c,p1w ;\
  5036. + or p2x,%eax ;\
  5037. + add %ebx,p1w ;\
  5038. + xor p3y,%eax ;\
  5039. + mov p6Nin*4(%ebp),%ebx ;\
  5040. + add %eax,p1w ;\
  5041. + mov p7Nz,%eax ;\
  5042. + rol $p5s,p1w ;\
  5043. + not %eax ;\
  5044. + add p2x,p1w
  5045. +
  5046. + REPEAT4(%esi,%edi,%ecx,0xf4292244, 6, 7,%ecx)
  5047. + REPEAT4(%edx,%esi,%edi,0x432aff97,10,14,%edi)
  5048. + REPEAT4(%ecx,%edx,%esi,0xab9423a7,15, 5,%esi)
  5049. + REPEAT4(%edi,%ecx,%edx,0xfc93a039,21,12,%edx)
  5050. + REPEAT4(%esi,%edi,%ecx,0x655b59c3, 6, 3,%ecx)
  5051. + REPEAT4(%edx,%esi,%edi,0x8f0ccc92,10,10,%edi)
  5052. + REPEAT4(%ecx,%edx,%esi,0xffeff47d,15, 1,%esi)
  5053. + REPEAT4(%edi,%ecx,%edx,0x85845dd1,21, 8,%edx)
  5054. + REPEAT4(%esi,%edi,%ecx,0x6fa87e4f, 6,15,%ecx)
  5055. + REPEAT4(%edx,%esi,%edi,0xfe2ce6e0,10, 6,%edi)
  5056. + REPEAT4(%ecx,%edx,%esi,0xa3014314,15,13,%esi)
  5057. + REPEAT4(%edi,%ecx,%edx,0x4e0811a1,21, 4,%edx)
  5058. + REPEAT4(%esi,%edi,%ecx,0xf7537e82, 6,11,%ecx)
  5059. + REPEAT4(%edx,%esi,%edi,0xbd3af235,10, 2,%edi)
  5060. + REPEAT4(%ecx,%edx,%esi,0x2ad7d2bb,15, 9,%esi)
  5061. +
  5062. + add $0xeb86d391,%edi
  5063. + or %ecx,%eax
  5064. + add %ebx,%edi
  5065. + xor %edx,%eax
  5066. + mov 4+16(%esp),%ebp // pointer to 'hash' output
  5067. + add %eax,%edi
  5068. + rol $21,%edi
  5069. + add %ecx,%edi
  5070. +
  5071. + add %esi,(%ebp)
  5072. + add %edi,4(%ebp)
  5073. + add %ecx,8(%ebp)
  5074. + add %edx,12(%ebp)
  5075. +
  5076. + pop %edi
  5077. + pop %esi
  5078. + pop %ebx
  5079. + pop %ebp
  5080. + ret
  5081. diff -pruN linux-2.6.6_orig/drivers/misc/md5.c linux-2.6.6/drivers/misc/md5.c
  5082. --- linux-2.6.6_orig/drivers/misc/md5.c 1970-01-01 01:00:00.000000000 +0100
  5083. +++ linux-2.6.6/drivers/misc/md5.c 2004-05-16 15:08:27.000000000 +0200
  5084. @@ -0,0 +1,106 @@
  5085. +/*
  5086. + * MD5 Message Digest Algorithm (RFC1321).
  5087. + *
  5088. + * Derived from cryptoapi implementation, originally based on the
  5089. + * public domain implementation written by Colin Plumb in 1993.
  5090. + *
  5091. + * Copyright (c) Cryptoapi developers.
  5092. + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  5093. + *
  5094. + * This program is free software; you can redistribute it and/or modify it
  5095. + * under the terms of the GNU General Public License as published by the Free
  5096. + * Software Foundation; either version 2 of the License, or (at your option)
  5097. + * any later version.
  5098. + */
  5099. +
  5100. +#include "md5.h"
  5101. + /* MD5 round functions and the generic step w = rotl(w + f(x,y,z) + in, s) + x. Arguments are not parenthesised and w is expanded several times, so pass only simple, side-effect-free operands; the rotate needs w to be a 32-bit unsigned value and 0 < s < 32. */
  5102. +#define MD5_F1(x, y, z) (z ^ (x & (y ^ z))) /* same truth table as (x & y) | (~x & z) */
  5103. +#define MD5_F2(x, y, z) MD5_F1(z, x, y) /* expands to y ^ (z & (x ^ y)) */
  5104. +#define MD5_F3(x, y, z) (x ^ y ^ z) /* bitwise parity of the three words */
  5105. +#define MD5_F4(x, y, z) (y ^ (x | ~z))
  5106. +#define MD5_STEP(f, w, x, y, z, in, s) \
  5107. + (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
  5108. + /* Run the 64 MD5 steps over one block: reads in[0..15], updates hash[0..3] in place. The words are used exactly as stored; no byte-order conversion is done in this function. */
  5109. +void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in)
  5110. +{
  5111. + u_int32_t a, b, c, d;
  5112. + /* Work on local copies of the four state words. */
  5113. + a = hash[0];
  5114. + b = hash[1];
  5115. + c = hash[2];
  5116. + d = hash[3];
  5117. + /* Round 1: MD5_F1, input words taken in order 0..15, rotations 7/12/17/22. */
  5118. + MD5_STEP(MD5_F1, a, b, c, d, in[0] + 0xd76aa478, 7);
  5119. + MD5_STEP(MD5_F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
  5120. + MD5_STEP(MD5_F1, c, d, a, b, in[2] + 0x242070db, 17);
  5121. + MD5_STEP(MD5_F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
  5122. + MD5_STEP(MD5_F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
  5123. + MD5_STEP(MD5_F1, d, a, b, c, in[5] + 0x4787c62a, 12);
  5124. + MD5_STEP(MD5_F1, c, d, a, b, in[6] + 0xa8304613, 17);
  5125. + MD5_STEP(MD5_F1, b, c, d, a, in[7] + 0xfd469501, 22);
  5126. + MD5_STEP(MD5_F1, a, b, c, d, in[8] + 0x698098d8, 7);
  5127. + MD5_STEP(MD5_F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
  5128. + MD5_STEP(MD5_F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
  5129. + MD5_STEP(MD5_F1, b, c, d, a, in[11] + 0x895cd7be, 22);
  5130. + MD5_STEP(MD5_F1, a, b, c, d, in[12] + 0x6b901122, 7);
  5131. + MD5_STEP(MD5_F1, d, a, b, c, in[13] + 0xfd987193, 12);
  5132. + MD5_STEP(MD5_F1, c, d, a, b, in[14] + 0xa679438e, 17);
  5133. + MD5_STEP(MD5_F1, b, c, d, a, in[15] + 0x49b40821, 22);
  5134. + /* Round 2: MD5_F2, input index starts at 1 and advances by 5 mod 16, rotations 5/9/14/20. */
  5135. + MD5_STEP(MD5_F2, a, b, c, d, in[1] + 0xf61e2562, 5);
  5136. + MD5_STEP(MD5_F2, d, a, b, c, in[6] + 0xc040b340, 9);
  5137. + MD5_STEP(MD5_F2, c, d, a, b, in[11] + 0x265e5a51, 14);
  5138. + MD5_STEP(MD5_F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
  5139. + MD5_STEP(MD5_F2, a, b, c, d, in[5] + 0xd62f105d, 5);
  5140. + MD5_STEP(MD5_F2, d, a, b, c, in[10] + 0x02441453, 9);
  5141. + MD5_STEP(MD5_F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
  5142. + MD5_STEP(MD5_F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
  5143. + MD5_STEP(MD5_F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
  5144. + MD5_STEP(MD5_F2, d, a, b, c, in[14] + 0xc33707d6, 9);
  5145. + MD5_STEP(MD5_F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
  5146. + MD5_STEP(MD5_F2, b, c, d, a, in[8] + 0x455a14ed, 20);
  5147. + MD5_STEP(MD5_F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
  5148. + MD5_STEP(MD5_F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
  5149. + MD5_STEP(MD5_F2, c, d, a, b, in[7] + 0x676f02d9, 14);
  5150. + MD5_STEP(MD5_F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
  5151. + /* Round 3: MD5_F3, input index starts at 5 and advances by 3 mod 16, rotations 4/11/16/23. */
  5152. + MD5_STEP(MD5_F3, a, b, c, d, in[5] + 0xfffa3942, 4);
  5153. + MD5_STEP(MD5_F3, d, a, b, c, in[8] + 0x8771f681, 11);
  5154. + MD5_STEP(MD5_F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
  5155. + MD5_STEP(MD5_F3, b, c, d, a, in[14] + 0xfde5380c, 23);
  5156. + MD5_STEP(MD5_F3, a, b, c, d, in[1] + 0xa4beea44, 4);
  5157. + MD5_STEP(MD5_F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
  5158. + MD5_STEP(MD5_F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
  5159. + MD5_STEP(MD5_F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
  5160. + MD5_STEP(MD5_F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
  5161. + MD5_STEP(MD5_F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
  5162. + MD5_STEP(MD5_F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
  5163. + MD5_STEP(MD5_F3, b, c, d, a, in[6] + 0x04881d05, 23);
  5164. + MD5_STEP(MD5_F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
  5165. + MD5_STEP(MD5_F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
  5166. + MD5_STEP(MD5_F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
  5167. + MD5_STEP(MD5_F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
  5168. + /* Round 4: MD5_F4, input index starts at 0 and advances by 7 mod 16, rotations 6/10/15/21. */
  5169. + MD5_STEP(MD5_F4, a, b, c, d, in[0] + 0xf4292244, 6);
  5170. + MD5_STEP(MD5_F4, d, a, b, c, in[7] + 0x432aff97, 10);
  5171. + MD5_STEP(MD5_F4, c, d, a, b, in[14] + 0xab9423a7, 15);
  5172. + MD5_STEP(MD5_F4, b, c, d, a, in[5] + 0xfc93a039, 21);
  5173. + MD5_STEP(MD5_F4, a, b, c, d, in[12] + 0x655b59c3, 6);
  5174. + MD5_STEP(MD5_F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
  5175. + MD5_STEP(MD5_F4, c, d, a, b, in[10] + 0xffeff47d, 15);
  5176. + MD5_STEP(MD5_F4, b, c, d, a, in[1] + 0x85845dd1, 21);
  5177. + MD5_STEP(MD5_F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
  5178. + MD5_STEP(MD5_F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
  5179. + MD5_STEP(MD5_F4, c, d, a, b, in[6] + 0xa3014314, 15);
  5180. + MD5_STEP(MD5_F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
  5181. + MD5_STEP(MD5_F4, a, b, c, d, in[4] + 0xf7537e82, 6);
  5182. + MD5_STEP(MD5_F4, d, a, b, c, in[11] + 0xbd3af235, 10);
  5183. + MD5_STEP(MD5_F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
  5184. + MD5_STEP(MD5_F4, b, c, d, a, in[9] + 0xeb86d391, 21);
  5185. + /* Add the working values back into the caller's state. */
  5186. + hash[0] += a;
  5187. + hash[1] += b;
  5188. + hash[2] += c;
  5189. + hash[3] += d;
  5190. +}
  5191. diff -pruN linux-2.6.6_orig/drivers/misc/md5.h linux-2.6.6/drivers/misc/md5.h
  5192. --- linux-2.6.6_orig/drivers/misc/md5.h 1970-01-01 01:00:00.000000000 +0100
  5193. +++ linux-2.6.6/drivers/misc/md5.h 2004-05-16 15:08:27.000000000 +0200
  5194. @@ -0,0 +1,11 @@
  5195. +/* md5.h */
  5196. +
  5197. +#include <linux/types.h>
  5198. +#include <linux/linkage.h>
  5199. +#include <linux/config.h>
  5200. +#include <linux/module.h>
  5201. + /* Prototype of the one-block MD5 transform (hash state, then input words). asmlinkage is applied only on 32-bit x86 configs other than M386/M486; NOTE(review): presumably those are the configs where the assembler implementation is built - confirm against the Makefile. */
  5202. +#if defined(CONFIG_X86) && !defined(CONFIG_X86_64) && !defined(CONFIG_M386) && !defined(CONFIG_M486)
  5203. + asmlinkage
  5204. +#endif
  5205. +extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *);
  5206. diff -pruN linux-2.6.6_orig/include/linux/loop.h linux-2.6.6/include/linux/loop.h
  5207. --- linux-2.6.6_orig/include/linux/loop.h 2004-05-16 15:07:54.000000000 +0200
  5208. +++ linux-2.6.6/include/linux/loop.h 2004-05-16 15:08:27.000000000 +0200
  5209. @@ -16,54 +16,52 @@
  5210. #ifdef __KERNEL__
  5211. #include <linux/bio.h>
  5212. #include <linux/blkdev.h>
  5213. +#include <linux/module.h>
  5214. #include <linux/spinlock.h>
  5215. -/* Possible states of device */
  5216. -enum {
  5217. - Lo_unbound,
  5218. - Lo_bound,
  5219. - Lo_rundown,
  5220. -};
  5221. -
  5222. struct loop_func_table;
  5223. struct loop_device {
  5224. int lo_number;
  5225. - int lo_refcnt;
  5226. + int lo_flags;
  5227. loff_t lo_offset;
  5228. loff_t lo_sizelimit;
  5229. - int lo_flags;
  5230. int (*transfer)(struct loop_device *, int cmd,
  5231. - struct page *raw_page, unsigned raw_off,
  5232. - struct page *loop_page, unsigned loop_off,
  5233. - int size, sector_t real_block);
  5234. + char *raw_buf, char *loop_buf, int size,
  5235. + sector_t real_block);
  5236. + struct loop_func_table *lo_encryption;
  5237. char lo_file_name[LO_NAME_SIZE];
  5238. char lo_crypt_name[LO_NAME_SIZE];
  5239. char lo_encrypt_key[LO_KEY_SIZE];
  5240. int lo_encrypt_key_size;
  5241. - struct loop_func_table *lo_encryption;
  5242. - __u32 lo_init[2];
  5243. uid_t lo_key_owner; /* Who set the key */
  5244. - int (*ioctl)(struct loop_device *, int cmd,
  5245. - unsigned long arg);
  5246. + __u32 lo_init[2];
  5247. + int (*ioctl)(struct loop_device *, int cmd,
  5248. + unsigned long arg);
  5249. struct file * lo_backing_file;
  5250. struct block_device *lo_device;
  5251. - unsigned lo_blocksize;
  5252. - void *key_data;
  5253. + void *key_data;
  5254. int old_gfp_mask;
  5255. spinlock_t lo_lock;
  5256. - struct bio *lo_bio;
  5257. - struct bio *lo_biotail;
  5258. - int lo_state;
  5259. struct semaphore lo_sem;
  5260. - struct semaphore lo_ctl_mutex;
  5261. - struct semaphore lo_bh_mutex;
  5262. atomic_t lo_pending;
  5263. request_queue_t *lo_queue;
  5264. + /* Fields below are added by this patch (the former lo_bio/lo_biotail pair is removed above). NOTE(review): their individual roles are defined by the loop driver source, which is not visible here - document them there. */
  5265. + struct bio *lo_bio_que0;
  5266. + struct bio *lo_bio_que1;
  5267. + struct bio *lo_bio_que2;
  5268. + struct bio *lo_bio_free0;
  5269. + struct bio *lo_bio_free1;
  5270. + atomic_t lo_bio_barr;
  5271. + int lo_bio_flsh;
  5272. + int lo_bio_need;
  5273. + wait_queue_head_t lo_bio_wait;
  5274. + sector_t lo_offs_sec;
  5275. + sector_t lo_iv_remove;
  5276. };
  5277. #endif /* __KERNEL__ */
  5278. @@ -71,7 +69,8 @@ struct loop_device {
  5279. /*
  5280. * Loop flags
  5281. */
  5282. -#define LO_FLAGS_READ_ONLY 1
  5283. +#define LO_FLAGS_DO_BMAP 1
  5284. +#define LO_FLAGS_READ_ONLY 2 /* NOTE(review): was 1 before this patch; code built against the old header will disagree on this bit - confirm the renumbering is intended */
  5285. #include <asm/posix_types.h> /* for __kernel_old_dev_t */
  5286. #include <asm/types.h> /* for __u64 */
  5287. @@ -121,26 +120,25 @@ struct loop_info64 {
  5288. #define LO_CRYPT_IDEA 6
  5289. #define LO_CRYPT_DUMMY 9
  5290. #define LO_CRYPT_SKIPJACK 10
  5291. +#define LO_CRYPT_AES 16
  5292. #define LO_CRYPT_CRYPTOAPI 18
  5293. #define MAX_LO_CRYPT 20
  5294. #ifdef __KERNEL__
  5295. /* Support for loadable transfer modules */
  5296. struct loop_func_table {
  5297. - int number; /* filter type */
  5298. - int (*transfer)(struct loop_device *lo, int cmd,
  5299. - struct page *raw_page, unsigned raw_off,
  5300. - struct page *loop_page, unsigned loop_off,
  5301. - int size, sector_t real_block);
  5302. - int (*init)(struct loop_device *, const struct loop_info64 *);
  5303. + int number; /* filter type */
  5304. + int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
  5305. + char *loop_buf, int size, sector_t real_block); /* buffers are plain char pointers here (previously page + offset pairs) */
  5306. + int (*init)(struct loop_device *, struct loop_info64 *); /* info argument is no longer const-qualified */
  5307. /* release is called from loop_unregister_transfer or clr_fd */
  5308. - int (*release)(struct loop_device *);
  5309. + int (*release)(struct loop_device *);
  5310. int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
  5311. struct module *owner;
  5312. -};
  5313. +};
  5314. int loop_register_transfer(struct loop_func_table *funcs);
  5315. -int loop_unregister_transfer(int number);
  5316. +int loop_unregister_transfer(int number);
  5317. #endif
  5318. /*
  5319. @@ -155,4 +153,5 @@ int loop_unregister_transfer(int number)
  5320. #define LOOP_GET_STATUS64 0x4C05
  5321. #define LOOP_CHANGE_FD 0x4C06
  5322. +#define LOOP_MULTI_KEY_SETUP 0x4C4D
  5323. #endif