mirror of the now-defunct rocklinux.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

6939 lines
217 KiB

  1. # --- ROCK-COPYRIGHT-NOTE-BEGIN ---
  2. #
  3. # This copyright note is auto-generated by ./scripts/Create-CopyPatch.
  4. # Please add additional copyright information _after_ the line containing
  5. # the ROCK-COPYRIGHT-NOTE-END tag. Otherwise it might get removed by
  6. # the ./scripts/Create-CopyPatch script. Do not edit this copyright text!
  7. #
  8. # ROCK Linux: rock-src/package/blindcoder/loop-aes/linux26_cryptoloop.diff
  9. # ROCK Linux is Copyright (C) 1998 - 2004 Clifford Wolf
  10. #
  11. # This patch file is dual-licensed. It is available under the license the
  12. # patched project is licensed under, as long as it is an OpenSource license
  13. # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms
  14. # of the GNU General Public License as published by the Free Software
  15. # Foundation; either version 2 of the License, or (at your option) any later
  16. # version.
  17. #
  18. # --- ROCK-COPYRIGHT-NOTE-END ---
  19. diff -pruN linux-2.6.9_orig/drivers/block/Kconfig linux-2.6.9/drivers/block/Kconfig
  20. --- linux-2.6.9_orig/drivers/block/Kconfig 2004-10-18 23:53:43.000000000 +0200
  21. +++ linux-2.6.9/drivers/block/Kconfig 2004-10-25 14:26:31.839233872 +0200
  22. @@ -235,14 +235,6 @@ config BLK_DEV_LOOP
  23. bits of, say, a sound file). This is also safe if the file resides
  24. on a remote file server.
  25. - There are several ways of encrypting disks. Some of these require
  26. - kernel patches. The vanilla kernel offers the cryptoloop option
  27. - and a Device Mapper target (which is superior, as it supports all
  28. - file systems). If you want to use the cryptoloop, say Y to both
  29. - LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
  30. - or later) version of util-linux. Additionally, be aware that
  31. - the cryptoloop is not safe for storing journaled filesystems.
  32. -
  33. Note that this loop device has nothing to do with the loopback
  34. device used for network connections from the machine to itself.
  35. @@ -251,19 +243,24 @@ config BLK_DEV_LOOP
  36. Most users will answer N here.
  37. -config BLK_DEV_CRYPTOLOOP
  38. - tristate "Cryptoloop Support"
  39. - select CRYPTO
  40. +config BLK_DEV_LOOP_AES
  41. + bool "AES encrypted loop device support"
  42. depends on BLK_DEV_LOOP
  43. ---help---
  44. - Say Y here if you want to be able to use the ciphers that are
  45. - provided by the CryptoAPI as loop transformation. This might be
  46. - used as hard disk encryption.
  47. -
  48. - WARNING: This device is not safe for journaled file systems like
  49. - ext3 or Reiserfs. Please use the Device Mapper crypto module
  50. - instead, which can be configured to be on-disk compatible with the
  51. - cryptoloop device.
  52. + If you want to use the AES encryption algorithm to encrypt loop
  53. + devices, say Y here. If you don't know what to do here, say N.
  54. +
  55. +config BLK_DEV_LOOP_KEYSCRUB
  56. + bool "loop encryption key scrubbing support"
  57. + depends on BLK_DEV_LOOP
  58. + ---help---
  59. + Loop encryption key scrubbing moves and inverts key bits in
  60. + kernel RAM so that the thin oxide which forms the storage
  61. + capacitor dielectric of DRAM cells is not permitted to develop
  62. + a detectable property. For more info, see Peter Gutmann's paper:
  63. + http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
  64. +
  65. + Paranoid tinfoil hat crowd say Y here, everyone else say N.
  66. config BLK_DEV_NBD
  67. tristate "Network block device support"
  68. diff -pruN linux-2.6.9_orig/drivers/block/Makefile linux-2.6.9/drivers/block/Makefile
  69. --- linux-2.6.9_orig/drivers/block/Makefile 2004-10-18 23:54:55.000000000 +0200
  70. +++ linux-2.6.9/drivers/block/Makefile 2004-10-25 14:26:31.840233720 +0200
  71. @@ -1,6 +1,9 @@
  72. #
  73. # Makefile for the kernel block device drivers.
  74. #
  75. +
  76. +CFLAGS_loop.o:=$(shell if grep -q -s "current->rlim" kernel/sys.c; then echo "-DOLD_PER_THREAD_RLIMITS"; fi)
  77. +
  78. # 12 June 2000, Christoph Hellwig <hch@infradead.org>
  79. # Rewritten to use lists instead of if-statements.
  80. #
  81. diff -pruN linux-2.6.9_orig/drivers/block/loop.c linux-2.6.9/drivers/block/loop.c
  82. --- linux-2.6.9_orig/drivers/block/loop.c 2004-10-18 23:54:32.000000000 +0200
  83. +++ linux-2.6.9/drivers/block/loop.c 2004-10-25 14:26:31.845232960 +0200
  84. @@ -39,18 +39,46 @@
  85. * Support up to 256 loop devices
  86. * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  87. *
  88. + * AES transfer added. IV is now passed as (512 byte) sector number.
  89. + * Jari Ruusu, May 18 2001
  90. + *
  91. + * External encryption module locking bug fixed.
  92. + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
  93. + *
  94. + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
  95. + * Jari Ruusu, September 2 2001
  96. + *
  97. + * Ported 'pre-allocated buffers + queue rewrite' to BIO for 2.5 kernels
  98. + * Ben Slusky <sluskyb@stwing.org>, March 1 2002
  99. + * Jari Ruusu, March 27 2002
  100. + *
  101. + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
  102. + * Jari Ruusu, May 23 2002
  103. + *
  104. + * Exported hard sector size correctly, fixed file-backed-loop-on-tmpfs bug,
  105. + * plus many more enhancements and optimizations.
  106. + * Adam J. Richter <adam@yggdrasil.com>, Aug 2002
  107. + *
  108. + * Added support for removing offset from IV computations.
  109. + * Jari Ruusu, September 21 2003
  110. + *
  111. + * Added support for MD5 IV computation and multi-key operation.
  112. + * Jari Ruusu, October 8 2003
  113. + *
  114. + *
  115. * Still To Fix:
  116. * - Advisory locking is ignored here.
  117. * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  118. - *
  119. */
  120. +#include <linux/version.h>
  121. #include <linux/config.h>
  122. #include <linux/module.h>
  123. -
  124. +#include <linux/moduleparam.h>
  125. #include <linux/sched.h>
  126. #include <linux/fs.h>
  127. #include <linux/file.h>
  128. +#include <linux/bio.h>
  129. #include <linux/stat.h>
  130. #include <linux/errno.h>
  131. #include <linux/major.h>
  132. @@ -66,45 +94,59 @@
  133. #include <linux/suspend.h>
  134. #include <linux/writeback.h>
  135. #include <linux/buffer_head.h> /* for invalidate_bdev() */
  136. -#include <linux/completion.h>
  137. +#if LINUX_VERSION_CODE >= 0x20606
  138. +# include <linux/mqueue.h>
  139. +#endif
  140. +#include <linux/spinlock.h>
  141. #include <asm/uaccess.h>
  142. +#include <asm/byteorder.h>
  143. +
  144. +#include "../misc/aes.h"
  145. +#include "../misc/md5.h"
  146. +
  147. +#if defined(CONFIG_COMPAT)
  148. +# include <linux/ioctl32.h>
  149. +# define IOCTL32_COMPATIBLE_PTR ((void*)0)
  150. +#endif
  151. static int max_loop = 8;
  152. -static struct loop_device *loop_dev;
  153. +
  154. +#ifdef MODULE
  155. +module_param(max_loop, int, 0);
  156. +MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
  157. +#else
  158. +static int __init max_loop_setup(char *str) /* parses the "max_loop=" boot option (non-module build) */
  159. +{
  160. + int y;
  161. +
  162. + if (get_option(&str, &y) == 1) /* accept only a single plain integer */
  163. + max_loop = y; /* no range check is done in this function */
  164. + return 1;
  165. +}
  166. +__setup("max_loop=", max_loop_setup);
  167. +#endif
  168. +
  169. static struct gendisk **disks;
  170. /*
  171. * Transfer functions
  172. */
  173. -static int transfer_none(struct loop_device *lo, int cmd,
  174. - struct page *raw_page, unsigned raw_off,
  175. - struct page *loop_page, unsigned loop_off,
  176. - int size, sector_t real_block)
  177. +static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
  178. + char *loop_buf, int size, sector_t real_block)
  179. {
  180. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  181. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  182. + /* Only the file backed loop code calls this function, and */
  183. + /* that code relies on it being a no-op: no data is copied. */
  184. - if (cmd == READ)
  185. - memcpy(loop_buf, raw_buf, size);
  186. - else
  187. - memcpy(raw_buf, loop_buf, size);
  188. -
  189. - kunmap_atomic(raw_buf, KM_USER0);
  190. - kunmap_atomic(loop_buf, KM_USER1);
  191. cond_resched();
  192. return 0;
  193. }
  194. -static int transfer_xor(struct loop_device *lo, int cmd,
  195. - struct page *raw_page, unsigned raw_off,
  196. - struct page *loop_page, unsigned loop_off,
  197. - int size, sector_t real_block)
  198. -{
  199. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  200. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  201. - char *in, *out, *key;
  202. - int i, keysize;
  203. +static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
  204. + char *loop_buf, int size, sector_t real_block)
  205. +{
  206. + char *in, *out, *key;
  207. + int i, keysize;
  208. if (cmd == READ) {
  209. in = raw_buf;
  210. @@ -118,14 +160,11 @@ static int transfer_xor(struct loop_devi
  211. keysize = lo->lo_encrypt_key_size;
  212. for (i = 0; i < size; i++)
  213. *out++ = *in++ ^ key[(i & 511) % keysize];
  214. -
  215. - kunmap_atomic(raw_buf, KM_USER0);
  216. - kunmap_atomic(loop_buf, KM_USER1);
  217. cond_resched();
  218. return 0;
  219. }
  220. -static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
  221. +static int xor_init(struct loop_device *lo, struct loop_info64 *info)
  222. {
  223. if (info->lo_encrypt_key_size <= 0)
  224. return -EINVAL;
  225. @@ -135,337 +174,1207 @@ static int xor_init(struct loop_device *
  226. static struct loop_func_table none_funcs = {
  227. .number = LO_CRYPT_NONE,
  228. .transfer = transfer_none,
  229. -};
  230. +};
  231. static struct loop_func_table xor_funcs = {
  232. .number = LO_CRYPT_XOR,
  233. .transfer = transfer_xor,
  234. - .init = xor_init
  235. -};
  236. -
  237. -/* xfer_funcs[0] is special - its release function is never called */
  238. -static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  239. - &none_funcs,
  240. - &xor_funcs
  241. + .init = xor_init,
  242. };
  243. -static loff_t get_loop_size(struct loop_device *lo, struct file *file)
  244. +#if CONFIG_BLK_DEV_LOOP_AES
  245. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  246. +# define KEY_ALLOC_COUNT 128
  247. +#else
  248. +# define KEY_ALLOC_COUNT 64
  249. +#endif
  250. +
  251. +typedef struct {
  252. + aes_context *keyPtr[KEY_ALLOC_COUNT]; /* key contexts; transfer_aes() picks keyPtr[sector & keyMask] */
  253. + unsigned keyMask; /* 0 in single-key mode, 0x3F once multiKeySetup() succeeds */
  254. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  255. + rwlock_t rwlock; /* read-held while transferring, write-held while scrubbing */
  256. + unsigned reversed; /* toggled by every keyScrubWork() pass */
  257. + unsigned blocked; /* non-zero makes keyScrubThreadFn() skip scrubbing */
  258. + struct timer_list timer; /* scrub timer armed by keyScrubTimerInit() */
  259. +#endif
  260. +} AESmultiKey;
  261. +
  262. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  263. +static void keyScrubWork(AESmultiKey *m) /* one scrub pass: relocate every key and invert its old copy */
  264. {
  265. - loff_t size, offset, loopsize;
  266. + aes_context *a0, *a1;
  267. + u_int32_t *p;
  268. + int x, y, z;
  269. +
  270. + z = m->keyMask + 1; /* number of keys in use */
  271. + for(x = 0; x < z; x++) {
  272. + a0 = m->keyPtr[x];
  273. + a1 = m->keyPtr[x + z]; /* spare slot paired with key x */
  274. + memcpy(a1, a0, sizeof(aes_context)); /* copy the key into the spare location */
  275. + m->keyPtr[x] = a1; /* swap the two pointers so lookups use the new copy */
  276. + m->keyPtr[x + z] = a0;
  277. + p = (u_int32_t *) a0;
  278. + y = sizeof(aes_context) / sizeof(u_int32_t);
  279. + while(y > 0) {
  280. + *p ^= 0xFFFFFFFF; /* invert every bit of the old copy */
  281. + p++;
  282. + y--;
  283. + }
  284. + }
  285. + m->reversed ^= 1; /* record that the slot order has flipped */
  286. +
  287. + /* try to flush dirty cache data to RAM */
  288. +#if defined(CONFIG_X86_64) || (defined(CONFIG_X86) && !defined(CONFIG_M386) && !defined(CONFIG_CPU_386))
  289. + __asm__ __volatile__ ("wbinvd": : :"memory");
  290. +#else
  291. + mb();
  292. +#endif
  293. +}
  294. - /* Compute loopsize in bytes */
  295. - size = i_size_read(file->f_mapping->host);
  296. - offset = lo->lo_offset;
  297. - loopsize = size - offset;
  298. - if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
  299. - loopsize = lo->lo_sizelimit;
  300. +/* called only from loop thread process context */
  301. +static void keyScrubThreadFn(AESmultiKey *m)
  302. +{
  303. + write_lock(&m->rwlock); /* excludes the read_lock() holders in transfer_aes() */
  304. + if(!m->blocked) keyScrubWork(m); /* skipped while multiKeySetup() has set 'blocked' */
  305. + write_unlock(&m->rwlock);
  306. +}
  307. - /*
  308. - * Unfortunately, if we want to do I/O on the device,
  309. - * the number of 512-byte sectors has to fit into a sector_t.
  310. - */
  311. - return loopsize >> 9;
  312. +static void keyScrubTimerFn(unsigned long); /* forward declaration, kept at file scope */
  313. +static void keyScrubTimerInit(struct loop_device *lo) /* (re)arms the key scrub timer of 'lo' */
  314. +{
  315. + AESmultiKey *m;
  316. + unsigned long expire;
  317. +
  318. + m = (AESmultiKey *)lo->key_data;
  319. + expire = jiffies + HZ; /* fire one second from now */
  320. + init_timer(&m->timer);
  321. + m->timer.expires = expire;
  322. + m->timer.data = (unsigned long)lo; /* handed back to keyScrubTimerFn() as its argument */
  323. + m->timer.function = keyScrubTimerFn;
  324. + add_timer(&m->timer);
  325. }
  326. -static int
  327. -figure_loop_size(struct loop_device *lo)
  328. +/* called only from timer handler context */
  329. +static void keyScrubTimerFn(unsigned long d)
  330. {
  331. - loff_t size = get_loop_size(lo, lo->lo_backing_file);
  332. - sector_t x = (sector_t)size;
  333. + struct loop_device *lo = (struct loop_device *)d; /* 'd' is the timer.data value set by keyScrubTimerInit() */
  334. + extern void loop_add_keyscrub_fn(struct loop_device *, void (*)(void *), void *);
  335. - if ((loff_t)x != size)
  336. - return -EFBIG;
  337. + /* rw lock needs process context, so make loop thread do scrubbing */
  338. + loop_add_keyscrub_fn(lo, (void (*)(void*))keyScrubThreadFn, lo->key_data);
  339. + /* start timer again */
  340. + keyScrubTimerInit(lo); /* next expiry is jiffies + HZ */
  341. +}
  342. +#endif
  343. - set_capacity(disks[lo->lo_number], x);
  344. - return 0;
  345. +static AESmultiKey *allocMultiKey(void) /* returns a zeroed AESmultiKey with its first key slot(s) allocated, or 0 */
  346. +{
  347. + AESmultiKey *m;
  348. + aes_context *a;
  349. + int x = 0, n;
  350. +
  351. + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
  352. + if(!m) return 0;
  353. + memset(m, 0, sizeof(AESmultiKey)); /* keyMask == 0 and every keyPtr[] entry starts out NULL */
  354. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  355. + rwlock_init(&m->rwlock);
  356. + init_timer(&m->timer);
  357. + again:
  358. +#endif
  359. +
  360. + n = PAGE_SIZE / sizeof(aes_context); /* contexts per allocation chunk */
  361. + if(!n) n = 1; /* at least one, even if a context exceeds a page */
  362. +
  363. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  364. + if(!a) {
  365. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  366. + if(x) kfree(m->keyPtr[0]); /* second pass failed: release the chunk from the first pass */
  367. +#endif
  368. + kfree(m);
  369. + return 0;
  370. + }
  371. +
  372. + while((x < KEY_ALLOC_COUNT) && n) { /* hand out consecutive contexts of the chunk to keyPtr[] */
  373. + m->keyPtr[x] = a;
  374. + a++;
  375. + x++;
  376. + n--;
  377. + }
  378. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  379. + if(x < 2) goto again; /* keyScrubWork() needs keyPtr[1] as the swap partner of keyPtr[0] */
  380. +#endif
  381. + return m;
  382. }
  383. -static inline int
  384. -lo_do_transfer(struct loop_device *lo, int cmd,
  385. - struct page *rpage, unsigned roffs,
  386. - struct page *lpage, unsigned loffs,
  387. - int size, sector_t rblock)
  388. +static void clearAndFreeMultiKey(AESmultiKey *m) /* zeroes and frees every key chunk, then frees 'm' itself */
  389. {
  390. - if (!lo->transfer)
  391. - return 0;
  392. + aes_context *a;
  393. + int x, n;
  394. - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
  395. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  396. + /* stop scrub timer. loop thread was killed earlier */
  397. + del_timer_sync(&m->timer);
  398. + /* make sure allocated keys are in original order */
  399. + if(m->reversed) keyScrubWork(m); /* so keyPtr[x] below is again the start of each kmalloc'ed chunk */
  400. +#endif
  401. + n = PAGE_SIZE / sizeof(aes_context); /* same chunk size as used when allocating */
  402. + if(!n) n = 1;
  403. +
  404. + x = 0;
  405. + while(x < KEY_ALLOC_COUNT) {
  406. + a = m->keyPtr[x];
  407. + if(!a) break; /* first unallocated chunk ends the walk */
  408. + memset(a, 0, sizeof(aes_context) * n); /* wipe key material before freeing */
  409. + kfree(a);
  410. + x += n; /* jump to the first slot of the next chunk */
  411. + }
  412. +
  413. + kfree(m);
  414. +}
  415. +
  416. +static int multiKeySetup(struct loop_device *lo, unsigned char *k) /* loads 64 user-supplied keys, 32 bytes each, from 'k' */
  417. +{
  418. + AESmultiKey *m;
  419. + aes_context *a;
  420. + int x, y, n, err = 0;
  421. + union {
  422. + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
  423. + unsigned char b[32];
  424. + } un;
  425. +
  426. + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN)) /* only the key owner or CAP_SYS_ADMIN may load keys */
  427. + return -EPERM;
  428. +
  429. + m = (AESmultiKey *)lo->key_data;
  430. + if(!m) return -ENXIO; /* no AES key context is attached to this device */
  431. +
  432. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  433. + /* temporarily prevent loop thread from messing with keys */
  434. + write_lock(&m->rwlock);
  435. + m->blocked = 1;
  436. + /* make sure allocated keys are in original order */
  437. + if(m->reversed) keyScrubWork(m);
  438. + write_unlock(&m->rwlock);
  439. +#endif
  440. + n = PAGE_SIZE / sizeof(aes_context); /* contexts per allocation chunk */
  441. + if(!n) n = 1;
  442. +
  443. + x = 0;
  444. + while(x < KEY_ALLOC_COUNT) {
  445. + if(!m->keyPtr[x]) { /* slot not backed yet: allocate the next chunk */
  446. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  447. + if(!a) {
  448. + err = -ENOMEM;
  449. + goto error_out;
  450. + }
  451. + y = x;
  452. + while((y < (x + n)) && (y < KEY_ALLOC_COUNT)) {
  453. + m->keyPtr[y] = a;
  454. + a++;
  455. + y++;
  456. + }
  457. + }
  458. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  459. + if(x >= 64) { /* slots 64..127 are only allocated here; no user key is loaded into them */
  460. + x++;
  461. + continue;
  462. + }
  463. +#endif
  464. + if(copy_from_user(&un.b[0], k, 32)) {
  465. + err = -EFAULT;
  466. + goto error_out;
  467. + }
  468. + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0);
  469. + k += 32; /* advance to the next 32-byte user key */
  470. + x++;
  471. + }
  472. + m->keyMask = 0x3F; /* range 0...63 */
  473. + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
  474. +error_out:
  475. + memset(&un.b[0], 0, 32); /* wipe the stack copy of key material on success and on every error path */
  476. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  477. + /* re-enable loop thread key scrubbing */
  478. + write_lock(&m->rwlock);
  479. + m->blocked = 0;
  480. + write_unlock(&m->rwlock);
  481. +#endif
  482. + return err;
  483. }
  484. -static int
  485. -do_lo_send(struct loop_device *lo, struct bio_vec *bvec, int bsize, loff_t pos)
  486. +void loop_compute_sector_iv(sector_t devSect, u_int32_t *ivout) /* fills ivout[0..3] with the little-endian sector number, zero padded */
  487. {
  488. - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
  489. - struct address_space *mapping = file->f_mapping;
  490. - struct address_space_operations *aops = mapping->a_ops;
  491. - struct page *page;
  492. - pgoff_t index;
  493. - unsigned size, offset, bv_offs;
  494. - int len;
  495. - int ret = 0;
  496. + if(sizeof(sector_t) == 8) {
  497. + ivout[0] = cpu_to_le32(devSect); /* low 32 bits */
  498. + ivout[1] = cpu_to_le32((u_int64_t)devSect>>32); /* high 32 bits */
  499. + ivout[3] = ivout[2] = 0;
  500. + } else {
  501. + ivout[0] = cpu_to_le32(devSect); /* 32-bit sector_t: upper three words are zero */
  502. + ivout[3] = ivout[2] = ivout[1] = 0;
  503. + }
  504. +}
  505. - down(&mapping->host->i_sem);
  506. - index = pos >> PAGE_CACHE_SHIFT;
  507. - offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
  508. - bv_offs = bvec->bv_offset;
  509. - len = bvec->bv_len;
  510. - while (len > 0) {
  511. - sector_t IV;
  512. - int transfer_result;
  513. -
  514. - IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  515. -
  516. - size = PAGE_CACHE_SIZE - offset;
  517. - if (size > len)
  518. - size = len;
  519. -
  520. - page = grab_cache_page(mapping, index);
  521. - if (!page)
  522. - goto fail;
  523. - if (aops->prepare_write(file, page, offset, offset+size))
  524. - goto unlock;
  525. - transfer_result = lo_do_transfer(lo, WRITE, page, offset,
  526. - bvec->bv_page, bv_offs,
  527. - size, IV);
  528. - if (transfer_result) {
  529. - char *kaddr;
  530. -
  531. - /*
  532. - * The transfer failed, but we still write the data to
  533. - * keep prepare/commit calls balanced.
  534. - */
  535. - printk(KERN_ERR "loop: transfer error block %llu\n",
  536. - (unsigned long long)index);
  537. - kaddr = kmap_atomic(page, KM_USER0);
  538. - memset(kaddr + offset, 0, size);
  539. - kunmap_atomic(kaddr, KM_USER0);
  540. - }
  541. - flush_dcache_page(page);
  542. - if (aops->commit_write(file, page, offset, offset+size))
  543. - goto unlock;
  544. - if (transfer_result)
  545. - goto unlock;
  546. - bv_offs += size;
  547. - len -= size;
  548. - offset = 0;
  549. - index++;
  550. - pos += size;
  551. - unlock_page(page);
  552. - page_cache_release(page);
  553. - }
  554. - up(&mapping->host->i_sem);
  555. -out:
  556. - return ret;
  557. +void loop_compute_md5_iv(sector_t devSect, u_int32_t *ivout, u_int32_t *data) /* ivout[0..3] = MD5 state over 496 bytes of 'data' plus the sector number */
  558. +{
  559. + int x;
  560. +#if defined(__BIG_ENDIAN)
  561. + int y, e;
  562. +#endif
  563. + u_int32_t buf[16]; /* one 64-byte MD5 input block */
  564. -unlock:
  565. - unlock_page(page);
  566. - page_cache_release(page);
  567. -fail:
  568. - up(&mapping->host->i_sem);
  569. - ret = -1;
  570. - goto out;
  571. + ivout[0] = 0x67452301; /* MD5 initial chaining values */
  572. + ivout[1] = 0xefcdab89;
  573. + ivout[2] = 0x98badcfe;
  574. + ivout[3] = 0x10325476;
  575. +
  576. +#if defined(__BIG_ENDIAN)
  577. + y = 7; /* 8 passes: 7 full 64-byte blocks, then the final block */
  578. + e = 16;
  579. + do {
  580. + if (!y) { /* final block: only 12 data words, the rest is trailer */
  581. + e = 12;
  582. + /* md5_transform_CPUbyteorder wants data in CPU byte order */
  583. + /* devSect is already in CPU byte order -- no need to convert */
  584. + if(sizeof(sector_t) == 8) {
  585. + /* use only 56 bits of sector number */
  586. + buf[12] = devSect;
  587. + buf[13] = (((u_int64_t)devSect >> 32) & 0xFFFFFF) | 0x80000000;
  588. + } else {
  589. + /* 32 bits of sector number + 24 zero bits */
  590. + buf[12] = devSect;
  591. + buf[13] = 0x80000000;
  592. + }
  593. + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
  594. + buf[14] = 4024;
  595. + buf[15] = 0;
  596. + }
  597. + x = 0;
  598. + do { /* byte-swap 'e' data words into buf[] */
  599. + buf[x ] = cpu_to_le32(data[0]);
  600. + buf[x + 1] = cpu_to_le32(data[1]);
  601. + buf[x + 2] = cpu_to_le32(data[2]);
  602. + buf[x + 3] = cpu_to_le32(data[3]);
  603. + x += 4;
  604. + data += 4;
  605. + } while (x < e);
  606. + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
  607. + } while (--y >= 0);
  608. + ivout[0] = cpu_to_le32(ivout[0]); /* convert the result to little-endian byte order */
  609. + ivout[1] = cpu_to_le32(ivout[1]);
  610. + ivout[2] = cpu_to_le32(ivout[2]);
  611. + ivout[3] = cpu_to_le32(ivout[3]);
  612. +#else
  613. + x = 6; /* 7 full 64-byte blocks taken straight from 'data' */
  614. + do {
  615. + md5_transform_CPUbyteorder(&ivout[0], data);
  616. + data += 16;
  617. + } while (--x >= 0);
  618. + memcpy(buf, data, 48); /* remaining 48 data bytes start the final block */
  619. + /* md5_transform_CPUbyteorder wants data in CPU byte order */
  620. + /* devSect is already in CPU byte order -- no need to convert */
  621. + if(sizeof(sector_t) == 8) {
  622. + /* use only 56 bits of sector number */
  623. + buf[12] = devSect;
  624. + buf[13] = (((u_int64_t)devSect >> 32) & 0xFFFFFF) | 0x80000000;
  625. + } else {
  626. + /* 32 bits of sector number + 24 zero bits */
  627. + buf[12] = devSect;
  628. + buf[13] = 0x80000000;
  629. + }
  630. + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
  631. + buf[14] = 4024;
  632. + buf[15] = 0;
  633. + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
  634. +#endif
  635. }
  636. -static int
  637. -lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  638. +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
  639. + char *loop_buf, int size, sector_t devSect)
  640. {
  641. - struct bio_vec *bvec;
  642. - int i, ret = 0;
  643. + aes_context *a;
  644. + AESmultiKey *m;
  645. + int x;
  646. + unsigned y;
  647. + u_int32_t iv[8]; /* two 16-byte slots: iv[0..3] and iv[4..7] */
  648. +
  649. + if(!size || (size & 511)) { /* only whole 512-byte sectors are accepted */
  650. + return -EINVAL;
  651. + }
  652. + m = (AESmultiKey *)lo->key_data;
  653. + y = m->keyMask; /* non-zero selects the multi-key code paths below */
  654. + if(cmd == READ) {
  655. + while(size) { /* one 512-byte sector per pass */
  656. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  657. + read_lock(&m->rwlock);
  658. +#endif
  659. + a = m->keyPtr[((unsigned)devSect) & y]; /* key chosen by the low sector bits */
  660. + if(y) {
  661. + memcpy(&iv[0], raw_buf, 16); /* ciphertext block 0 chains into block 1 */
  662. + raw_buf += 16; /* start at block 1; block 0 is handled last */
  663. + loop_buf += 16;
  664. + } else {
  665. + loop_compute_sector_iv(devSect, &iv[0]);
  666. + }
  667. + x = 15; /* 16 passes, two 16-byte blocks per pass */
  668. + do {
  669. + memcpy(&iv[4], raw_buf, 16); /* save ciphertext before decrypting */
  670. + aes_decrypt(a, raw_buf, loop_buf);
  671. + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0];
  672. + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
  673. + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
  674. + *((u_int32_t *)(&loop_buf[12])) ^= iv[3];
  675. + if(y && !x) { /* multi-key, last pass: go back to block 0 of this sector */
  676. + raw_buf -= 496;
  677. + loop_buf -= 496;
  678. + loop_compute_md5_iv(devSect, &iv[4], (u_int32_t *)(&loop_buf[16])); /* IV from the just-decrypted bytes 16..511 */
  679. + } else {
  680. + raw_buf += 16;
  681. + loop_buf += 16;
  682. + memcpy(&iv[0], raw_buf, 16);
  683. + }
  684. + aes_decrypt(a, raw_buf, loop_buf);
  685. + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[4];
  686. + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
  687. + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
  688. + *((u_int32_t *)(&loop_buf[12])) ^= iv[7];
  689. + if(y && !x) {
  690. + raw_buf += 512; /* from block 0 straight to the next sector */
  691. + loop_buf += 512;
  692. + } else {
  693. + raw_buf += 16;
  694. + loop_buf += 16;
  695. + }
  696. + } while(--x >= 0);
  697. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  698. + read_unlock(&m->rwlock);
  699. +#endif
  700. + cond_resched();
  701. + size -= 512;
  702. + devSect++;
  703. + }
  704. + } else {
  705. + while(size) { /* one 512-byte sector per pass */
  706. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  707. + read_lock(&m->rwlock);
  708. +#endif
  709. + a = m->keyPtr[((unsigned)devSect) & y];
  710. + if(y) {
  711. + /* on 2.4 and later kernels, real raw_buf is not doing */
  712. + /* any writes now so it can be used as temp buffer */
  713. + memcpy(raw_buf, loop_buf, 512);
  714. + loop_compute_md5_iv(devSect, &iv[0], (u_int32_t *)(&raw_buf[16])); /* IV from plaintext bytes 16..511 */
  715. + x = 15;
  716. + do { /* encrypt in place inside raw_buf, chaining through iv[0..3] */
  717. + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
  718. + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
  719. + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
  720. + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
  721. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  722. + memcpy(&iv[0], raw_buf, 16);
  723. + raw_buf += 16;
  724. + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
  725. + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
  726. + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
  727. + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
  728. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  729. + memcpy(&iv[0], raw_buf, 16);
  730. + raw_buf += 16;
  731. + } while(--x >= 0);
  732. + loop_buf += 512; /* raw_buf already advanced 512 bytes inside the loop */
  733. + } else {
  734. + loop_compute_sector_iv(devSect, &iv[0]);
  735. + x = 15;
  736. + do { /* XOR plaintext into the IV, encrypt into raw_buf, chain the ciphertext */
  737. + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
  738. + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
  739. + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
  740. + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
  741. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  742. + memcpy(&iv[0], raw_buf, 16);
  743. + loop_buf += 16;
  744. + raw_buf += 16;
  745. + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
  746. + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
  747. + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
  748. + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
  749. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  750. + memcpy(&iv[0], raw_buf, 16);
  751. + loop_buf += 16;
  752. + raw_buf += 16;
  753. + } while(--x >= 0);
  754. + }
  755. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  756. + read_unlock(&m->rwlock);
  757. +#endif
  758. + cond_resched();
  759. + size -= 512;
  760. + devSect++;
  761. + }
  762. + }
  763. + return(0);
  764. +}
  765. +
  766. +static int keySetup_aes(struct loop_device *lo, struct loop_info64 *info) /* .init hook of funcs_aes: installs the single key from 'info' */
  767. +{
  768. + AESmultiKey *m;
  769. + union {
  770. + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
  771. + unsigned char b[32];
  772. + } un;
  773. +
  774. + lo->key_data = m = allocMultiKey();
  775. + if(!m) return(-ENOMEM);
  776. + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32); /* aligned local copy of the key bytes */
  777. + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0);
  778. + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key)); /* wipe the key in the caller's info struct */
  779. + memset(&un.b[0], 0, 32); /* wipe the local copy too */
  780. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  781. + keyScrubTimerInit(lo); /* start periodic key scrubbing */
  782. +#endif
  783. + return(0);
  784. +}
  785. +
  786. +static int keyClean_aes(struct loop_device *lo) /* .release hook of funcs_aes: wipes and frees the key context */
  787. +{
  788. + if(lo->key_data) {
  789. + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
  790. + lo->key_data = 0; /* a repeated call becomes a no-op */
  791. + }
  792. + return(0);
  793. +}
  794. - bio_for_each_segment(bvec, bio, i) {
  795. - ret = do_lo_send(lo, bvec, bsize, pos);
  796. - if (ret < 0)
  797. +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg) /* .ioctl hook of funcs_aes */
  798. +{
  799. + int err;
  800. +
  801. + switch (cmd) {
  802. + case LOOP_MULTI_KEY_SETUP:
  803. + err = multiKeySetup(lo, (unsigned char *)arg); /* 'arg' is passed on as the user-space key buffer */
  804. + break;
  805. + default:
  806. + err = -EINVAL; /* every other command is rejected */
  807. + }
  808. + return err;
  809. +}
  810. +
  811. +static struct loop_func_table funcs_aes = { /* AES transfer hooks, registered in xfer_funcs[] */
  812. + .number = 16, /* 16 == AES */
  813. + .transfer = transfer_aes, /* per-sector encrypt/decrypt */
  814. + .init = keySetup_aes, /* allocates the key context and sets the first key */
  815. + .release = keyClean_aes, /* wipes and frees the key context */
  816. + .ioctl = handleIoctl_aes /* handles LOOP_MULTI_KEY_SETUP */
  817. +};
  818. +
  819. +EXPORT_SYMBOL(loop_compute_sector_iv);
  820. +EXPORT_SYMBOL(loop_compute_md5_iv);
  821. +#endif /* CONFIG_BLK_DEV_LOOP_AES */
  822. +
  823. +/* xfer_funcs[0] is special - its release function is never called */
  824. +static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  825. + &none_funcs, /* slot 0 */
  826. + &xor_funcs, /* slot 1 */
  827. +#if CONFIG_BLK_DEV_LOOP_AES
  828. + [LO_CRYPT_AES] = &funcs_aes, /* present only when AES support is configured */
  829. +#endif
  830. +};
  831. +
  832. +/*
  833. + * First number of 'lo_prealloc' is the default number of RAM pages
  834. + * to pre-allocate for each device backed loop. Every (configured)
  835. + * device backed loop pre-allocates this amount of RAM pages unless
  836. + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
  837. + * overrides are defined in pairs: loop_index,number_of_pages
  838. + */
  839. +static int lo_prealloc[9] = { 125, -1, 0, -1, 0, -1, 0, -1, 0 };
  840. +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
  841. +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
  842. +
  843. +#ifdef MODULE
  844. +static int dummy1;
  845. +#if LINUX_VERSION_CODE >= 0x2060a
  846. +module_param_array(lo_prealloc, int, &dummy1, 0);
  847. +#else
  848. +module_param_array(lo_prealloc, int, dummy1, 0);
  849. +#endif
  850. +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
  851. +#else
  852. +static int __init lo_prealloc_setup(char *str) /* parses the "lo_prealloc=" boot option into lo_prealloc[] */
  853. +{
  854. + int x, y, z;
  855. +
  856. + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) { /* at most one value per array slot */
  857. + z = get_option(&str, &y);
  858. + if (z > 0) /* an integer was parsed */
  859. + lo_prealloc[x] = y;
  860. + if (z < 2) /* stop unless get_option() reports more input to parse */
  861. break;
  862. - pos += bvec->bv_len;
  863. }
  864. - return ret;
  865. + return 1;
  866. }
  867. +__setup("lo_prealloc=", lo_prealloc_setup);
  868. +#endif
  869. -struct lo_read_data {
  870. - struct loop_device *lo;
  871. - struct page *page;
  872. - unsigned offset;
  873. - int bsize;
  874. -};
  875. +/*
  876. + * This is loop helper thread nice value in range
  877. + * from 0 (low priority) to -20 (high priority).
  878. + */
  879. +static int lo_nice = -1;
  880. -static int
  881. -lo_read_actor(read_descriptor_t *desc, struct page *page,
  882. - unsigned long offset, unsigned long size)
  883. +#ifdef MODULE
  884. +module_param(lo_nice, int, 0);
  885. +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
  886. +#else
  887. +static int __init lo_nice_setup(char *str) /* parses the "lo_nice=" boot option (non-module build) */
  888. {
  889. - unsigned long count = desc->count;
  890. - struct lo_read_data *p = desc->arg.data;
  891. - struct loop_device *lo = p->lo;
  892. - sector_t IV;
  893. + int y;
  894. - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  895. + if (get_option(&str, &y) == 1) /* accept only a single plain integer */
  896. + lo_nice = y; /* no range check is done in this function */
  897. + return 1;
  898. +}
  899. +__setup("lo_nice=", lo_nice_setup);
  900. +#endif
  901. - if (size > count)
  902. - size = count;
  903. +struct loop_bio_extension { /* NOTE(review): users of this struct are outside this excerpt; field notes are assumptions */
  904. + struct bio *bioext_merge;
  905. + struct loop_device *bioext_loop; /* presumably the loop device the bio belongs to -- TODO confirm */
  906. + sector_t bioext_iv; /* presumably the sector number used as IV -- TODO confirm */
  907. + int bioext_index;
  908. + int bioext_size;
  909. +};
  910. - if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
  911. - size = 0;
  912. - printk(KERN_ERR "loop: transfer error block %ld\n",
  913. - page->index);
  914. - desc->error = -EINVAL;
  915. +static struct loop_device **loop_dev_ptr_arr;
  916. +
  917. +static void loop_prealloc_cleanup(struct loop_device *lo)
  918. +{
  919. + struct bio *bio;
  920. +
  921. + while ((bio = lo->lo_bio_free0)) {
  922. + lo->lo_bio_free0 = bio->bi_next;
  923. + __free_page(bio->bi_io_vec[0].bv_page);
  924. + kfree(bio->bi_private);
  925. + bio->bi_next = NULL;
  926. + bio_put(bio);
  927. + }
  928. + while ((bio = lo->lo_bio_free1)) {
  929. + lo->lo_bio_free1 = bio->bi_next;
  930. + /* bi_flags was used for other purpose */
  931. + bio->bi_flags = 0;
  932. + /* bi_size was used for other purpose */
  933. + bio->bi_size = 0;
  934. + /* bi_cnt was used for other purpose */
  935. + atomic_set(&bio->bi_cnt, 1);
  936. + bio->bi_next = NULL;
  937. + bio_put(bio);
  938. }
  939. +}
  940. +
  941. +static int loop_prealloc_init(struct loop_device *lo, int y)
  942. +{
  943. + struct bio *bio;
  944. + int x;
  945. - flush_dcache_page(p->page);
  946. + if(!y) {
  947. + y = lo_prealloc[0];
  948. + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
  949. + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
  950. + y = lo_prealloc[x + 1];
  951. + break;
  952. + }
  953. + }
  954. + }
  955. + lo->lo_bio_flsh = (y * 3) / 4;
  956. - desc->count = count - size;
  957. - desc->written += size;
  958. - p->offset += size;
  959. - return size;
  960. + for (x = 0; x < y; x++) {
  961. + bio = bio_alloc(GFP_KERNEL, 1);
  962. + if (!bio) {
  963. + fail1:
  964. + loop_prealloc_cleanup(lo);
  965. + return 1;
  966. + }
  967. + bio->bi_io_vec[0].bv_page = alloc_page(GFP_KERNEL);
  968. + if (!bio->bi_io_vec[0].bv_page) {
  969. + fail2:
  970. + bio->bi_next = NULL;
  971. + bio_put(bio);
  972. + goto fail1;
  973. + }
  974. + bio->bi_vcnt = 1;
  975. + bio->bi_private = kmalloc(sizeof(struct loop_bio_extension), GFP_KERNEL);
  976. + if (!bio->bi_private)
  977. + goto fail2;
  978. + bio->bi_next = lo->lo_bio_free0;
  979. + lo->lo_bio_free0 = bio;
  980. +
  981. + bio = bio_alloc(GFP_KERNEL, 1);
  982. + if (!bio)
  983. + goto fail1;
  984. + bio->bi_vcnt = 1;
  985. + bio->bi_next = lo->lo_bio_free1;
  986. + lo->lo_bio_free1 = bio;
  987. + }
  988. + return 0;
  989. }
  990. -static int
  991. -do_lo_receive(struct loop_device *lo,
  992. - struct bio_vec *bvec, int bsize, loff_t pos)
  993. +static void loop_add_queue_last(struct loop_device *lo, struct bio *bio, struct bio **q)
  994. {
  995. - struct lo_read_data cookie;
  996. - struct file *file;
  997. - int retval;
  998. -
  999. - cookie.lo = lo;
  1000. - cookie.page = bvec->bv_page;
  1001. - cookie.offset = bvec->bv_offset;
  1002. - cookie.bsize = bsize;
  1003. - file = lo->lo_backing_file;
  1004. - retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
  1005. - lo_read_actor, &cookie);
  1006. - return (retval < 0)? retval: 0;
  1007. + unsigned long flags;
  1008. +
  1009. + spin_lock_irqsave(&lo->lo_lock, flags);
  1010. + if (*q) {
  1011. + bio->bi_next = (*q)->bi_next;
  1012. + (*q)->bi_next = bio;
  1013. + } else {
  1014. + bio->bi_next = bio;
  1015. + }
  1016. + *q = bio;
  1017. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  1018. +
  1019. + if (waitqueue_active(&lo->lo_bio_wait))
  1020. + wake_up_interruptible(&lo->lo_bio_wait);
  1021. }
  1022. -static int
  1023. -lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  1024. +static void loop_add_queue_first(struct loop_device *lo, struct bio *bio, struct bio **q)
  1025. {
  1026. - struct bio_vec *bvec;
  1027. - int i, ret = 0;
  1028. + spin_lock_irq(&lo->lo_lock);
  1029. + if (*q) {
  1030. + bio->bi_next = (*q)->bi_next;
  1031. + (*q)->bi_next = bio;
  1032. + } else {
  1033. + bio->bi_next = bio;
  1034. + *q = bio;
  1035. + }
  1036. + spin_unlock_irq(&lo->lo_lock);
  1037. +}
  1038. - bio_for_each_segment(bvec, bio, i) {
  1039. - ret = do_lo_receive(lo, bvec, bsize, pos);
  1040. - if (ret < 0)
  1041. - break;
  1042. - pos += bvec->bv_len;
  1043. +static struct bio *loop_get_bio(struct loop_device *lo, int *list_nr)
  1044. +{
  1045. + struct bio *bio = NULL, *last;
  1046. +
  1047. + spin_lock_irq(&lo->lo_lock);
  1048. + if ((last = lo->lo_bio_que0)) {
  1049. + bio = last->bi_next;
  1050. + if (bio == last)
  1051. + lo->lo_bio_que0 = NULL;
  1052. + else
  1053. + last->bi_next = bio->bi_next;
  1054. + bio->bi_next = NULL;
  1055. + *list_nr = 0;
  1056. + } else if ((last = lo->lo_bio_que1)) {
  1057. + bio = last->bi_next;
  1058. + if (bio == last)
  1059. + lo->lo_bio_que1 = NULL;
  1060. + else
  1061. + last->bi_next = bio->bi_next;
  1062. + bio->bi_next = NULL;
  1063. + *list_nr = 1;
  1064. + } else if ((last = lo->lo_bio_que2)) {
  1065. + bio = last->bi_next;
  1066. + if (bio == last)
  1067. + lo->lo_bio_que2 = NULL;
  1068. + else
  1069. + last->bi_next = bio->bi_next;
  1070. + bio->bi_next = NULL;
  1071. + *list_nr = 2;
  1072. }
  1073. - return ret;
  1074. + spin_unlock_irq(&lo->lo_lock);
  1075. + return bio;
  1076. }
  1077. -static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  1078. +static void loop_put_buffer(struct loop_device *lo, struct bio *b, int flist)
  1079. {
  1080. - loff_t pos;
  1081. - int ret;
  1082. + unsigned long flags;
  1083. + int wk;
  1084. - pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  1085. - if (bio_rw(bio) == WRITE)
  1086. - ret = lo_send(lo, bio, lo->lo_blocksize, pos);
  1087. - else
  1088. - ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
  1089. - return ret;
  1090. + spin_lock_irqsave(&lo->lo_lock, flags);
  1091. + if(!flist) {
  1092. + b->bi_next = lo->lo_bio_free0;
  1093. + lo->lo_bio_free0 = b;
  1094. + wk = lo->lo_bio_need & 1;
  1095. + } else {
  1096. + b->bi_next = lo->lo_bio_free1;
  1097. + lo->lo_bio_free1 = b;
  1098. + wk = lo->lo_bio_need & 2;
  1099. + }
  1100. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  1101. +
  1102. + if (wk && waitqueue_active(&lo->lo_bio_wait))
  1103. + wake_up_interruptible(&lo->lo_bio_wait);
  1104. }
  1105. -/*
  1106. - * Add bio to back of pending list
  1107. - */
  1108. -static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  1109. +static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
  1110. {
  1111. + struct loop_bio_extension *extension = bio->bi_private;
  1112. + struct bio *merge = extension->bioext_merge;
  1113. + struct loop_device *lo = extension->bioext_loop;
  1114. + struct bio *origbio = merge->bi_private;
  1115. +
  1116. + if (err) {
  1117. + merge->bi_size = err; /* used as error code */
  1118. + if(err == -EIO)
  1119. + clear_bit(0, &merge->bi_flags);
  1120. + printk(KERN_ERR "loop%d: loop_end_io_transfer err=%d bi_rw=0x%lx\n", lo->lo_number, err, bio->bi_rw);
  1121. + }
  1122. + if (bio->bi_size)
  1123. + return 1;
  1124. + if (bio_rw(bio) == WRITE) {
  1125. + loop_put_buffer(lo, bio, 0);
  1126. + if (!atomic_dec_and_test(&merge->bi_cnt))
  1127. + return 0;
  1128. + origbio->bi_next = NULL;
  1129. + bio_endio(origbio, origbio->bi_size, test_bit(0, &merge->bi_flags) ? (int)merge->bi_size : -EIO);
  1130. + loop_put_buffer(lo, merge, 1);
  1131. + if (atomic_dec_and_test(&lo->lo_pending))
  1132. + wake_up_interruptible(&lo->lo_bio_wait);
  1133. + } else {
  1134. + loop_add_queue_last(lo, bio, &lo->lo_bio_que0);
  1135. + }
  1136. + return 0;
  1137. +}
  1138. +
  1139. +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *orig_bio,
  1140. + int from_thread, struct bio **merge_ptr, int *isBarrBioPtr)
  1141. +{
  1142. + struct bio *bio = NULL, *merge = *merge_ptr;
  1143. + struct loop_bio_extension *extension;
  1144. unsigned long flags;
  1145. + int len;
  1146. +
  1147. + /*
  1148. + * If called from make_request and if there are unprocessed
  1149. + * barrier requests, fail allocation so that request is
  1150. + * inserted to end of no-merge-allocated list. This guarantees
  1151. + * FIFO processing order of requests.
  1152. + */
  1153. + if (!from_thread && atomic_read(&lo->lo_bio_barr))
  1154. + return NULL;
  1155. spin_lock_irqsave(&lo->lo_lock, flags);
  1156. - if (lo->lo_biotail) {
  1157. - lo->lo_biotail->bi_next = bio;
  1158. - lo->lo_biotail = bio;
  1159. - } else
  1160. - lo->lo_bio = lo->lo_biotail = bio;
  1161. + if (!merge) {
  1162. + merge = lo->lo_bio_free1;
  1163. + if (merge) {
  1164. + lo->lo_bio_free1 = merge->bi_next;
  1165. + if (from_thread)
  1166. + lo->lo_bio_need = 0;
  1167. + } else {
  1168. + if (from_thread)
  1169. + lo->lo_bio_need = 2;
  1170. + }
  1171. + }
  1172. +
  1173. + /*
  1174. + * If there are unprocessed barrier requests and a merge-bio was just
  1175. + * allocated, do not allocate a buffer-bio yet. This causes request
  1176. + * to be moved from head of no-merge-allocated list to end of
  1177. + * merge-allocated list. This guarantees FIFO processing order
  1178. + * of requests.
  1179. + */
  1180. + if (merge && (*merge_ptr || !atomic_read(&lo->lo_bio_barr))) {
  1181. + bio = lo->lo_bio_free0;
  1182. + if (bio) {
  1183. + lo->lo_bio_free0 = bio->bi_next;
  1184. + if (from_thread)
  1185. + lo->lo_bio_need = 0;
  1186. + } else {
  1187. + if (from_thread)
  1188. + lo->lo_bio_need = 1;
  1189. + }
  1190. + }
  1191. spin_unlock_irqrestore(&lo->lo_lock, flags);
  1192. - up(&lo->lo_bh_mutex);
  1193. + if (!(*merge_ptr) && merge) {
  1194. + /*
  1195. + * initialize "merge-bio" which is used as
  1196. + * rendezvous point among multiple vecs
  1197. + */
  1198. + *merge_ptr = merge;
  1199. + merge->bi_sector = orig_bio->bi_sector + lo->lo_offs_sec;
  1200. + merge->bi_size = 0; /* used as error code */
  1201. + set_bit(0, &merge->bi_flags);
  1202. + merge->bi_idx = orig_bio->bi_idx;
  1203. + atomic_set(&merge->bi_cnt, orig_bio->bi_vcnt - orig_bio->bi_idx);
  1204. + merge->bi_private = orig_bio;
  1205. + }
  1206. +
  1207. + if (!bio)
  1208. + return NULL;
  1209. +
  1210. + /*
  1211. + * initialize one page "buffer-bio"
  1212. + */
  1213. + bio->bi_sector = merge->bi_sector;
  1214. + bio->bi_next = NULL;
  1215. + bio->bi_bdev = lo->lo_device;
  1216. + bio->bi_flags = (1 << BIO_UPTODATE);
  1217. + /* read-ahead bit needs to be cleared to work around kernel bug */
  1218. + /* that causes I/O errors on -EWOULDBLOCK I/O elevator failures */
  1219. + bio->bi_rw = orig_bio->bi_rw & ~((1 << BIO_RW_BARRIER) | (1 << BIO_RW_AHEAD));
  1220. + if (bio_barrier(orig_bio)) {
  1221. + if(merge->bi_idx == (orig_bio->bi_vcnt - 1)) {
  1222. +#if LINUX_VERSION_CODE >= 0x20609
  1223. + setBarr2:
  1224. + orig_bio->bi_hw_front_size = 0;
  1225. +#endif
  1226. + *isBarrBioPtr = 1;
  1227. + setBarr1:
  1228. + bio->bi_rw |= (1 << BIO_RW_BARRIER);
  1229. + } else if(merge->bi_idx == orig_bio->bi_idx) {
  1230. + goto setBarr1;
  1231. + }
  1232. + }
  1233. +#if LINUX_VERSION_CODE >= 0x20609
  1234. + else if(orig_bio->bi_hw_front_size == 1536) {
  1235. + goto setBarr2;
  1236. + }
  1237. +#endif
  1238. +#if defined(BIO_RW_SYNC)
  1239. + bio->bi_rw &= ~(1 << BIO_RW_SYNC);
  1240. + if (bio_sync(orig_bio) && (merge->bi_idx == (orig_bio->bi_vcnt - 1)))
  1241. + bio->bi_rw |= (1 << BIO_RW_SYNC);
  1242. +#endif
  1243. + bio->bi_vcnt = 1;
  1244. + bio->bi_idx = 0;
  1245. + bio->bi_phys_segments = 0;
  1246. + bio->bi_hw_segments = 0;
  1247. + bio->bi_size = len = orig_bio->bi_io_vec[merge->bi_idx].bv_len;
  1248. +#if defined(BIOVEC_VIRT_START_SIZE) || (LINUX_VERSION_CODE >= 0x20608)
  1249. + bio->bi_hw_front_size = 0;
  1250. + bio->bi_hw_back_size = 0;
  1251. +#endif
  1252. + /* bio->bi_max_vecs not touched */
  1253. + bio->bi_io_vec[0].bv_len = len;
  1254. + bio->bi_io_vec[0].bv_offset = 0;
  1255. + bio->bi_end_io = loop_end_io_transfer;
  1256. + /* bio->bi_cnt not touched */
  1257. + /* bio->bi_private not touched */
  1258. + /* bio->bi_destructor not touched */
  1259. +
  1260. + /*
  1261. + * initialize "buffer-bio" extension. This extension is
  1262. + * permanently glued to above "buffer-bio" via bio->bi_private
  1263. + */
  1264. + extension = bio->bi_private;
  1265. + extension->bioext_merge = merge;
  1266. + extension->bioext_loop = lo;
  1267. + extension->bioext_iv = merge->bi_sector - lo->lo_iv_remove;
  1268. + extension->bioext_index = merge->bi_idx;
  1269. + extension->bioext_size = len;
  1270. +
  1271. + /*
  1272. + * prepare "merge-bio" for next vec
  1273. + */
  1274. + merge->bi_sector += len >> 9;
  1275. + merge->bi_idx++;
  1276. +
  1277. + return bio;
  1278. }
  1279. -/*
  1280. - * Grab first pending buffer
  1281. - */
  1282. -static struct bio *loop_get_bio(struct loop_device *lo)
  1283. +static int figure_loop_size(struct loop_device *lo, struct block_device *bdev)
  1284. {
  1285. - struct bio *bio;
  1286. + loff_t size, offs;
  1287. + sector_t x;
  1288. + int err = 0;
  1289. - spin_lock_irq(&lo->lo_lock);
  1290. - if ((bio = lo->lo_bio)) {
  1291. - if (bio == lo->lo_biotail)
  1292. - lo->lo_biotail = NULL;
  1293. - lo->lo_bio = bio->bi_next;
  1294. - bio->bi_next = NULL;
  1295. + size = i_size_read(lo->lo_backing_file->f_dentry->d_inode->i_mapping->host);
  1296. + offs = lo->lo_offset;
  1297. + if (!(lo->lo_flags & LO_FLAGS_DO_BMAP))
  1298. + offs &= ~((loff_t)511);
  1299. + if ((offs > 0) && (offs < size)) {
  1300. + size -= offs;
  1301. + } else {
  1302. + if (offs)
  1303. + err = -EINVAL;
  1304. + lo->lo_offset = 0;
  1305. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1306. + }
  1307. + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
  1308. + size = lo->lo_sizelimit;
  1309. + } else {
  1310. + if (lo->lo_sizelimit)
  1311. + err = -EINVAL;
  1312. + lo->lo_sizelimit = 0;
  1313. + }
  1314. + size >>= 9;
  1315. +
  1316. + /*
  1317. + * Unfortunately, if we want to do I/O on the device,
  1318. + * the number of 512-byte sectors has to fit into a sector_t.
  1319. + */
  1320. + x = (sector_t)size;
  1321. + if ((loff_t)x != size) {
  1322. + err = -EFBIG;
  1323. + size = 0;
  1324. + }
  1325. +
  1326. + bdev->bd_inode->i_size = size << 9; /* byte units */
  1327. + set_capacity(disks[lo->lo_number], size); /* 512 byte units */
  1328. + return err;
  1329. +}
  1330. +
  1331. +static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
  1332. + char *lbuf, int size, sector_t rblock)
  1333. +{
  1334. + if (!lo->transfer)
  1335. + return 0;
  1336. +
  1337. + return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock);
  1338. +}
  1339. +
  1340. +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
  1341. +{
  1342. + mm_segment_t fs;
  1343. + int x, y, z;
  1344. +
  1345. + y = 0;
  1346. + do {
  1347. + z = size - y;
  1348. + fs = get_fs();
  1349. + set_fs(get_ds());
  1350. + if (w) {
  1351. + x = file->f_op->write(file, buf + y, z, ppos);
  1352. + set_fs(fs);
  1353. + } else {
  1354. + x = file->f_op->read(file, buf + y, z, ppos);
  1355. + set_fs(fs);
  1356. + if (!x)
  1357. + return 1;
  1358. + }
  1359. + if (x < 0) {
  1360. + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
  1361. + set_current_state(TASK_INTERRUPTIBLE);
  1362. + schedule_timeout(HZ / 2);
  1363. + continue;
  1364. + }
  1365. + return 1;
  1366. + }
  1367. + y += x;
  1368. + } while (y < size);
  1369. + return 0;
  1370. +}
  1371. +
  1372. +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  1373. +{
  1374. + loff_t pos;
  1375. + struct file *file = lo->lo_backing_file;
  1376. + char *data, *buf;
  1377. + unsigned int size, len;
  1378. + sector_t IV;
  1379. + struct page *pg;
  1380. +
  1381. + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  1382. + buf = page_address(lo->lo_bio_free0->bi_io_vec[0].bv_page);
  1383. + IV = bio->bi_sector;
  1384. + if (!lo->lo_iv_remove)
  1385. + IV += lo->lo_offs_sec;
  1386. + do {
  1387. + pg = bio->bi_io_vec[bio->bi_idx].bv_page;
  1388. + len = bio->bi_io_vec[bio->bi_idx].bv_len;
  1389. + data = kmap(pg) + bio->bi_io_vec[bio->bi_idx].bv_offset;
  1390. + while (len > 0) {
  1391. + if (!lo->lo_encryption) {
  1392. + /* this code relies on the NONE transfer being a no-op */
  1393. + buf = data;
  1394. + }
  1395. + size = PAGE_CACHE_SIZE;
  1396. + if (size > len)
  1397. + size = len;
  1398. + if (bio_rw(bio) == WRITE) {
  1399. + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
  1400. + printk(KERN_ERR "loop%d: write transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1401. + goto kunmap_and_out;
  1402. + }
  1403. + if (loop_file_io(file, buf, size, &pos, 1)) {
  1404. + printk(KERN_ERR "loop%d: write i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1405. + goto kunmap_and_out;
  1406. + }
  1407. + } else {
  1408. + if (loop_file_io(file, buf, size, &pos, 0)) {
  1409. + printk(KERN_ERR "loop%d: read i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1410. + goto kunmap_and_out;
  1411. + }
  1412. + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
  1413. + printk(KERN_ERR "loop%d: read transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  1414. + goto kunmap_and_out;
  1415. + }
  1416. + flush_dcache_page(pg);
  1417. + }
  1418. + data += size;
  1419. + len -= size;
  1420. + IV += size >> 9;
  1421. + }
  1422. + kunmap(pg);
  1423. + } while (++bio->bi_idx < bio->bi_vcnt);
  1424. + return 0;
  1425. +
  1426. +kunmap_and_out:
  1427. + kunmap(pg);
  1428. + return -EIO;
  1429. +}
  1430. +
  1431. +#if LINUX_VERSION_CODE >= 0x20609
  1432. +static int loop_issue_flush(request_queue_t *q, struct gendisk *disk, sector_t *error_sector)
  1433. +{
  1434. + struct loop_device *lo = q->queuedata;
  1435. + struct block_device *bdev;
  1436. + request_queue_t *bqu;
  1437. + sector_t sect;
  1438. + int ret;
  1439. +
  1440. + if(!lo)
  1441. + return 0;
  1442. + if(lo->lo_flags & LO_FLAGS_DO_BMAP)
  1443. + return 0;
  1444. + bdev = lo->lo_device;
  1445. + if(!bdev)
  1446. + return 0;
  1447. + bqu = bdev_get_queue(bdev);
  1448. + if(!bqu)
  1449. + return 0;
  1450. + if(!bqu->issue_flush_fn)
  1451. + return -EOPNOTSUPP;
  1452. + if(!lo->lo_encryption) {
  1453. + /* bdev & sector remapped for NONE transfer */
  1454. + sect = 0;
  1455. + ret = bqu->issue_flush_fn(bqu, bdev->bd_disk, &sect);
  1456. + if(ret && error_sector) {
  1457. + if(sect >= lo->lo_offs_sec) {
  1458. + sect -= lo->lo_offs_sec;
  1459. + } else {
  1460. + sect = 0;
  1461. + }
  1462. + *error_sector = sect;
  1463. + }
  1464. + return ret;
  1465. }
  1466. - spin_unlock_irq(&lo->lo_lock);
  1467. + if(!(bqu->queue_flags & (1 << QUEUE_FLAG_ORDERED)))
  1468. + return -EOPNOTSUPP;
  1469. + /* encrypted loop is not flushed now, but next request that */
  1470. + /* arrives at loop_make_request_real() gets tagged as barrier */
  1471. + set_bit(0, &lo->lo_bio_flag);
  1472. + return 0;
  1473. +}
  1474. +#endif
  1475. - return bio;
  1476. +static int loop_make_request_err(request_queue_t *q, struct bio *old_bio)
  1477. +{
  1478. + old_bio->bi_next = NULL;
  1479. + bio_io_error(old_bio, old_bio->bi_size);
  1480. + return 0;
  1481. }
  1482. -static int loop_make_request(request_queue_t *q, struct bio *old_bio)
  1483. +static int loop_make_request_real(request_queue_t *q, struct bio *old_bio)
  1484. {
  1485. + struct bio *new_bio, *merge;
  1486. struct loop_device *lo = q->queuedata;
  1487. - int rw = bio_rw(old_bio);
  1488. + struct loop_bio_extension *extension;
  1489. + int rw = bio_rw(old_bio), y;
  1490. + char *md;
  1491. + set_current_state(TASK_RUNNING);
  1492. if (!lo)
  1493. goto out;
  1494. -
  1495. - spin_lock_irq(&lo->lo_lock);
  1496. - if (lo->lo_state != Lo_bound)
  1497. - goto inactive;
  1498. + if ((rw == WRITE) && (lo->lo_flags & LO_FLAGS_READ_ONLY))
  1499. + goto out;
  1500. atomic_inc(&lo->lo_pending);
  1501. - spin_unlock_irq(&lo->lo_lock);
  1502. + /*
  1503. + * file backed, queue for loop_thread to handle
  1504. + */
  1505. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1506. + loop_add_queue_last(lo, old_bio, &lo->lo_bio_que0);
  1507. + return 0;
  1508. + }
  1509. +
  1510. + /*
  1511. + * device backed, just remap bdev & sector for NONE transfer
  1512. + */
  1513. + if (!lo->lo_encryption) {
  1514. + old_bio->bi_sector += lo->lo_offs_sec;
  1515. + old_bio->bi_bdev = lo->lo_device;
  1516. + generic_make_request(old_bio);
  1517. + if (atomic_dec_and_test(&lo->lo_pending))
  1518. + wake_up_interruptible(&lo->lo_bio_wait);
  1519. + return 0;
  1520. + }
  1521. +
  1522. + /*
  1523. + * device backed, start reads and writes now if buffer available
  1524. + */
  1525. + merge = NULL;
  1526. +#if LINUX_VERSION_CODE >= 0x20609
  1527. + old_bio->bi_hw_front_size = 0;
  1528. +#endif
  1529. + if(test_and_clear_bit(0, &lo->lo_bio_flag) || bio_barrier(old_bio)) {
  1530. + atomic_inc(&lo->lo_bio_barr);
  1531. +#if LINUX_VERSION_CODE >= 0x20609
  1532. + old_bio->bi_hw_front_size = 1536;
  1533. +#endif
  1534. + }
  1535. + try_next_old_bio_vec:
  1536. + /* Passing isBarrBioPtr as NULL. All barriers are sent from helper thread */
  1537. + /* If loop_get_buffer() incorrectly attempts to return barrier bio here, */
  1538. + /* then that function fails with NULL pointer dereference */
  1539. + new_bio = loop_get_buffer(lo, old_bio, 0, &merge, NULL);
  1540. + if (!new_bio) {
  1541. + /* just queue request and let thread handle allocs later */
  1542. + if (merge)
  1543. + loop_add_queue_last(lo, merge, &lo->lo_bio_que1);
  1544. + else
  1545. + loop_add_queue_last(lo, old_bio, &lo->lo_bio_que2);
  1546. + return 0;
  1547. + }
  1548. if (rw == WRITE) {
  1549. - if (lo->lo_flags & LO_FLAGS_READ_ONLY)
  1550. - goto err;
  1551. - } else if (rw == READA) {
  1552. - rw = READ;
  1553. - } else if (rw != READ) {
  1554. - printk(KERN_ERR "loop: unknown command (%x)\n", rw);
  1555. - goto err;
  1556. + extension = new_bio->bi_private;
  1557. + y = extension->bioext_index;
  1558. + md = kmap(old_bio->bi_io_vec[y].bv_page) + old_bio->bi_io_vec[y].bv_offset;
  1559. + if (lo_do_transfer(lo, WRITE, page_address(new_bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1560. + clear_bit(0, &merge->bi_flags);
  1561. + }
  1562. + kunmap(old_bio->bi_io_vec[y].bv_page);
  1563. }
  1564. - loop_add_bio(lo, old_bio);
  1565. +
  1566. + /* merge & old_bio may vanish during generic_make_request() */
  1567. + /* if last vec gets processed before function returns */
  1568. + y = (merge->bi_idx < old_bio->bi_vcnt) ? 1 : 0;
  1569. + generic_make_request(new_bio);
  1570. +
  1571. + /* other vecs may need processing too */
  1572. + if (y)
  1573. + goto try_next_old_bio_vec;
  1574. return 0;
  1575. -err:
  1576. - if (atomic_dec_and_test(&lo->lo_pending))
  1577. - up(&lo->lo_bh_mutex);
  1578. +
  1579. out:
  1580. + old_bio->bi_next = NULL;
  1581. bio_io_error(old_bio, old_bio->bi_size);
  1582. return 0;
  1583. -inactive:
  1584. - spin_unlock_irq(&lo->lo_lock);
  1585. - goto out;
  1586. }
  1587. -/*
  1588. - * kick off io on the underlying address space
  1589. - */
  1590. -static void loop_unplug(request_queue_t *q)
  1591. +static void loop_unplug_backingdev(request_queue_t *bq)
  1592. {
  1593. - struct loop_device *lo = q->queuedata;
  1594. -
  1595. - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
  1596. - blk_run_address_space(lo->lo_backing_file->f_mapping);
  1597. +#if defined(QUEUE_FLAG_PLUGGED)
  1598. + if(bq && bq->unplug_fn)
  1599. + bq->unplug_fn(bq);
  1600. +#else
  1601. + blk_run_queues();
  1602. +#endif
  1603. }
  1604. -struct switch_request {
  1605. - struct file *file;
  1606. - struct completion wait;
  1607. -};
  1608. -
  1609. -static void do_loop_switch(struct loop_device *, struct switch_request *);
  1610. -
  1611. -static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
  1612. +#if defined(QUEUE_FLAG_PLUGGED)
  1613. +static void loop_unplug_loopdev(request_queue_t *mq)
  1614. {
  1615. - int ret;
  1616. + struct loop_device *lo;
  1617. + struct file *f;
  1618. - if (unlikely(!bio->bi_bdev)) {
  1619. - do_loop_switch(lo, bio->bi_private);
  1620. - bio_put(bio);
  1621. - } else {
  1622. - ret = do_bio_filebacked(lo, bio);
  1623. - bio_endio(bio, bio->bi_size, ret);
  1624. - }
  1625. + clear_bit(QUEUE_FLAG_PLUGGED, &mq->queue_flags);
  1626. + lo = mq->queuedata;
  1627. + if(!lo)
  1628. + return;
  1629. + f = lo->lo_backing_file;
  1630. + if(!f)
  1631. + return;
  1632. + blk_run_address_space(f->f_mapping);
  1633. }
  1634. +#endif
  1635. /*
  1636. * worker thread that handles reads/writes to file backed loop devices,
  1637. @@ -476,20 +1385,45 @@ static inline void loop_handle_bio(struc
  1638. static int loop_thread(void *data)
  1639. {
  1640. struct loop_device *lo = data;
  1641. - struct bio *bio;
  1642. -
  1643. + struct bio *bio, *xbio, *merge;
  1644. + struct loop_bio_extension *extension;
  1645. + int x, y, flushcnt = 0, isBarrBio;
  1646. + wait_queue_t waitq;
  1647. + char *md;
  1648. + request_queue_t *backingQueue;
  1649. + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
  1650. +
  1651. + init_waitqueue_entry(&waitq, current);
  1652. +#if !defined(OLD_PER_THREAD_RLIMITS)
  1653. + memcpy(&current->signal->rlim[0], &loop_rlim_defaults[0], sizeof(current->signal->rlim));
  1654. +#else
  1655. + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
  1656. +#endif
  1657. daemonize("loop%d", lo->lo_number);
  1658. + if(lo->lo_device)
  1659. + backingQueue = bdev_get_queue(lo->lo_device);
  1660. + else
  1661. + backingQueue = NULL;
  1662. +
  1663. /*
  1664. * loop can be used in an encrypted device,
  1665. * hence, it mustn't be stopped at all
  1666. * because it could be indirectly used during suspension
  1667. */
  1668. +#if defined(PF_NOFREEZE)
  1669. current->flags |= PF_NOFREEZE;
  1670. +#elif defined(PF_IOTHREAD)
  1671. + current->flags |= PF_IOTHREAD;
  1672. +#endif
  1673. + current->flags |= PF_LESS_THROTTLE;
  1674. - set_user_nice(current, -20);
  1675. + if (lo_nice > 0)
  1676. + lo_nice = 0;
  1677. + if (lo_nice < -20)
  1678. + lo_nice = -20;
  1679. + set_user_nice(current, lo_nice);
  1680. - lo->lo_state = Lo_bound;
  1681. atomic_inc(&lo->lo_pending);
  1682. /*
  1683. @@ -498,23 +1432,152 @@ static int loop_thread(void *data)
  1684. up(&lo->lo_sem);
  1685. for (;;) {
  1686. - down_interruptible(&lo->lo_bh_mutex);
  1687. + add_wait_queue(&lo->lo_bio_wait, &waitq);
  1688. + for (;;) {
  1689. + set_current_state(TASK_INTERRUPTIBLE);
  1690. + if (!atomic_read(&lo->lo_pending))
  1691. + break;
  1692. +
  1693. + x = 0;
  1694. + spin_lock_irq(&lo->lo_lock);
  1695. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1696. + if(lo->lo_keyscrub_fn) x = 1;
  1697. +#endif
  1698. + if (lo->lo_bio_que0) {
  1699. + /* don't sleep if device backed READ needs processing */
  1700. + /* don't sleep if file backed READ/WRITE needs processing */
  1701. + x = 1;
  1702. + } else if (lo->lo_bio_que1) {
  1703. + /* don't sleep if a buffer-bio is available */
  1704. + /* don't sleep if need-buffer-bio request is not set */
  1705. + if (lo->lo_bio_free0 || !(lo->lo_bio_need & 1))
  1706. + x = 1;
  1707. + } else if (lo->lo_bio_que2) {
  1708. + /* don't sleep if a merge-bio is available */
  1709. + /* don't sleep if need-merge-bio request is not set */
  1710. + if (lo->lo_bio_free1 || !(lo->lo_bio_need & 2))
  1711. + x = 1;
  1712. + }
  1713. + spin_unlock_irq(&lo->lo_lock);
  1714. + if (x)
  1715. + break;
  1716. +
  1717. + schedule();
  1718. + }
  1719. + set_current_state(TASK_RUNNING);
  1720. + remove_wait_queue(&lo->lo_bio_wait, &waitq);
  1721. +
  1722. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1723. + if(lo->lo_keyscrub_fn) {
  1724. + (*lo->lo_keyscrub_fn)(lo->lo_keyscrub_ptr);
  1725. + lo->lo_keyscrub_fn = 0;
  1726. + }
  1727. +#endif
  1728. /*
  1729. - * could be upped because of tear-down, not because of
  1730. + * could be woken because of tear-down, not because of
  1731. * pending work
  1732. */
  1733. if (!atomic_read(&lo->lo_pending))
  1734. break;
  1735. - bio = loop_get_bio(lo);
  1736. - if (!bio) {
  1737. - printk("loop: missing bio\n");
  1738. + bio = loop_get_bio(lo, &x);
  1739. + if (!bio)
  1740. continue;
  1741. +
  1742. + /*
  1743. + * x list tag usage(has-buffer,has-merge)
  1744. + * --- --------------- ---------------------------
  1745. + * 0 lo->lo_bio_que0 dev-r(y,y) / file-rw
  1746. + * 1 lo->lo_bio_que1 dev-rw(n,y)
  1747. + * 2 lo->lo_bio_que2 dev-rw(n,n)
  1748. + */
  1749. + if (x >= 1) {
  1750. + /* loop_make_request_real didn't allocate a buffer, do that now */
  1751. + if (x == 1) {
  1752. + merge = bio;
  1753. + bio = merge->bi_private;
  1754. + } else {
  1755. + merge = NULL;
  1756. + }
  1757. + try_next_bio_vec:
  1758. + isBarrBio = 0;
  1759. + xbio = loop_get_buffer(lo, bio, 1, &merge, &isBarrBio);
  1760. + if (!xbio) {
  1761. + loop_unplug_backingdev(backingQueue);
  1762. + flushcnt = 0;
  1763. + if (merge)
  1764. + loop_add_queue_first(lo, merge, &lo->lo_bio_que1);
  1765. + else
  1766. + loop_add_queue_first(lo, bio, &lo->lo_bio_que2);
  1767. + /* lo->lo_bio_need should be non-zero now, go back to sleep */
  1768. + continue;
  1769. + }
  1770. + if (bio_rw(bio) == WRITE) {
  1771. + extension = xbio->bi_private;
  1772. + y = extension->bioext_index;
  1773. + md = kmap(bio->bi_io_vec[y].bv_page) + bio->bi_io_vec[y].bv_offset;
  1774. + if (lo_do_transfer(lo, WRITE, page_address(xbio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1775. + clear_bit(0, &merge->bi_flags);
  1776. + }
  1777. + kunmap(bio->bi_io_vec[y].bv_page);
  1778. + }
  1779. +
  1780. + /* merge & bio may vanish during generic_make_request() */
  1781. + /* if last vec gets processed before function returns */
  1782. + y = (merge->bi_idx < bio->bi_vcnt) ? 1 : 0;
  1783. + generic_make_request(xbio);
  1784. +
  1785. + /* maybe just submitted bio was a barrier bio */
  1786. + if (isBarrBio) {
  1787. + atomic_dec(&lo->lo_bio_barr);
  1788. + }
  1789. +
  1790. + /* start I/O if there are no more requests lacking buffers */
  1791. + x = 0;
  1792. + spin_lock_irq(&lo->lo_lock);
  1793. + if (!y && !lo->lo_bio_que1 && !lo->lo_bio_que2)
  1794. + x = 1;
  1795. + spin_unlock_irq(&lo->lo_lock);
  1796. + if (x || (++flushcnt >= lo->lo_bio_flsh)) {
  1797. + loop_unplug_backingdev(backingQueue);
  1798. + flushcnt = 0;
  1799. + }
  1800. +
  1801. + /* other vecs may need processing too */
  1802. + if (y)
  1803. + goto try_next_bio_vec;
  1804. +
  1805. + /* request not completely processed yet */
  1806. + continue;
  1807. + }
  1808. +
  1809. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1810. + /* request is for file backed device */
  1811. + y = do_bio_filebacked(lo, bio);
  1812. + bio->bi_next = NULL;
  1813. + bio_endio(bio, bio->bi_size, y);
  1814. + } else {
  1815. + /* device backed read has completed, do decrypt now */
  1816. + extension = bio->bi_private;
  1817. + merge = extension->bioext_merge;
  1818. + y = extension->bioext_index;
  1819. + xbio = merge->bi_private;
  1820. + md = kmap(xbio->bi_io_vec[y].bv_page) + xbio->bi_io_vec[y].bv_offset;
  1821. + if (lo_do_transfer(lo, READ, page_address(bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1822. + clear_bit(0, &merge->bi_flags);
  1823. + }
  1824. + flush_dcache_page(xbio->bi_io_vec[y].bv_page);
  1825. + kunmap(xbio->bi_io_vec[y].bv_page);
  1826. + loop_put_buffer(lo, bio, 0);
  1827. + if (!atomic_dec_and_test(&merge->bi_cnt))
  1828. + continue;
  1829. + xbio->bi_next = NULL;
  1830. + bio_endio(xbio, xbio->bi_size, test_bit(0, &merge->bi_flags) ? (int)merge->bi_size : -EIO);
  1831. + loop_put_buffer(lo, merge, 1);
  1832. }
  1833. - loop_handle_bio(lo, bio);
  1834. /*
  1835. - * upped both for pending work and tear-down, lo_pending
  1836. + * woken both for pending work and tear-down, lo_pending
  1837. * will hit zero then
  1838. */
  1839. if (atomic_dec_and_test(&lo->lo_pending))
  1840. @@ -525,101 +1588,26 @@ static int loop_thread(void *data)
  1841. return 0;
  1842. }
  1843. -/*
  1844. - * loop_switch performs the hard work of switching a backing store.
  1845. - * First it needs to flush existing IO, it does this by sending a magic
  1846. - * BIO down the pipe. The completion of this BIO does the actual switch.
  1847. - */
  1848. -static int loop_switch(struct loop_device *lo, struct file *file)
  1849. -{
  1850. - struct switch_request w;
  1851. - struct bio *bio = bio_alloc(GFP_KERNEL, 1);
  1852. - if (!bio)
  1853. - return -ENOMEM;
  1854. - init_completion(&w.wait);
  1855. - w.file = file;
  1856. - bio->bi_private = &w;
  1857. - bio->bi_bdev = NULL;
  1858. - loop_make_request(lo->lo_queue, bio);
  1859. - wait_for_completion(&w.wait);
  1860. - return 0;
  1861. -}
  1862. -
  1863. -/*
  1864. - * Do the actual switch; called from the BIO completion routine
  1865. - */
  1866. -static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
  1867. -{
  1868. - struct file *file = p->file;
  1869. - struct file *old_file = lo->lo_backing_file;
  1870. - struct address_space *mapping = file->f_mapping;
  1871. -
  1872. - mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
  1873. - lo->lo_backing_file = file;
  1874. - lo->lo_blocksize = mapping->host->i_blksize;
  1875. - lo->old_gfp_mask = mapping_gfp_mask(mapping);
  1876. - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
  1877. - complete(&p->wait);
  1878. -}
  1879. -
  1880. -
  1881. -/*
  1882. - * loop_change_fd switched the backing store of a loopback device to
  1883. - * a new file. This is useful for operating system installers to free up
  1884. - * the original file and in High Availability environments to switch to
  1885. - * an alternative location for the content in case of server meltdown.
  1886. - * This can only work if the loop device is used read-only, and if the
  1887. - * new backing store is the same size and type as the old backing store.
  1888. - */
  1889. -static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
  1890. - struct block_device *bdev, unsigned int arg)
  1891. +static void loop_set_softblksz(struct loop_device *lo, struct block_device *bdev) /* pick a soft block size for bdev and apply it with set_blocksize() */
  1892. {
  1893. - struct file *file, *old_file;
  1894. - struct inode *inode;
  1895. - int error;
  1896. -
  1897. - error = -ENXIO;
  1898. - if (lo->lo_state != Lo_bound)
  1899. - goto out;
  1900. -
  1901. - /* the loop device has to be read-only */
  1902. - error = -EINVAL;
  1903. - if (lo->lo_flags != LO_FLAGS_READ_ONLY)
  1904. - goto out;
  1905. -
  1906. - error = -EBADF;
  1907. - file = fget(arg);
  1908. - if (!file)
  1909. - goto out;
  1910. -
  1911. - inode = file->f_mapping->host;
  1912. - old_file = lo->lo_backing_file;
  1913. -
  1914. - error = -EINVAL;
  1915. -
  1916. - if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
  1917. - goto out_putf;
  1918. -
  1919. - /* new backing store needs to support loop (eg sendfile) */
  1920. - if (!inode->i_fop->sendfile)
  1921. - goto out_putf;
  1922. -
  1923. - /* size of the new backing store needs to be the same */
  1924. - if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
  1925. - goto out_putf;
  1926. -
  1927. - /* and ... switch */
  1928. - error = loop_switch(lo, file);
  1929. - if (error)
  1930. - goto out_putf;
  1931. -
  1932. - fput(old_file);
  1933. - return 0;
  1934. + int bs, x;
  1935. - out_putf:
  1936. - fput(file);
  1937. - out:
  1938. - return error;
  1939. + if (lo->lo_device)
  1940. + bs = block_size(lo->lo_device); /* start from the backing block device's block size */
  1941. + else
  1942. + bs = PAGE_SIZE; /* lo_device is NULL: fall back to PAGE_SIZE */
  1943. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { /* flag is set by loop_set_fd() for regular-file backing */
  1944. + x = (int) bdev->bd_inode->i_size; /* truncation is harmless: only bits 9..12 are tested below */
  1945. + if ((bs == 8192) && (x & 0x1E00)) /* step bs down while the size is not a multiple of it */
  1946. + bs = 4096;
  1947. + if ((bs == 4096) && (x & 0x0E00))
  1948. + bs = 2048;
  1949. + if ((bs == 2048) && (x & 0x0600))
  1950. + bs = 1024;
  1951. + if ((bs == 1024) && (x & 0x0200))
  1952. + bs = 512;
  1953. + }
  1954. + set_blocksize(bdev, bs);
  1955. }
  1956. static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
  1957. @@ -627,124 +1615,189 @@ static int loop_set_fd(struct loop_devic
  1958. {
  1959. struct file *file;
  1960. struct inode *inode;
  1961. - struct address_space *mapping;
  1962. - unsigned lo_blocksize;
  1963. + struct block_device *lo_device = NULL;
  1964. int lo_flags = 0;
  1965. int error;
  1966. - loff_t size;
  1967. -
  1968. - /* This is safe, since we have a reference from open(). */
  1969. - __module_get(THIS_MODULE);
  1970. -
  1971. - error = -EBUSY;
  1972. - if (lo->lo_state != Lo_unbound)
  1973. - goto out;
  1974. error = -EBADF;
  1975. file = fget(arg);
  1976. if (!file)
  1977. goto out;
  1978. - mapping = file->f_mapping;
  1979. - inode = mapping->host;
  1980. + error = -EINVAL;
  1981. + inode = file->f_dentry->d_inode;
  1982. if (!(file->f_mode & FMODE_WRITE))
  1983. lo_flags |= LO_FLAGS_READ_ONLY;
  1984. - error = -EINVAL;
  1985. - if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
  1986. - struct address_space_operations *aops = mapping->a_ops;
  1987. + init_MUTEX_LOCKED(&lo->lo_sem);
  1988. + spin_lock_init(&lo->lo_lock);
  1989. + init_waitqueue_head(&lo->lo_bio_wait);
  1990. + atomic_set(&lo->lo_pending, 0);
  1991. + atomic_set(&lo->lo_bio_barr, 0);
  1992. + clear_bit(0, &lo->lo_bio_flag);
  1993. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1994. + lo->lo_keyscrub_fn = 0;
  1995. +#endif
  1996. + lo->lo_offset = lo->lo_sizelimit = 0;
  1997. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1998. + lo->lo_encryption = NULL;
  1999. + lo->lo_encrypt_key_size = 0;
  2000. + lo->transfer = NULL;
  2001. + lo->lo_crypt_name[0] = 0;
  2002. + lo->lo_file_name[0] = 0;
  2003. + lo->lo_init[1] = lo->lo_init[0] = 0;
  2004. + lo->lo_key_owner = 0;
  2005. + lo->ioctl = NULL;
  2006. + lo->key_data = NULL;
  2007. + lo->lo_bio_que2 = lo->lo_bio_que1 = lo->lo_bio_que0 = NULL;
  2008. + lo->lo_bio_free1 = lo->lo_bio_free0 = NULL;
  2009. + lo->lo_bio_flsh = lo->lo_bio_need = 0;
  2010. +
  2011. + if (S_ISBLK(inode->i_mode)) {
  2012. + lo_device = inode->i_bdev;
  2013. + if (lo_device == bdev) {
  2014. + error = -EBUSY;
  2015. + goto out_putf;
  2016. + }
  2017. + if (loop_prealloc_init(lo, 0)) {
  2018. + error = -ENOMEM;
  2019. + goto out_putf;
  2020. + }
  2021. + if (bdev_read_only(lo_device))
  2022. + lo_flags |= LO_FLAGS_READ_ONLY;
  2023. + else
  2024. + filemap_fdatawrite(inode->i_mapping);
  2025. + } else if (S_ISREG(inode->i_mode)) {
  2026. /*
  2027. * If we can't read - sorry. If we only can't write - well,
  2028. * it's going to be read-only.
  2029. */
  2030. - if (!file->f_op->sendfile)
  2031. + if (!file->f_op || !file->f_op->read)
  2032. goto out_putf;
  2033. - if (!aops->prepare_write || !aops->commit_write)
  2034. + if (!file->f_op->write)
  2035. lo_flags |= LO_FLAGS_READ_ONLY;
  2036. - lo_blocksize = inode->i_blksize;
  2037. - error = 0;
  2038. - } else {
  2039. + lo_flags |= LO_FLAGS_DO_BMAP;
  2040. + if (loop_prealloc_init(lo, 1)) {
  2041. + error = -ENOMEM;
  2042. + goto out_putf;
  2043. + }
  2044. + } else
  2045. goto out_putf;
  2046. - }
  2047. -
  2048. - size = get_loop_size(lo, file);
  2049. - if ((loff_t)(sector_t)size != size) {
  2050. - error = -EFBIG;
  2051. - goto out_putf;
  2052. - }
  2053. + get_file(file);
  2054. if (!(lo_file->f_mode & FMODE_WRITE))
  2055. lo_flags |= LO_FLAGS_READ_ONLY;
  2056. set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
  2057. - lo->lo_blocksize = lo_blocksize;
  2058. - lo->lo_device = bdev;
  2059. + lo->lo_device = lo_device;
  2060. lo->lo_flags = lo_flags;
  2061. + if(lo_flags & LO_FLAGS_READ_ONLY)
  2062. + lo->lo_flags |= 0x200000; /* export to user space */
  2063. lo->lo_backing_file = file;
  2064. - lo->transfer = NULL;
  2065. - lo->ioctl = NULL;
  2066. - lo->lo_sizelimit = 0;
  2067. - lo->old_gfp_mask = mapping_gfp_mask(mapping);
  2068. - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
  2069. -
  2070. - lo->lo_bio = lo->lo_biotail = NULL;
  2071. + if (figure_loop_size(lo, bdev)) {
  2072. + error = -EFBIG;
  2073. + goto out_cleanup;
  2074. + }
  2075. /*
  2076. * set queue make_request_fn, and add limits based on lower level
  2077. * device
  2078. */
  2079. - blk_queue_make_request(lo->lo_queue, loop_make_request);
  2080. - lo->lo_queue->queuedata = lo;
  2081. - lo->lo_queue->unplug_fn = loop_unplug;
  2082. + blk_queue_make_request(lo->lo_queue, loop_make_request_err);
  2083. + blk_queue_bounce_limit(lo->lo_queue, BLK_BOUNCE_ANY);
  2084. + blk_queue_max_segment_size(lo->lo_queue, PAGE_CACHE_SIZE);
  2085. + blk_queue_segment_boundary(lo->lo_queue, PAGE_CACHE_SIZE - 1);
  2086. + blk_queue_max_phys_segments(lo->lo_queue, MAX_PHYS_SEGMENTS);
  2087. + blk_queue_max_hw_segments(lo->lo_queue, MAX_HW_SEGMENTS);
  2088. + blk_queue_max_sectors(lo->lo_queue, MAX_SECTORS);
  2089. + lo->lo_queue->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
  2090. +#if defined(QUEUE_FLAG_ORDERED)
  2091. + blk_queue_ordered(lo->lo_queue, 0);
  2092. +#endif
  2093. +#if LINUX_VERSION_CODE >= 0x20609
  2094. + blk_queue_issue_flush_fn(lo->lo_queue, NULL);
  2095. +#endif
  2096. +
  2097. + /*
  2098. + * we remap to a block device, make sure we correctly stack limits
  2099. + */
  2100. + if (S_ISBLK(inode->i_mode) && lo_device) {
  2101. + request_queue_t *q = bdev_get_queue(lo_device);
  2102. +
  2103. + blk_queue_hardsect_size(lo->lo_queue, q->hardsect_size);
  2104. +#if defined(QUEUE_FLAG_ORDERED)
  2105. + if(q->queue_flags & (1 << QUEUE_FLAG_ORDERED)) {
  2106. + blk_queue_ordered(lo->lo_queue, 1);
  2107. +#if LINUX_VERSION_CODE >= 0x20609
  2108. + if(q->issue_flush_fn) {
  2109. + blk_queue_issue_flush_fn(lo->lo_queue, loop_issue_flush);
  2110. + }
  2111. +#endif
  2112. + }
  2113. +#endif
  2114. + }
  2115. - set_capacity(disks[lo->lo_number], size);
  2116. - bd_set_size(bdev, size << 9);
  2117. + if (lo_flags & LO_FLAGS_DO_BMAP) {
  2118. + lo->old_gfp_mask = mapping_gfp_mask(inode->i_mapping);
  2119. + mapping_set_gfp_mask(inode->i_mapping, (lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)) | __GFP_HIGH);
  2120. + } else {
  2121. + lo->old_gfp_mask = -1;
  2122. + }
  2123. - set_blocksize(bdev, lo_blocksize);
  2124. + loop_set_softblksz(lo, bdev);
  2125. - kernel_thread(loop_thread, lo, CLONE_KERNEL);
  2126. + error = kernel_thread(loop_thread, lo, CLONE_KERNEL);
  2127. + if(error < 0)
  2128. + goto out_mapping;
  2129. down(&lo->lo_sem);
  2130. + fput(file);
  2131. +#if defined(QUEUE_FLAG_PLUGGED)
  2132. + lo->lo_queue->unplug_fn = loop_unplug_loopdev;
  2133. +#endif
  2134. + lo->lo_queue->queuedata = lo;
  2135. + __module_get(THIS_MODULE);
  2136. return 0;
  2137. + out_mapping:
  2138. + if(lo->old_gfp_mask != -1)
  2139. + mapping_set_gfp_mask(inode->i_mapping, lo->old_gfp_mask);
  2140. + out_cleanup:
  2141. + loop_prealloc_cleanup(lo);
  2142. + fput(file);
  2143. out_putf:
  2144. fput(file);
  2145. out:
  2146. - /* This is safe: open() is still holding a reference. */
  2147. - module_put(THIS_MODULE);
  2148. return error;
  2149. }
  2150. -static int
  2151. -loop_release_xfer(struct loop_device *lo)
  2152. +static int loop_release_xfer(struct loop_device *lo) /* detach lo's encryption table; returns xfer->release()'s result, or 0 if none */
  2153. {
  2154. int err = 0;
  2155. struct loop_func_table *xfer = lo->lo_encryption;
  2156. if (xfer) {
  2157. + lo->transfer = NULL; /* clear the transfer hook before xfer->release() runs */
  2158. if (xfer->release)
  2159. err = xfer->release(lo);
  2160. - lo->transfer = NULL;
  2161. lo->lo_encryption = NULL;
  2162. module_put(xfer->owner);
  2163. }
  2164. return err;
  2165. }
  2166. -static int
  2167. -loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
  2168. - const struct loop_info64 *i)
  2169. +static int loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, struct loop_info64 *i)
  2170. {
  2171. int err = 0;
  2172. if (xfer) {
  2173. struct module *owner = xfer->owner;
  2174. - if (!try_module_get(owner))
  2175. + if(!try_module_get(owner))
  2176. return -EINVAL;
  2177. if (xfer->init)
  2178. err = xfer->init(lo, i);
  2179. @@ -761,59 +1814,54 @@ static int loop_clr_fd(struct loop_devic
  2180. struct file *filp = lo->lo_backing_file;
  2181. int gfp = lo->old_gfp_mask;
  2182. - if (lo->lo_state != Lo_bound)
  2183. - return -ENXIO;
  2184. -
  2185. - if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
  2186. + if (bdev->bd_openers != 1) /* one for this fd being open */
  2187. return -EBUSY;
  2188. -
  2189. - if (filp == NULL)
  2190. + if (filp==NULL)
  2191. return -EINVAL;
  2192. - spin_lock_irq(&lo->lo_lock);
  2193. - lo->lo_state = Lo_rundown;
  2194. + lo->lo_queue->queuedata = NULL;
  2195. + lo->lo_queue->make_request_fn = loop_make_request_err;
  2196. if (atomic_dec_and_test(&lo->lo_pending))
  2197. - up(&lo->lo_bh_mutex);
  2198. - spin_unlock_irq(&lo->lo_lock);
  2199. -
  2200. + wake_up_interruptible(&lo->lo_bio_wait);
  2201. down(&lo->lo_sem);
  2202. + loop_prealloc_cleanup(lo);
  2203. lo->lo_backing_file = NULL;
  2204. -
  2205. loop_release_xfer(lo);
  2206. lo->transfer = NULL;
  2207. lo->ioctl = NULL;
  2208. lo->lo_device = NULL;
  2209. lo->lo_encryption = NULL;
  2210. - lo->lo_offset = 0;
  2211. - lo->lo_sizelimit = 0;
  2212. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  2213. + lo->lo_keyscrub_fn = 0;
  2214. +#endif
  2215. + lo->lo_offset = lo->lo_sizelimit = 0;
  2216. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  2217. lo->lo_encrypt_key_size = 0;
  2218. lo->lo_flags = 0;
  2219. + lo->lo_init[1] = lo->lo_init[0] = 0;
  2220. + lo->lo_key_owner = 0;
  2221. + lo->key_data = NULL;
  2222. memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
  2223. memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
  2224. memset(lo->lo_file_name, 0, LO_NAME_SIZE);
  2225. invalidate_bdev(bdev, 0);
  2226. set_capacity(disks[lo->lo_number], 0);
  2227. - bd_set_size(bdev, 0);
  2228. - mapping_set_gfp_mask(filp->f_mapping, gfp);
  2229. - lo->lo_state = Lo_unbound;
  2230. + if (gfp != -1)
  2231. + mapping_set_gfp_mask(filp->f_dentry->d_inode->i_mapping, gfp);
  2232. fput(filp);
  2233. - /* This is safe: open() is still holding a reference. */
  2234. module_put(THIS_MODULE);
  2235. return 0;
  2236. }
  2237. -static int
  2238. -loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
  2239. +static int loop_set_status(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *info)
  2240. {
  2241. int err;
  2242. - struct loop_func_table *xfer;
  2243. + struct loop_func_table *xfer = NULL;
  2244. if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
  2245. !capable(CAP_SYS_ADMIN))
  2246. return -EPERM;
  2247. - if (lo->lo_state != Lo_bound)
  2248. - return -ENXIO;
  2249. if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
  2250. return -EINVAL;
  2251. @@ -821,6 +1869,22 @@ loop_set_status(struct loop_device *lo,
  2252. if (err)
  2253. return err;
  2254. + if ((loff_t)info->lo_offset < 0) {
  2255. + /* negative offset == remove offset from IV computations */
  2256. + lo->lo_offset = -(info->lo_offset);
  2257. + lo->lo_iv_remove = lo->lo_offset >> 9;
  2258. + } else {
  2259. + /* positive offset == include offset in IV computations */
  2260. + lo->lo_offset = info->lo_offset;
  2261. + lo->lo_iv_remove = 0;
  2262. + }
  2263. + lo->lo_offs_sec = lo->lo_offset >> 9;
  2264. + lo->lo_sizelimit = info->lo_sizelimit;
  2265. + err = figure_loop_size(lo, bdev);
  2266. + if (err)
  2267. + return err;
  2268. + loop_set_softblksz(lo, bdev);
  2269. +
  2270. if (info->lo_encrypt_type) {
  2271. unsigned int type = info->lo_encrypt_type;
  2272. @@ -829,31 +1893,20 @@ loop_set_status(struct loop_device *lo,
  2273. xfer = xfer_funcs[type];
  2274. if (xfer == NULL)
  2275. return -EINVAL;
  2276. - } else
  2277. - xfer = NULL;
  2278. -
  2279. + }
  2280. err = loop_init_xfer(lo, xfer, info);
  2281. if (err)
  2282. return err;
  2283. - if (lo->lo_offset != info->lo_offset ||
  2284. - lo->lo_sizelimit != info->lo_sizelimit) {
  2285. - lo->lo_offset = info->lo_offset;
  2286. - lo->lo_sizelimit = info->lo_sizelimit;
  2287. - if (figure_loop_size(lo))
  2288. - return -EFBIG;
  2289. - }
  2290. -
  2291. - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  2292. - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  2293. - lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  2294. - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  2295. -
  2296. if (!xfer)
  2297. xfer = &none_funcs;
  2298. lo->transfer = xfer->transfer;
  2299. lo->ioctl = xfer->ioctl;
  2300. -
  2301. +
  2302. + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  2303. + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  2304. + lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  2305. + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  2306. lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
  2307. lo->lo_init[0] = info->lo_init[0];
  2308. lo->lo_init[1] = info->lo_init[1];
  2309. @@ -863,18 +1916,16 @@ loop_set_status(struct loop_device *lo,
  2310. lo->lo_key_owner = current->uid;
  2311. }
  2312. + lo->lo_queue->make_request_fn = loop_make_request_real;
  2313. return 0;
  2314. }
  2315. -static int
  2316. -loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  2317. +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  2318. {
  2319. struct file *file = lo->lo_backing_file;
  2320. struct kstat stat;
  2321. int error;
  2322. - if (lo->lo_state != Lo_bound)
  2323. - return -ENXIO;
  2324. error = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
  2325. if (error)
  2326. return error;
  2327. @@ -883,17 +1934,18 @@ loop_get_status(struct loop_device *lo,
  2328. info->lo_device = huge_encode_dev(stat.dev);
  2329. info->lo_inode = stat.ino;
  2330. info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
  2331. - info->lo_offset = lo->lo_offset;
  2332. + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
  2333. info->lo_sizelimit = lo->lo_sizelimit;
  2334. info->lo_flags = lo->lo_flags;
  2335. memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
  2336. memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
  2337. - info->lo_encrypt_type =
  2338. - lo->lo_encryption ? lo->lo_encryption->number : 0;
  2339. + info->lo_encrypt_type = lo->lo_encryption ? lo->lo_encryption->number : 0;
  2340. if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
  2341. info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
  2342. memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
  2343. lo->lo_encrypt_key_size);
  2344. + info->lo_init[0] = lo->lo_init[0];
  2345. + info->lo_init[1] = lo->lo_init[1];
  2346. }
  2347. return 0;
  2348. }
  2349. @@ -907,7 +1959,6 @@ loop_info64_from_old(const struct loop_i
  2350. info64->lo_inode = info->lo_inode;
  2351. info64->lo_rdevice = info->lo_rdevice;
  2352. info64->lo_offset = info->lo_offset;
  2353. - info64->lo_sizelimit = 0;
  2354. info64->lo_encrypt_type = info->lo_encrypt_type;
  2355. info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
  2356. info64->lo_flags = info->lo_flags;
  2357. @@ -921,7 +1972,7 @@ loop_info64_from_old(const struct loop_i
  2358. }
  2359. static int
  2360. -loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
  2361. +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
  2362. {
  2363. memset(info, 0, sizeof(*info));
  2364. info->lo_number = info64->lo_number;
  2365. @@ -944,14 +1995,15 @@ loop_info64_to_old(const struct loop_inf
  2366. if (info->lo_device != info64->lo_device ||
  2367. info->lo_rdevice != info64->lo_rdevice ||
  2368. info->lo_inode != info64->lo_inode ||
  2369. - info->lo_offset != info64->lo_offset)
  2370. + info->lo_offset != info64->lo_offset ||
  2371. + info64->lo_sizelimit)
  2372. return -EOVERFLOW;
  2373. return 0;
  2374. }
  2375. static int
  2376. -loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
  2377. +loop_set_status_old(struct loop_device *lo, struct block_device *bdev, const struct loop_info *arg)
  2378. {
  2379. struct loop_info info;
  2380. struct loop_info64 info64;
  2381. @@ -959,21 +2011,22 @@ loop_set_status_old(struct loop_device *
  2382. if (copy_from_user(&info, arg, sizeof (struct loop_info)))
  2383. return -EFAULT;
  2384. loop_info64_from_old(&info, &info64);
  2385. - return loop_set_status(lo, &info64);
  2386. + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
  2387. + return loop_set_status(lo, bdev, &info64);
  2388. }
  2389. static int
  2390. -loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
  2391. +loop_set_status64(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *arg) /* arg is a user-space pointer; it is copied in with copy_from_user() below */
  2392. {
  2393. struct loop_info64 info64;
  2394. if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
  2395. return -EFAULT;
  2396. - return loop_set_status(lo, &info64);
  2397. + return loop_set_status(lo, bdev, &info64); /* bdev is used there by figure_loop_size() and loop_set_softblksz() */
  2398. }
  2399. static int
  2400. -loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
  2401. +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) {
  2402. struct loop_info info;
  2403. struct loop_info64 info64;
  2404. int err = 0;
  2405. @@ -991,7 +2044,7 @@ loop_get_status_old(struct loop_device *
  2406. }
  2407. static int
  2408. -loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
  2409. +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) {
  2410. struct loop_info64 info64;
  2411. int err = 0;
  2412. @@ -1005,61 +2058,65 @@ loop_get_status64(struct loop_device *lo
  2413. return err;
  2414. }
  2415. -static int lo_ioctl(struct inode * inode, struct file * file,
  2416. - unsigned int cmd, unsigned long arg)
  2417. +static int lo_ioctl(struct inode *inode, struct file * file, unsigned int cmd, unsigned long arg) /* dispatch LOOP_* ioctls for the loop device behind inode->i_bdev */
  2418. {
  2419. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2420. + struct block_device *bdev = inode->i_bdev;
  2421. + struct loop_device *lo = bdev->bd_disk->private_data;
  2422. int err;
  2423. - down(&lo->lo_ctl_mutex);
  2424. + down(&bdev->bd_sem); /* held for the whole ioctl; released at out_err */
  2425. +
  2426. + /*
  2427. + * LOOP_SET_FD can only be called when no device is attached.
  2428. + * All other ioctls can only be called when a device is attached.
  2429. + */
  2430. + if (bdev->bd_disk->queue->queuedata != NULL) { /* queuedata is set by loop_set_fd() and cleared by loop_clr_fd() */
  2431. + if (cmd == LOOP_SET_FD) {
  2432. + err = -EBUSY;
  2433. + goto out_err;
  2434. + }
  2435. + } else {
  2436. + if (cmd != LOOP_SET_FD) {
  2437. + err = -ENXIO;
  2438. + goto out_err;
  2439. + }
  2440. + }
  2441. +
  2442. switch (cmd) {
  2443. case LOOP_SET_FD:
  2444. - err = loop_set_fd(lo, file, inode->i_bdev, arg);
  2445. - break;
  2446. - case LOOP_CHANGE_FD:
  2447. - err = loop_change_fd(lo, file, inode->i_bdev, arg);
  2448. + err = loop_set_fd(lo, file, bdev, arg);
  2449. break;
  2450. case LOOP_CLR_FD:
  2451. - err = loop_clr_fd(lo, inode->i_bdev);
  2452. + err = loop_clr_fd(lo, bdev);
  2453. break;
  2454. case LOOP_SET_STATUS:
  2455. - err = loop_set_status_old(lo, (struct loop_info __user *) arg);
  2456. + err = loop_set_status_old(lo, bdev, (struct loop_info *) arg);
  2457. break;
  2458. case LOOP_GET_STATUS:
  2459. - err = loop_get_status_old(lo, (struct loop_info __user *) arg);
  2460. + err = loop_get_status_old(lo, (struct loop_info *) arg);
  2461. break;
  2462. case LOOP_SET_STATUS64:
  2463. - err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
  2464. + err = loop_set_status64(lo, bdev, (struct loop_info64 *) arg);
  2465. break;
  2466. case LOOP_GET_STATUS64:
  2467. - err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
  2468. + err = loop_get_status64(lo, (struct loop_info64 *) arg);
  2469. break;
  2470. default:
  2471. err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
  2472. }
  2473. - up(&lo->lo_ctl_mutex);
  2474. +out_err:
  2475. + up(&bdev->bd_sem);
  2476. return err;
  2477. }
  2478. static int lo_open(struct inode *inode, struct file *file)
  2479. {
  2480. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2481. -
  2482. - down(&lo->lo_ctl_mutex);
  2483. - lo->lo_refcnt++;
  2484. - up(&lo->lo_ctl_mutex);
  2485. -
  2486. return 0;
  2487. }
  2488. static int lo_release(struct inode *inode, struct file *file)
  2489. {
  2490. - struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
  2491. -
  2492. - down(&lo->lo_ctl_mutex);
  2493. - --lo->lo_refcnt;
  2494. - up(&lo->lo_ctl_mutex);
  2495. -
  2496. + sync_blockdev(inode->i_bdev); /* sync the loop bdev on release; the old lo_refcnt bookkeeping is removed by this patch */
  2497. return 0;
  2498. }
  2499. @@ -1073,8 +2130,6 @@ static struct block_device_operations lo
  2500. /*
  2501. * And now the modules code and kernel interface.
  2502. */
  2503. -MODULE_PARM(max_loop, "i");
  2504. -MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
  2505. MODULE_LICENSE("GPL");
  2506. MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
  2507. @@ -1093,21 +2148,18 @@ int loop_unregister_transfer(int number)
  2508. unsigned int n = number;
  2509. struct loop_device *lo;
  2510. struct loop_func_table *xfer;
  2511. + int x;
  2512. if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
  2513. return -EINVAL;
  2514. -
  2515. xfer_funcs[n] = NULL;
  2516. -
  2517. - for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
  2518. - down(&lo->lo_ctl_mutex);
  2519. -
  2520. + for (x = 0; x < max_loop; x++) {
  2521. + lo = loop_dev_ptr_arr[x];
  2522. + if (!lo)
  2523. + continue;
  2524. if (lo->lo_encryption == xfer)
  2525. loop_release_xfer(lo);
  2526. -
  2527. - up(&lo->lo_ctl_mutex);
  2528. }
  2529. -
  2530. return 0;
  2531. }
  2532. @@ -1118,7 +2170,7 @@ int __init loop_init(void)
  2533. {
  2534. int i;
  2535. - if (max_loop < 1 || max_loop > 256) {
  2536. + if ((max_loop < 1) || (max_loop > 256)) {
  2537. printk(KERN_WARNING "loop: invalid max_loop (must be between"
  2538. " 1 and 256), using default (8)\n");
  2539. max_loop = 8;
  2540. @@ -1127,62 +2179,86 @@ int __init loop_init(void)
  2541. if (register_blkdev(LOOP_MAJOR, "loop"))
  2542. return -EIO;
  2543. - loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
  2544. - if (!loop_dev)
  2545. + loop_dev_ptr_arr = kmalloc(max_loop * sizeof(struct loop_device *), GFP_KERNEL);
  2546. + if (!loop_dev_ptr_arr)
  2547. goto out_mem1;
  2548. - memset(loop_dev, 0, max_loop * sizeof(struct loop_device));
  2549. disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
  2550. if (!disks)
  2551. goto out_mem2;
  2552. for (i = 0; i < max_loop; i++) {
  2553. + loop_dev_ptr_arr[i] = kmalloc(sizeof(struct loop_device), GFP_KERNEL);
  2554. + if (!loop_dev_ptr_arr[i])
  2555. + goto out_mem3;
  2556. + }
  2557. +
  2558. + for (i = 0; i < max_loop; i++) {
  2559. disks[i] = alloc_disk(1);
  2560. if (!disks[i])
  2561. - goto out_mem3;
  2562. + goto out_mem4;
  2563. + }
  2564. +
  2565. + for (i = 0; i < max_loop; i++) {
  2566. + disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
  2567. + if (!disks[i]->queue)
  2568. + goto out_mem5;
  2569. + disks[i]->queue->queuedata = NULL;
  2570. + blk_queue_make_request(disks[i]->queue, loop_make_request_err);
  2571. + }
  2572. +
  2573. + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
  2574. + if (!lo_prealloc[i])
  2575. + continue;
  2576. + if (lo_prealloc[i] < LO_PREALLOC_MIN)
  2577. + lo_prealloc[i] = LO_PREALLOC_MIN;
  2578. + if (lo_prealloc[i] > LO_PREALLOC_MAX)
  2579. + lo_prealloc[i] = LO_PREALLOC_MAX;
  2580. }
  2581. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2582. + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR);
  2583. +#endif
  2584. +
  2585. devfs_mk_dir("loop");
  2586. for (i = 0; i < max_loop; i++) {
  2587. - struct loop_device *lo = &loop_dev[i];
  2588. + struct loop_device *lo = loop_dev_ptr_arr[i];
  2589. struct gendisk *disk = disks[i];
  2590. -
  2591. - memset(lo, 0, sizeof(*lo));
  2592. - lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
  2593. - if (!lo->lo_queue)
  2594. - goto out_mem4;
  2595. - init_MUTEX(&lo->lo_ctl_mutex);
  2596. - init_MUTEX_LOCKED(&lo->lo_sem);
  2597. - init_MUTEX_LOCKED(&lo->lo_bh_mutex);
  2598. + memset(lo, 0, sizeof(struct loop_device));
  2599. lo->lo_number = i;
  2600. - spin_lock_init(&lo->lo_lock);
  2601. + lo->lo_queue = disk->queue;
  2602. disk->major = LOOP_MAJOR;
  2603. disk->first_minor = i;
  2604. disk->fops = &lo_fops;
  2605. sprintf(disk->disk_name, "loop%d", i);
  2606. sprintf(disk->devfs_name, "loop/%d", i);
  2607. disk->private_data = lo;
  2608. - disk->queue = lo->lo_queue;
  2609. + add_disk(disk);
  2610. }
  2611. - /* We cannot fail after we call this, so another loop!*/
  2612. - for (i = 0; i < max_loop; i++)
  2613. - add_disk(disks[i]);
  2614. +#if CONFIG_BLK_DEV_LOOP_AES
  2615. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  2616. + printk(KERN_INFO "loop: AES key scrubbing enabled\n");
  2617. +#endif
  2618. +#endif
  2619. printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
  2620. return 0;
  2621. +out_mem5:
  2622. + while (i--)
  2623. + blk_put_queue(disks[i]->queue);
  2624. + i = max_loop;
  2625. out_mem4:
  2626. while (i--)
  2627. - blk_put_queue(loop_dev[i].lo_queue);
  2628. - devfs_remove("loop");
  2629. + put_disk(disks[i]);
  2630. i = max_loop;
  2631. out_mem3:
  2632. while (i--)
  2633. - put_disk(disks[i]);
  2634. + kfree(loop_dev_ptr_arr[i]);
  2635. kfree(disks);
  2636. out_mem2:
  2637. - kfree(loop_dev);
  2638. + kfree(loop_dev_ptr_arr);
  2639. out_mem1:
  2640. unregister_blkdev(LOOP_MAJOR, "loop");
  2641. printk(KERN_ERR "loop: ran out of memory\n");
  2642. @@ -1195,26 +2271,30 @@ void loop_exit(void)
  2643. for (i = 0; i < max_loop; i++) {
  2644. del_gendisk(disks[i]);
  2645. - blk_put_queue(loop_dev[i].lo_queue);
  2646. put_disk(disks[i]);
  2647. + blk_put_queue(loop_dev_ptr_arr[i]->lo_queue);
  2648. + kfree(loop_dev_ptr_arr[i]);
  2649. }
  2650. devfs_remove("loop");
  2651. - if (unregister_blkdev(LOOP_MAJOR, "loop"))
  2652. - printk(KERN_WARNING "loop: cannot unregister blkdev\n");
  2653. -
  2654. + unregister_blkdev(LOOP_MAJOR, "loop");
  2655. kfree(disks);
  2656. - kfree(loop_dev);
  2657. + kfree(loop_dev_ptr_arr);
  2658. +
  2659. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2660. + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP);
  2661. +#endif
  2662. }
  2663. module_init(loop_init);
  2664. module_exit(loop_exit);
  2665. -#ifndef MODULE
  2666. -static int __init max_loop_setup(char *str)
  2667. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  2668. +void loop_add_keyscrub_fn(struct loop_device *lo, void (*fn)(void *), void *ptr) /* store fn and its argument ptr in lo, then wake lo_bio_wait */
  2669. {
  2670. - max_loop = simple_strtol(str, NULL, 0);
  2671. - return 1;
  2672. + lo->lo_keyscrub_ptr = ptr;
  2673. + wmb(); /* order the ptr store before the fn store; presumably the reader tests fn first (reader not shown here) */
  2674. + lo->lo_keyscrub_fn = fn;
  2675. + wake_up_interruptible(&lo->lo_bio_wait); /* wake sleepers on lo_bio_wait (presumably the loop thread) so the new fn is noticed */
  2676. }
  2677. -
  2678. -__setup("max_loop=", max_loop_setup);
  2679. +EXPORT_SYMBOL(loop_add_keyscrub_fn);
  2680. #endif
  2681. diff -pruN linux-2.6.9_orig/drivers/misc/Makefile linux-2.6.9/drivers/misc/Makefile
  2682. --- linux-2.6.9_orig/drivers/misc/Makefile 2004-10-18 23:55:24.000000000 +0200
  2683. +++ linux-2.6.9/drivers/misc/Makefile 2004-10-25 14:26:31.845232960 +0200
  2684. @@ -4,3 +4,24 @@
  2685. obj- := misc.o # Dummy rule to force built-in.o to be made
  2686. obj-$(CONFIG_IBM_ASM) += ibmasm/
  2687. +
  2688. +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
  2689. +AES_X86_ASM=n
  2690. +ifeq ($(CONFIG_X86),y)
  2691. +ifneq ($(CONFIG_X86_64),y)
  2692. + AES_X86_ASM=y
  2693. +endif # CONFIG_X86_64 != y
  2694. +endif # CONFIG_X86 == y
  2695. +ifeq ($(AES_X86_ASM),y)
  2696. + obj-y += aes-x86.o md5-x86.o crypto-ksym.o
  2697. + AFLAGS_aes-x86.o := -DUSE_UNDERLINE=1
  2698. +else # AES_X86_ASM != y: x86-64 or any other architecture
  2699. +ifeq ($(CONFIG_X86_64),y)
  2700. + obj-y += aes-amd64.o md5-amd64.o crypto-ksym.o
  2701. + AFLAGS_aes-amd64.o := -DUSE_UNDERLINE=1
  2702. +else # not x86-64 either: build aes.o and md5.o instead of the x86/amd64 objects
  2703. + obj-y += aes.o md5.o crypto-ksym.o
  2704. + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1
  2705. +endif # CONFIG_X86_64
  2706. +endif # AES_X86_ASM
  2707. +endif # CONFIG_BLK_DEV_LOOP_AES
  2708. diff -pruN linux-2.6.9_orig/drivers/misc/aes-amd64.S linux-2.6.9/drivers/misc/aes-amd64.S
  2709. --- linux-2.6.9_orig/drivers/misc/aes-amd64.S 1970-01-01 01:00:00.000000000 +0100
  2710. +++ linux-2.6.9/drivers/misc/aes-amd64.S 2004-10-25 14:26:31.848232504 +0200
  2711. @@ -0,0 +1,893 @@
  2712. +//
  2713. +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
  2714. +// All rights reserved.
  2715. +//
  2716. +// TERMS
  2717. +//
  2718. +// Redistribution and use in source and binary forms, with or without
  2719. +// modification, are permitted subject to the following conditions:
  2720. +//
  2721. +// 1. Redistributions of source code must retain the above copyright
  2722. +// notice, this list of conditions and the following disclaimer.
  2723. +//
  2724. +// 2. Redistributions in binary form must reproduce the above copyright
  2725. +// notice, this list of conditions and the following disclaimer in the
  2726. +// documentation and/or other materials provided with the distribution.
  2727. +//
  2728. +// 3. The copyright holder's name must not be used to endorse or promote
  2729. +// any products derived from this software without his specific prior
  2730. +// written permission.
  2731. +//
  2732. +// This software is provided 'as is' with no express or implied warranties
  2733. +// of correctness or fitness for purpose.
  2734. +
  2735. +// Modified by Jari Ruusu, December 24 2001
  2736. +// - Converted syntax to GNU CPP/assembler syntax
  2737. +// - C programming interface converted back to "old" API
  2738. +// - Minor portability cleanups and speed optimizations
  2739. +
  2740. +// Modified by Jari Ruusu, April 11 2002
  2741. +// - Added above copyright and terms to resulting object code so that
  2742. +// binary distributions can avoid legal trouble
  2743. +
  2744. +// Modified by Jari Ruusu, June 12 2004
  2745. +// - Converted 32 bit x86 code to 64 bit AMD64 code
  2746. +// - Re-wrote encrypt and decrypt code from scratch
  2747. +
  2748. +// An AES (Rijndael) implementation for the AMD64. This version only
  2749. +// implements the standard AES block length (128 bits, 16 bytes). This code
  2750. +// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
  2751. +// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are
  2752. +// preserved across calls.
  2753. +
  2754. +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
  2755. +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2756. +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2757. +
  2758. +#if defined(USE_UNDERLINE)
  2759. +# define aes_set_key _aes_set_key
  2760. +# define aes_encrypt _aes_encrypt
  2761. +# define aes_decrypt _aes_decrypt
  2762. +#endif
  2763. +#if !defined(ALIGN64BYTES)
  2764. +# define ALIGN64BYTES 64
  2765. +#endif
  2766. +
  2767. + .file "aes-amd64.S"
  2768. + .globl aes_set_key
  2769. + .globl aes_encrypt
  2770. + .globl aes_decrypt
  2771. +
  2772. + .section .rodata
  2773. +copyright:
  2774. + .ascii " \000"
  2775. + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
  2776. + .ascii "All rights reserved.\000"
  2777. + .ascii " \000"
  2778. + .ascii "TERMS\000"
  2779. + .ascii " \000"
  2780. + .ascii " Redistribution and use in source and binary forms, with or without\000"
  2781. + .ascii " modification, are permitted subject to the following conditions:\000"
  2782. + .ascii " \000"
  2783. + .ascii " 1. Redistributions of source code must retain the above copyright\000"
  2784. + .ascii " notice, this list of conditions and the following disclaimer.\000"
  2785. + .ascii " \000"
  2786. + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
  2787. + .ascii " notice, this list of conditions and the following disclaimer in the\000"
  2788. + .ascii " documentation and/or other materials provided with the distribution.\000"
  2789. + .ascii " \000"
  2790. + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
  2791. + .ascii " any products derived from this software without his specific prior\000"
  2792. + .ascii " written permission.\000"
  2793. + .ascii " \000"
  2794. + .ascii " This software is provided 'as is' with no express or implied warranties\000"
  2795. + .ascii " of correctness or fitness for purpose.\000"
  2796. + .ascii " \000"
  2797. +
  2798. +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  2799. +
  2800. +// offsets in context structure
  2801. +
  2802. +#define nkey 0 // key length, size 4
  2803. +#define nrnd 4 // number of rounds, size 4
  2804. +#define ekey 8 // encryption key schedule base address, size 256
  2805. +#define dkey 264 // decryption key schedule base address, size 256
  2806. +
  2807. +// This macro performs a forward encryption cycle. It is entered with
  2808. +// the previous round's column values in I1E, I2E, I3E and I4E and
  2809. +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
  2810. +
  2811. +#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
  2812. + movl p2(%rbp),OU1 ;\
  2813. + movl p2+4(%rbp),OU2 ;\
  2814. + movl p2+8(%rbp),OU3 ;\
  2815. + movl p2+12(%rbp),OU4 ;\
  2816. + movzbl I1B,%edi ;\
  2817. + movzbl I2B,%esi ;\
  2818. + movzbl I3B,%r8d ;\
  2819. + movzbl I4B,%r13d ;\
  2820. + shrl $8,I3E ;\
  2821. + shrl $8,I4E ;\
  2822. + xorl p1(,%rdi,4),OU1 ;\
  2823. + xorl p1(,%rsi,4),OU2 ;\
  2824. + xorl p1(,%r8,4),OU3 ;\
  2825. + xorl p1(,%r13,4),OU4 ;\
  2826. + movzbl I2H,%esi ;\
  2827. + movzbl I3B,%r8d ;\
  2828. + movzbl I4B,%r13d ;\
  2829. + movzbl I1H,%edi ;\
  2830. + shrl $8,I3E ;\
  2831. + shrl $8,I4E ;\
  2832. + xorl p1+tlen(,%rsi,4),OU1 ;\
  2833. + xorl p1+tlen(,%r8,4),OU2 ;\
  2834. + xorl p1+tlen(,%r13,4),OU3 ;\
  2835. + xorl p1+tlen(,%rdi,4),OU4 ;\
  2836. + shrl $16,I1E ;\
  2837. + shrl $16,I2E ;\
  2838. + movzbl I3B,%r8d ;\
  2839. + movzbl I4B,%r13d ;\
  2840. + movzbl I1B,%edi ;\
  2841. + movzbl I2B,%esi ;\
  2842. + xorl p1+2*tlen(,%r8,4),OU1 ;\
  2843. + xorl p1+2*tlen(,%r13,4),OU2 ;\
  2844. + xorl p1+2*tlen(,%rdi,4),OU3 ;\
  2845. + xorl p1+2*tlen(,%rsi,4),OU4 ;\
  2846. + shrl $8,I4E ;\
  2847. + movzbl I1H,%edi ;\
  2848. + movzbl I2H,%esi ;\
  2849. + shrl $8,I3E ;\
  2850. + xorl p1+3*tlen(,I4R,4),OU1 ;\
  2851. + xorl p1+3*tlen(,%rdi,4),OU2 ;\
  2852. + xorl p1+3*tlen(,%rsi,4),OU3 ;\
  2853. + xorl p1+3*tlen(,I3R,4),OU4
  2854. +
  2855. +// This macro performs an inverse (decryption) cycle. It is entered with
  2856. +// the previous round's column values in I1E, I2E, I3E and I4E and
  2857. +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
  2858. +
  2859. +#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
  2860. + movl p2+12(%rbp),OU4 ;\
  2861. + movl p2+8(%rbp),OU3 ;\
  2862. + movl p2+4(%rbp),OU2 ;\
  2863. + movl p2(%rbp),OU1 ;\
  2864. + movzbl I4B,%edi ;\
  2865. + movzbl I3B,%esi ;\
  2866. + movzbl I2B,%r8d ;\
  2867. + movzbl I1B,%r13d ;\
  2868. + shrl $8,I2E ;\
  2869. + shrl $8,I1E ;\
  2870. + xorl p1(,%rdi,4),OU4 ;\
  2871. + xorl p1(,%rsi,4),OU3 ;\
  2872. + xorl p1(,%r8,4),OU2 ;\
  2873. + xorl p1(,%r13,4),OU1 ;\
  2874. + movzbl I3H,%esi ;\
  2875. + movzbl I2B,%r8d ;\
  2876. + movzbl I1B,%r13d ;\
  2877. + movzbl I4H,%edi ;\
  2878. + shrl $8,I2E ;\
  2879. + shrl $8,I1E ;\
  2880. + xorl p1+tlen(,%rsi,4),OU4 ;\
  2881. + xorl p1+tlen(,%r8,4),OU3 ;\
  2882. + xorl p1+tlen(,%r13,4),OU2 ;\
  2883. + xorl p1+tlen(,%rdi,4),OU1 ;\
  2884. + shrl $16,I4E ;\
  2885. + shrl $16,I3E ;\
  2886. + movzbl I2B,%r8d ;\
  2887. + movzbl I1B,%r13d ;\
  2888. + movzbl I4B,%edi ;\
  2889. + movzbl I3B,%esi ;\
  2890. + xorl p1+2*tlen(,%r8,4),OU4 ;\
  2891. + xorl p1+2*tlen(,%r13,4),OU3 ;\
  2892. + xorl p1+2*tlen(,%rdi,4),OU2 ;\
  2893. + xorl p1+2*tlen(,%rsi,4),OU1 ;\
  2894. + shrl $8,I1E ;\
  2895. + movzbl I4H,%edi ;\
  2896. + movzbl I3H,%esi ;\
  2897. + shrl $8,I2E ;\
  2898. + xorl p1+3*tlen(,I1R,4),OU4 ;\
  2899. + xorl p1+3*tlen(,%rdi,4),OU3 ;\
  2900. + xorl p1+3*tlen(,%rsi,4),OU2 ;\
  2901. + xorl p1+3*tlen(,I2R,4),OU1
  2902. +
  2903. +// AES (Rijndael) Encryption Subroutine
  2904. +
  2905. +// rdi = pointer to AES context
  2906. +// rsi = pointer to input plaintext bytes
  2907. +// rdx = pointer to output ciphertext bytes
  2908. +
  2909. + .text
  2910. + .align ALIGN64BYTES
  2911. +aes_encrypt: // encrypt one 16-byte block: rdi = context, rsi = input, rdx = output
  2912. + movl (%rsi),%eax // read in plaintext
  2913. + movl 4(%rsi),%ecx // the four state words live in eax, ecx, r10d, r11d
  2914. + movl 8(%rsi),%r10d
  2915. + movl 12(%rsi),%r11d
  2916. +
  2917. + pushq %rbp // save the registers this routine must preserve (popped in reverse below)
  2918. + leaq ekey+16(%rdi),%rbp // encryption key pointer
  2919. + movq %rdx,%r9 // pointer to out block
  2920. + movl nrnd(%rdi),%edx // number of rounds
  2921. + pushq %rbx
  2922. + pushq %r13
  2923. + pushq %r14
  2924. + pushq %r15
  2925. +
  2926. + xorl -16(%rbp),%eax // xor in first round key
  2927. + xorl -12(%rbp),%ecx // rbp is biased by 16, so the first round key sits at -16(%rbp)
  2928. + xorl -8(%rbp),%r10d
  2929. + xorl -4(%rbp),%r11d
  2930. +
  2931. + subl $10,%edx // edx = rounds - 10
  2932. + je aes_15 // 10 rounds: only the ten cycles at aes_15 are run
  2933. + addq $32,%rbp // advance the key pointer by two 16-byte round keys
  2934. + subl $2,%edx // edx = rounds - 12
  2935. + je aes_13 // 12 rounds: two cycles at aes_13, then aes_15
  2936. + addq $32,%rbp // any other count is handled as 14 rounds: two more round keys
  2937. +
  2938. + fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) // edx is reused as a state word from here on
  2939. + fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2940. + jmp aes_13 // skip over the alignment padding that follows
  2941. + .align ALIGN64BYTES
  2942. +aes_13: fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2943. + fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2944. + jmp aes_15 // skip over the alignment padding that follows
  2945. + .align ALIGN64BYTES
  2946. +aes_15: fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d) // cycles alternate between the two register sets
  2947. + fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2948. + fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2949. + fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2950. + fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2951. + fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2952. + fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2953. + fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2954. + fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2955. + fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d) // last cycle uses the last-round table aes_fl_tab
  2956. +
  2957. + popq %r15 // restore saved registers in reverse push order
  2958. + popq %r14
  2959. + popq %r13
  2960. + popq %rbx
  2961. + popq %rbp
  2962. +
  2963. + movl %eax,(%r9) // move final values to the output array.
  2964. + movl %ecx,4(%r9)
  2965. + movl %r10d,8(%r9)
  2966. + movl %r11d,12(%r9)
  2967. + ret
  2968. +
  2969. +// AES (Rijndael) Decryption Subroutine
  2970. +
  2971. +// rdi = pointer to AES context
  2972. +// rsi = pointer to input ciphertext bytes
  2973. +// rdx = pointer to output plaintext bytes
  2974. +
  2975. + .align ALIGN64BYTES
  2976. +aes_decrypt: // decrypt one 16-byte block: rdi = context, rsi = input, rdx = output
  2977. + movl 12(%rsi),%eax // read in ciphertext
  2978. + movl 8(%rsi),%ecx // words are held in reverse order: word 3 in eax ... word 0 in r11d
  2979. + movl 4(%rsi),%r10d
  2980. + movl (%rsi),%r11d
  2981. +
  2982. + pushq %rbp // save the registers this routine must preserve (popped in reverse below)
  2983. + leaq dkey+16(%rdi),%rbp // decryption key pointer
  2984. + movq %rdx,%r9 // pointer to out block
  2985. + movl nrnd(%rdi),%edx // number of rounds
  2986. + pushq %rbx
  2987. + pushq %r13
  2988. + pushq %r14
  2989. + pushq %r15
  2990. +
  2991. + xorl -4(%rbp),%eax // xor in first round key
  2992. + xorl -8(%rbp),%ecx // rbp is biased by 16, so the first round key occupies -16..-1(%rbp)
  2993. + xorl -12(%rbp),%r10d
  2994. + xorl -16(%rbp),%r11d
  2995. +
  2996. + subl $10,%edx // edx = rounds - 10
  2997. + je aes_25 // 10 rounds: only the ten cycles at aes_25 are run
  2998. + addq $32,%rbp // advance the key pointer by two 16-byte round keys
  2999. + subl $2,%edx // edx = rounds - 12
  3000. + je aes_23 // 12 rounds: two cycles at aes_23, then aes_25
  3001. + addq $32,%rbp // any other count is handled as 14 rounds: two more round keys
  3002. +
  3003. + inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) // edx is reused as a state word from here on
  3004. + inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3005. + jmp aes_23 // skip over the alignment padding that follows
  3006. + .align ALIGN64BYTES
  3007. +aes_23: inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  3008. + inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3009. + jmp aes_25 // skip over the alignment padding that follows
  3010. + .align ALIGN64BYTES
  3011. +aes_25: inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) // cycles alternate between the two register sets
  3012. + inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3013. + inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  3014. + inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3015. + inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  3016. + inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3017. + inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  3018. + inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  3019. + inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  3020. + inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) // last cycle switches to aes_il_tab (presumably the inverse last-round table - confirm at its definition)
  3021. +
  3022. + popq %r15 // restore saved registers in reverse push order
  3023. + popq %r14
  3024. + popq %r13
  3025. + popq %rbx
  3026. + popq %rbp
  3027. +
  3028. + movl %eax,12(%r9) // move final values to the output array.
  3029. + movl %ecx,8(%r9) // stored in the same reversed word order used when loading
  3030. + movl %r10d,4(%r9)
  3031. + movl %r11d,(%r9)
  3032. + ret
  3033. +
  3034. +// AES (Rijndael) Key Schedule Subroutine
  3035. +
  3036. +// This macro performs a column mixing operation on the 32-bit input word
  3037. +// in %ebx to give a 32-bit result in %eax. Each of the 4 input bytes
  3038. +// indexes one of 4 tables of 256 32-bit words (starting at p1) and the
  3039. +// entries are xored together. %ecx is scratch; %ebx ends up rotated by 16.
  3040. +
  3041. +#define mix_col(p1) \
  3042. + movzbl %bl,%ecx ;\
  3043. + movl p1(,%rcx,4),%eax ;\
  3044. + movzbl %bh,%ecx ;\
  3045. + ror $16,%ebx ;\
  3046. + xorl p1+tlen(,%rcx,4),%eax ;\
  3047. + movzbl %bl,%ecx ;\
  3048. + xorl p1+2*tlen(,%rcx,4),%eax ;\
  3049. + movzbl %bh,%ecx ;\
  3050. + xorl p1+3*tlen(,%rcx,4),%eax
  3051. +
  3052. +// Key Schedule Macros
  3053. +
  3054. +#define ksc4(p1) \
  3055. + rol $24,%ebx ;\
  3056. + mix_col(aes_fl_tab) ;\
  3057. + ror $8,%ebx ;\
  3058. + xorl 4*p1+aes_rcon_tab,%eax ;\
  3059. + xorl %eax,%esi ;\
  3060. + xorl %esi,%ebp ;\
  3061. + movl %esi,16*p1(%rdi) ;\
  3062. + movl %ebp,16*p1+4(%rdi) ;\
  3063. + xorl %ebp,%edx ;\
  3064. + xorl %edx,%ebx ;\
  3065. + movl %edx,16*p1+8(%rdi) ;\
  3066. + movl %ebx,16*p1+12(%rdi)
  3067. +
  3068. +#define ksc6(p1) \
  3069. + rol $24,%ebx ;\
  3070. + mix_col(aes_fl_tab) ;\
  3071. + ror $8,%ebx ;\
  3072. + xorl 4*p1+aes_rcon_tab,%eax ;\
  3073. + xorl 24*p1-24(%rdi),%eax ;\
  3074. + movl %eax,24*p1(%rdi) ;\
  3075. + xorl 24*p1-20(%rdi),%eax ;\
  3076. + movl %eax,24*p1+4(%rdi) ;\
  3077. + xorl %eax,%esi ;\
  3078. + xorl %esi,%ebp ;\
  3079. + movl %esi,24*p1+8(%rdi) ;\
  3080. + movl %ebp,24*p1+12(%rdi) ;\
  3081. + xorl %ebp,%edx ;\
  3082. + xorl %edx,%ebx ;\
  3083. + movl %edx,24*p1+16(%rdi) ;\
  3084. + movl %ebx,24*p1+20(%rdi)
  3085. +
  3086. +#define ksc8(p1) \
  3087. + rol $24,%ebx ;\
  3088. + mix_col(aes_fl_tab) ;\
  3089. + ror $8,%ebx ;\
  3090. + xorl 4*p1+aes_rcon_tab,%eax ;\
  3091. + xorl 32*p1-32(%rdi),%eax ;\
  3092. + movl %eax,32*p1(%rdi) ;\
  3093. + xorl 32*p1-28(%rdi),%eax ;\
  3094. + movl %eax,32*p1+4(%rdi) ;\
  3095. + xorl 32*p1-24(%rdi),%eax ;\
  3096. + movl %eax,32*p1+8(%rdi) ;\
  3097. + xorl 32*p1-20(%rdi),%eax ;\
  3098. + movl %eax,32*p1+12(%rdi) ;\
  3099. + pushq %rbx ;\
  3100. + movl %eax,%ebx ;\
  3101. + mix_col(aes_fl_tab) ;\
  3102. + popq %rbx ;\
  3103. + xorl %eax,%esi ;\
  3104. + xorl %esi,%ebp ;\
  3105. + movl %esi,32*p1+16(%rdi) ;\
  3106. + movl %ebp,32*p1+20(%rdi) ;\
  3107. + xorl %ebp,%edx ;\
  3108. + xorl %edx,%ebx ;\
  3109. + movl %edx,32*p1+24(%rdi) ;\
  3110. + movl %ebx,32*p1+28(%rdi)
  3111. +
  3112. +// rdi = pointer to AES context
  3113. +// rsi = pointer to key bytes
  3114. +// rdx = key length, bytes or bits
  3115. +// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
  3116. +
  3117. + .align ALIGN64BYTES
  3118. +aes_set_key: // build the encryption (and optionally decryption) key schedule in the context
  3119. + pushfq // save flags; cld below changes the direction flag, popfq restores it
  3120. + pushq %rbp // rbp and rbx are used as work registers below
  3121. + pushq %rbx
  3122. +
  3123. + movq %rcx,%r11 // ed_flg
  3124. + movq %rdx,%rcx // key length
  3125. + movq %rdi,%r10 // AES context
  3126. +
  3127. + cmpl $128,%ecx // values below 128 are taken as a byte count
  3128. + jb aes_30
  3129. + shrl $3,%ecx // otherwise the value is in bits: convert to bytes
  3130. +aes_30: cmpl $32,%ecx // accept 32 or 24 bytes as given
  3131. + je aes_32
  3132. + cmpl $24,%ecx
  3133. + je aes_32
  3134. + movl $16,%ecx // every other length is treated as 16 bytes
  3135. +aes_32: shrl $2,%ecx // bytes -> 32-bit words (4, 6 or 8)
  3136. + movl %ecx,nkey(%r10) // store key length in words in the context
  3137. + leaq 6(%rcx),%rax // 10/12/14 for 4/6/8 32-bit key length
  3138. + movl %eax,nrnd(%r10) // store number of rounds in the context
  3139. + leaq ekey(%r10),%rdi // key position in AES context
  3140. + cld // string instructions below must count upwards
  3141. + movl %ecx,%eax // save key length in eax
  3142. + rep ; movsl // words in the key schedule
  3143. + movl -4(%rsi),%ebx // put some values in registers
  3144. + movl -8(%rsi),%edx // to allow faster code
  3145. + movl -12(%rsi),%ebp // rsi now points just past the key, so these are its last four words
  3146. + movl -16(%rsi),%esi
  3147. +
  3148. + cmpl $4,%eax // jump on key size
  3149. + je aes_36
  3150. + cmpl $6,%eax
  3151. + je aes_35
  3152. +
  3153. + ksc8(0) // 8-word key: seven steps of 8 words each, written after the copied key
  3154. + ksc8(1)
  3155. + ksc8(2)
  3156. + ksc8(3)
  3157. + ksc8(4)
  3158. + ksc8(5)
  3159. + ksc8(6)
  3160. + jmp aes_37
  3161. +aes_35: ksc6(0) // 6-word key: eight steps of 6 words each
  3162. + ksc6(1)
  3163. + ksc6(2)
  3164. + ksc6(3)
  3165. + ksc6(4)
  3166. + ksc6(5)
  3167. + ksc6(6)
  3168. + ksc6(7)
  3169. + jmp aes_37
  3170. +aes_36: ksc4(0) // 4-word key: ten steps of 4 words each
  3171. + ksc4(1)
  3172. + ksc4(2)
  3173. + ksc4(3)
  3174. + ksc4(4)
  3175. + ksc4(5)
  3176. + ksc4(6)
  3177. + ksc4(7)
  3178. + ksc4(8)
  3179. + ksc4(9)
  3180. +aes_37: cmpl $0,%r11d // ed_flg
  3181. + jne aes_39 // any non-zero flag: encryption only, skip the decryption schedule
  3182. +
  3183. +// compile decryption key schedule from encryption schedule - reverse
  3184. +// order and do mix_column operation on round keys except first and last
  3185. +
  3186. + movl nrnd(%r10),%eax // kt = cx->d_key + nc * cx->Nrnd
  3187. + shl $2,%rax // rax = 4 * nrnd; scaled by 4 again in the leaq below (16 bytes per round key)
  3188. + leaq dkey(%r10,%rax,4),%rdi // rdi = last 16-byte slot of the decryption schedule
  3189. + leaq ekey(%r10),%rsi // kf = cx->e_key
  3190. +
  3191. + movsq // copy first round key (unmodified)
  3192. + movsq
  3193. + subq $32,%rdi // the copy advanced rdi by 16; step back to the previous 16-byte slot
  3194. + movl $1,%r9d // round counter, compared against nrnd at the loop end
  3195. +aes_38: // do mix column on each column of
  3196. + lodsl // each round key
  3197. + movl %eax,%ebx // mix_col takes its input in ebx and leaves the result in eax
  3198. + mix_col(aes_im_tab)
  3199. + stosl // store the transformed word into the decryption schedule
  3200. + lodsl
  3201. + movl %eax,%ebx
  3202. + mix_col(aes_im_tab)
  3203. + stosl
  3204. + lodsl
  3205. + movl %eax,%ebx
  3206. + mix_col(aes_im_tab)
  3207. + stosl
  3208. + lodsl
  3209. + movl %eax,%ebx
  3210. + mix_col(aes_im_tab)
  3211. + stosl
  3212. + subq $32,%rdi // four stores advanced rdi by 16; step back to the previous slot
  3213. +
  3214. + incl %r9d
  3215. + cmpl nrnd(%r10),%r9d
  3216. + jb aes_38 // repeat while the counter is below nrnd
  3217. +
  3218. + movsq // copy last round key (unmodified)
  3219. + movsq
  3220. +aes_39: popq %rbx // restore saved registers and flags in reverse push order
  3221. + popq %rbp
  3222. + popfq
  3223. + ret
  3224. +
  3225. +
  3226. +// finite field multiplies by {02}, {04} and {08}
  3227. +
  3228. +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
  3229. +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
  3230. +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
  3231. +
  3232. +// finite field multiplies required in table generation
  3233. +
  3234. +#define f3(x) (f2(x) ^ x)
  3235. +#define f9(x) (f8(x) ^ x)
  3236. +#define fb(x) (f8(x) ^ f2(x) ^ x)
  3237. +#define fd(x) (f8(x) ^ f4(x) ^ x)
  3238. +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
  3239. +
  3240. +// These defines generate the forward table entries
  3241. +
  3242. +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
  3243. +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
  3244. +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
  3245. +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
  3246. +
  3247. +// These defines generate the inverse table entries
  3248. +
  3249. +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
  3250. +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
  3251. +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
  3252. +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
  3253. +
  3254. +// These defines generate entries for the last round tables
  3255. +
  3256. +#define w0(x) (x)
  3257. +#define w1(x) (x << 8)
  3258. +#define w2(x) (x << 16)
  3259. +#define w3(x) (x << 24)
  3260. +
  3261. +// macro to generate inverse mix column tables (needed for the key schedule)
  3262. +
  3263. +#define im_data0(p1) \
  3264. + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
  3265. + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
  3266. + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
  3267. + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
  3268. +#define im_data1(p1) \
  3269. + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
  3270. + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
  3271. + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
  3272. + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
  3273. +#define im_data2(p1) \
  3274. + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
  3275. + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
  3276. + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
  3277. + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
  3278. +#define im_data3(p1) \
  3279. + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
  3280. + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
  3281. + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
  3282. + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
  3283. +#define im_data4(p1) \
  3284. + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
  3285. + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
  3286. + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
  3287. + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
  3288. +#define im_data5(p1) \
  3289. + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
  3290. + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
  3291. + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
  3292. + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
  3293. +#define im_data6(p1) \
  3294. + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
  3295. + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
  3296. + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
  3297. + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
  3298. +#define im_data7(p1) \
  3299. + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
  3300. + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
  3301. + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
  3302. + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
  3303. +
  3304. +// S-box data - 256 entries
  3305. +
  3306. +#define sb_data0(p1) \
  3307. + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
  3308. + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
  3309. + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
  3310. + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
  3311. +#define sb_data1(p1) \
  3312. + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
  3313. + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
  3314. + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
  3315. + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
  3316. +#define sb_data2(p1) \
  3317. + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
  3318. + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
  3319. + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
  3320. + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
  3321. +#define sb_data3(p1) \
  3322. + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
  3323. + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
  3324. + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
  3325. + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
  3326. +#define sb_data4(p1) \
  3327. + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
  3328. + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
  3329. + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
  3330. + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
  3331. +#define sb_data5(p1) \
  3332. + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
  3333. + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
  3334. + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
  3335. + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
  3336. +#define sb_data6(p1) \
  3337. + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
  3338. + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
  3339. + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
  3340. + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
  3341. +#define sb_data7(p1) \
  3342. + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
  3343. + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
  3344. + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
  3345. + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
  3346. +
  3347. +// Inverse S-box data - 256 entries
  3348. +
  3349. +#define ib_data0(p1) \
  3350. + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
  3351. + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
  3352. + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
  3353. + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
  3354. +#define ib_data1(p1) \
  3355. + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
  3356. + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
  3357. + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
  3358. + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
  3359. +#define ib_data2(p1) \
  3360. + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
  3361. + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
  3362. + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
  3363. + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
  3364. +#define ib_data3(p1) \
  3365. + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
  3366. + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
  3367. + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
  3368. + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
  3369. +#define ib_data4(p1) \
  3370. + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
  3371. + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
  3372. + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
  3373. + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
  3374. +#define ib_data5(p1) \
  3375. + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
  3376. + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
  3377. + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
  3378. + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
  3379. +#define ib_data6(p1) \
  3380. + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
  3381. + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
  3382. + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
  3383. + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
  3384. +#define ib_data7(p1) \
  3385. + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
  3386. + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
  3387. + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
  3388. + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
  3389. +
  3390. +// The rcon_table (needed for the key schedule)
  3391. +//
  3392. +// Here is original Dr Brian Gladman's source code:
  3393. +// _rcon_tab:
  3394. +// %assign x 1
  3395. +// %rep 29
  3396. +// dd x
  3397. +// %assign x f2(x)
  3398. +// %endrep
  3399. +//
  3400. +// Here is precomputed output (it's more portable this way):
  3401. +
  3402. + .section .rodata
  3403. + .align ALIGN64BYTES
  3404. +aes_rcon_tab: // 29 round constants: 0x01, then each entry is f2() of the previous one
  3405. + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
  3406. + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
  3407. + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
  3408. + .long 0xb3,0x7d,0xfa,0xef,0xc5
  3409. +
  3410. +// The forward xor tables
  3411. +
  3412. + .align ALIGN64BYTES
  3413. +aes_ft_tab:
  3414. + sb_data0(u0)
  3415. + sb_data1(u0)
  3416. + sb_data2(u0)
  3417. + sb_data3(u0)
  3418. + sb_data4(u0)
  3419. + sb_data5(u0)
  3420. + sb_data6(u0)
  3421. + sb_data7(u0)
  3422. +
  3423. + sb_data0(u1)
  3424. + sb_data1(u1)
  3425. + sb_data2(u1)
  3426. + sb_data3(u1)
  3427. + sb_data4(u1)
  3428. + sb_data5(u1)
  3429. + sb_data6(u1)
  3430. + sb_data7(u1)
  3431. +
  3432. + sb_data0(u2)
  3433. + sb_data1(u2)
  3434. + sb_data2(u2)
  3435. + sb_data3(u2)
  3436. + sb_data4(u2)
  3437. + sb_data5(u2)
  3438. + sb_data6(u2)
  3439. + sb_data7(u2)
  3440. +
  3441. + sb_data0(u3)
  3442. + sb_data1(u3)
  3443. + sb_data2(u3)
  3444. + sb_data3(u3)
  3445. + sb_data4(u3)
  3446. + sb_data5(u3)
  3447. + sb_data6(u3)
  3448. + sb_data7(u3)
  3449. +
  3450. + .align ALIGN64BYTES
  3451. +aes_fl_tab:
  3452. + sb_data0(w0)
  3453. + sb_data1(w0)
  3454. + sb_data2(w0)
  3455. + sb_data3(w0)
  3456. + sb_data4(w0)
  3457. + sb_data5(w0)
  3458. + sb_data6(w0)
  3459. + sb_data7(w0)
  3460. +
  3461. + sb_data0(w1)
  3462. + sb_data1(w1)
  3463. + sb_data2(w1)
  3464. + sb_data3(w1)
  3465. + sb_data4(w1)
  3466. + sb_data5(w1)
  3467. + sb_data6(w1)
  3468. + sb_data7(w1)
  3469. +
  3470. + sb_data0(w2)
  3471. + sb_data1(w2)
  3472. + sb_data2(w2)
  3473. + sb_data3(w2)
  3474. + sb_data4(w2)
  3475. + sb_data5(w2)
  3476. + sb_data6(w2)
  3477. + sb_data7(w2)
  3478. +
  3479. + sb_data0(w3)
  3480. + sb_data1(w3)
  3481. + sb_data2(w3)
  3482. + sb_data3(w3)
  3483. + sb_data4(w3)
  3484. + sb_data5(w3)
  3485. + sb_data6(w3)
  3486. + sb_data7(w3)
  3487. +
  3488. +// The inverse xor tables
  3489. +
  3490. + .align ALIGN64BYTES
  3491. +aes_it_tab:
  3492. + ib_data0(v0)
  3493. + ib_data1(v0)
  3494. + ib_data2(v0)
  3495. + ib_data3(v0)
  3496. + ib_data4(v0)
  3497. + ib_data5(v0)
  3498. + ib_data6(v0)
  3499. + ib_data7(v0)
  3500. +
  3501. + ib_data0(v1)
  3502. + ib_data1(v1)
  3503. + ib_data2(v1)
  3504. + ib_data3(v1)
  3505. + ib_data4(v1)
  3506. + ib_data5(v1)
  3507. + ib_data6(v1)
  3508. + ib_data7(v1)
  3509. +
  3510. + ib_data0(v2)
  3511. + ib_data1(v2)
  3512. + ib_data2(v2)
  3513. + ib_data3(v2)
  3514. + ib_data4(v2)
  3515. + ib_data5(v2)
  3516. + ib_data6(v2)
  3517. + ib_data7(v2)
  3518. +
  3519. + ib_data0(v3)
  3520. + ib_data1(v3)
  3521. + ib_data2(v3)
  3522. + ib_data3(v3)
  3523. + ib_data4(v3)
  3524. + ib_data5(v3)
  3525. + ib_data6(v3)
  3526. + ib_data7(v3)
  3527. +
  3528. + .align ALIGN64BYTES
  3529. +aes_il_tab:
  3530. + ib_data0(w0)
  3531. + ib_data1(w0)
  3532. + ib_data2(w0)
  3533. + ib_data3(w0)
  3534. + ib_data4(w0)
  3535. + ib_data5(w0)
  3536. + ib_data6(w0)
  3537. + ib_data7(w0)
  3538. +
  3539. + ib_data0(w1)
  3540. + ib_data1(w1)
  3541. + ib_data2(w1)
  3542. + ib_data3(w1)
  3543. + ib_data4(w1)
  3544. + ib_data5(w1)
  3545. + ib_data6(w1)
  3546. + ib_data7(w1)
  3547. +
  3548. + ib_data0(w2)
  3549. + ib_data1(w2)
  3550. + ib_data2(w2)
  3551. + ib_data3(w2)
  3552. + ib_data4(w2)
  3553. + ib_data5(w2)
  3554. + ib_data6(w2)
  3555. + ib_data7(w2)
  3556. +
  3557. + ib_data0(w3)
  3558. + ib_data1(w3)
  3559. + ib_data2(w3)
  3560. + ib_data3(w3)
  3561. + ib_data4(w3)
  3562. + ib_data5(w3)
  3563. + ib_data6(w3)
  3564. + ib_data7(w3)
  3565. +
  3566. +// The inverse mix column tables
  3567. +
  3568. + .align ALIGN64BYTES
  3569. +aes_im_tab:
  3570. + im_data0(v0)
  3571. + im_data1(v0)
  3572. + im_data2(v0)
  3573. + im_data3(v0)
  3574. + im_data4(v0)
  3575. + im_data5(v0)
  3576. + im_data6(v0)
  3577. + im_data7(v0)
  3578. +
  3579. + im_data0(v1)
  3580. + im_data1(v1)
  3581. + im_data2(v1)
  3582. + im_data3(v1)
  3583. + im_data4(v1)
  3584. + im_data5(v1)
  3585. + im_data6(v1)
  3586. + im_data7(v1)
  3587. +
  3588. + im_data0(v2)
  3589. + im_data1(v2)
  3590. + im_data2(v2)
  3591. + im_data3(v2)
  3592. + im_data4(v2)
  3593. + im_data5(v2)
  3594. + im_data6(v2)
  3595. + im_data7(v2)
  3596. +
  3597. + im_data0(v3)
  3598. + im_data1(v3)
  3599. + im_data2(v3)
  3600. + im_data3(v3)
  3601. + im_data4(v3)
  3602. + im_data5(v3)
  3603. + im_data6(v3)
  3604. + im_data7(v3)
  3605. diff -pruN linux-2.6.9_orig/drivers/misc/aes-x86.S linux-2.6.9/drivers/misc/aes-x86.S
  3606. --- linux-2.6.9_orig/drivers/misc/aes-x86.S 1970-01-01 01:00:00.000000000 +0100
  3607. +++ linux-2.6.9/drivers/misc/aes-x86.S 2004-10-25 14:26:31.850232200 +0200
  3608. @@ -0,0 +1,922 @@
  3609. +//
  3610. +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
  3611. +// All rights reserved.
  3612. +//
  3613. +// TERMS
  3614. +//
  3615. +// Redistribution and use in source and binary forms, with or without
  3616. +// modification, are permitted subject to the following conditions:
  3617. +//
  3618. +// 1. Redistributions of source code must retain the above copyright
  3619. +// notice, this list of conditions and the following disclaimer.
  3620. +//
  3621. +// 2. Redistributions in binary form must reproduce the above copyright
  3622. +// notice, this list of conditions and the following disclaimer in the
  3623. +// documentation and/or other materials provided with the distribution.
  3624. +//
  3625. +// 3. The copyright holder's name must not be used to endorse or promote
  3626. +// any products derived from this software without his specific prior
  3627. +// written permission.
  3628. +//
  3629. +// This software is provided 'as is' with no express or implied warranties
  3630. +// of correctness or fitness for purpose.
  3631. +
  3632. +// Modified by Jari Ruusu, December 24 2001
  3633. +// - Converted syntax to GNU CPP/assembler syntax
  3634. +// - C programming interface converted back to "old" API
  3635. +// - Minor portability cleanups and speed optimizations
  3636. +
  3637. +// Modified by Jari Ruusu, April 11 2002
  3638. +// - Added above copyright and terms to resulting object code so that
  3639. +// binary distributions can avoid legal trouble
  3640. +
  3641. +// An AES (Rijndael) implementation for x86 compatible processors. This
  3642. +// version uses i386 instruction set but instruction scheduling is optimized
  3643. +// for Pentium-2. This version only implements the standard AES block length
  3644. +// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx
  3645. +// registers or the arithmetic status flags. However, the ebx, esi, edi, and
  3646. +// ebp registers are preserved across calls.
  3647. +
  3648. +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
  3649. +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  3650. +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  3651. +
  3652. +#if defined(USE_UNDERLINE)
  3653. +# define aes_set_key _aes_set_key
  3654. +# define aes_encrypt _aes_encrypt
  3655. +# define aes_decrypt _aes_decrypt
  3656. +#endif
  3657. +#if !defined(ALIGN32BYTES)
  3658. +# define ALIGN32BYTES 32
  3659. +#endif
  3660. +
  3661. + .file "aes-x86.S"
  3662. + .globl aes_set_key
  3663. + .globl aes_encrypt
  3664. + .globl aes_decrypt
  3665. +
  3666. + .text
  3667. +copyright:
  3668. + .ascii " \000"
  3669. + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
  3670. + .ascii "All rights reserved.\000"
  3671. + .ascii " \000"
  3672. + .ascii "TERMS\000"
  3673. + .ascii " \000"
  3674. + .ascii " Redistribution and use in source and binary forms, with or without\000"
  3675. + .ascii " modification, are permitted subject to the following conditions:\000"
  3676. + .ascii " \000"
  3677. + .ascii " 1. Redistributions of source code must retain the above copyright\000"
  3678. + .ascii " notice, this list of conditions and the following disclaimer.\000"
  3679. + .ascii " \000"
  3680. + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
  3681. + .ascii " notice, this list of conditions and the following disclaimer in the\000"
  3682. + .ascii " documentation and/or other materials provided with the distribution.\000"
  3683. + .ascii " \000"
  3684. + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
  3685. + .ascii " any products derived from this software without his specific prior\000"
  3686. + .ascii " written permission.\000"
  3687. + .ascii " \000"
  3688. + .ascii " This software is provided 'as is' with no express or implied warranties\000"
  3689. + .ascii " of correctness or fitness for purpose.\000"
  3690. + .ascii " \000"
  3691. +
  3692. +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  3693. +
  3694. +// offsets to parameters with one register pushed onto stack
  3695. +
  3696. +#define ctx 8 // AES context structure
  3697. +#define in_blk 12 // input byte array address parameter
  3698. +#define out_blk 16 // output byte array address parameter
  3699. +
  3700. +// offsets in context structure
  3701. +
  3702. +#define nkey 0 // key length, size 4
  3703. +#define nrnd 4 // number of rounds, size 4
  3704. +#define ekey 8 // encryption key schedule base address, size 256
  3705. +#define dkey 264 // decryption key schedule base address, size 256
  3706. +
  3707. +// fwd_rnd(p1,p2): one forward encryption round using the xor tables at p1 and
  3708. +// the round key at p2(%ebp). Entered with the previous round column values
  3709. +// in %eax, %ebx, %esi and %edi; exits with the new values in the same registers.
  3710. +
  3711. +#define fwd_rnd(p1,p2) /* clobbers %ecx, %edx and the scratch words at (%esp) and 4(%esp) */ \
  3712. + mov %ebx,(%esp) /* spill old %ebx; reloaded into %edx below */ ;\
  3713. + movzbl %al,%edx ;\
  3714. + mov %eax,%ecx ;\
  3715. + mov p2(%ebp),%eax /* new %eax starts from round key word 0 */ ;\
  3716. + mov %edi,4(%esp) /* spill old %edi; reloaded into %ecx below */ ;\
  3717. + mov p2+12(%ebp),%edi /* new %edi starts from round key word 3 */ ;\
  3718. + xor p1(,%edx,4),%eax ;\
  3719. + movzbl %ch,%edx ;\
  3720. + shr $16,%ecx ;\
  3721. + mov p2+4(%ebp),%ebx /* new %ebx starts from round key word 1 */ ;\
  3722. + xor p1+tlen(,%edx,4),%edi ;\
  3723. + movzbl %cl,%edx ;\
  3724. + movzbl %ch,%ecx ;\
  3725. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  3726. + mov %esi,%ecx ;\
  3727. + mov p1+2*tlen(,%edx,4),%esi /* new %esi starts from a table word; its key word is xored in below */ ;\
  3728. + movzbl %cl,%edx ;\
  3729. + xor p1(,%edx,4),%esi ;\
  3730. + movzbl %ch,%edx ;\
  3731. + shr $16,%ecx ;\
  3732. + xor p1+tlen(,%edx,4),%ebx ;\
  3733. + movzbl %cl,%edx ;\
  3734. + movzbl %ch,%ecx ;\
  3735. + xor p1+2*tlen(,%edx,4),%eax ;\
  3736. + mov (%esp),%edx /* old %ebx */ ;\
  3737. + xor p1+3*tlen(,%ecx,4),%edi ;\
  3738. + movzbl %dl,%ecx ;\
  3739. + xor p2+8(%ebp),%esi /* round key word 2 */ ;\
  3740. + xor p1(,%ecx,4),%ebx ;\
  3741. + movzbl %dh,%ecx ;\
  3742. + shr $16,%edx ;\
  3743. + xor p1+tlen(,%ecx,4),%eax ;\
  3744. + movzbl %dl,%ecx ;\
  3745. + movzbl %dh,%edx ;\
  3746. + xor p1+2*tlen(,%ecx,4),%edi ;\
  3747. + mov 4(%esp),%ecx /* old %edi */ ;\
  3748. + xor p1+3*tlen(,%edx,4),%esi ;\
  3749. + movzbl %cl,%edx ;\
  3750. + xor p1(,%edx,4),%edi ;\
  3751. + movzbl %ch,%edx ;\
  3752. + shr $16,%ecx ;\
  3753. + xor p1+tlen(,%edx,4),%esi ;\
  3754. + movzbl %cl,%edx ;\
  3755. + movzbl %ch,%ecx ;\
  3756. + xor p1+2*tlen(,%edx,4),%ebx ;\
  3757. + xor p1+3*tlen(,%ecx,4),%eax
  3758. +
  3759. +// inv_rnd(p1,p2): one inverse (decryption) round using the xor tables at p1 and
  3760. +// the round key at p2(%ebp). Entered with the previous round column values
  3761. +// in %eax, %ebx, %esi and %edi; exits with the new values in the same registers.
  3762. +
  3763. +#define inv_rnd(p1,p2) /* clobbers %ecx, %edx and the scratch words at (%esp) and 4(%esp) */ \
  3764. + movzbl %al,%edx ;\
  3765. + mov %ebx,(%esp) /* spill old %ebx; reloaded into %edx below */ ;\
  3766. + mov %eax,%ecx ;\
  3767. + mov p2(%ebp),%eax /* new %eax starts from round key word 0 */ ;\
  3768. + mov %edi,4(%esp) /* spill old %edi; reloaded into %ecx below */ ;\
  3769. + mov p2+4(%ebp),%ebx /* new %ebx starts from round key word 1 */ ;\
  3770. + xor p1(,%edx,4),%eax ;\
  3771. + movzbl %ch,%edx ;\
  3772. + shr $16,%ecx ;\
  3773. + mov p2+12(%ebp),%edi /* new %edi starts from round key word 3 */ ;\
  3774. + xor p1+tlen(,%edx,4),%ebx ;\
  3775. + movzbl %cl,%edx ;\
  3776. + movzbl %ch,%ecx ;\
  3777. + xor p1+3*tlen(,%ecx,4),%edi ;\
  3778. + mov %esi,%ecx ;\
  3779. + mov p1+2*tlen(,%edx,4),%esi /* new %esi starts from a table word; its key word is xored in below */ ;\
  3780. + movzbl %cl,%edx ;\
  3781. + xor p1(,%edx,4),%esi ;\
  3782. + movzbl %ch,%edx ;\
  3783. + shr $16,%ecx ;\
  3784. + xor p1+tlen(,%edx,4),%edi ;\
  3785. + movzbl %cl,%edx ;\
  3786. + movzbl %ch,%ecx ;\
  3787. + xor p1+2*tlen(,%edx,4),%eax ;\
  3788. + mov (%esp),%edx /* old %ebx */ ;\
  3789. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  3790. + movzbl %dl,%ecx ;\
  3791. + xor p2+8(%ebp),%esi /* round key word 2 */ ;\
  3792. + xor p1(,%ecx,4),%ebx ;\
  3793. + movzbl %dh,%ecx ;\
  3794. + shr $16,%edx ;\
  3795. + xor p1+tlen(,%ecx,4),%esi ;\
  3796. + movzbl %dl,%ecx ;\
  3797. + movzbl %dh,%edx ;\
  3798. + xor p1+2*tlen(,%ecx,4),%edi ;\
  3799. + mov 4(%esp),%ecx /* old %edi */ ;\
  3800. + xor p1+3*tlen(,%edx,4),%eax ;\
  3801. + movzbl %cl,%edx ;\
  3802. + xor p1(,%edx,4),%edi ;\
  3803. + movzbl %ch,%edx ;\
  3804. + shr $16,%ecx ;\
  3805. + xor p1+tlen(,%edx,4),%eax ;\
  3806. + movzbl %cl,%edx ;\
  3807. + movzbl %ch,%ecx ;\
  3808. + xor p1+2*tlen(,%edx,4),%ebx ;\
  3809. + xor p1+3*tlen(,%ecx,4),%esi
  3810. +
  3811. +// AES (Rijndael) Encryption Subroutine: encrypts the 16 bytes at in_blk into out_blk
  3812. +
  3813. + .text
  3814. + .align ALIGN32BYTES
  3815. +aes_encrypt:
  3816. + push %ebp // the ctx and in_blk offsets assume exactly this one push
  3817. + mov ctx(%esp),%ebp // pointer to context
  3818. + mov in_blk(%esp),%ecx // input block address
  3819. + push %ebx
  3820. + push %esi
  3821. + push %edi
  3822. + mov nrnd(%ebp),%edx // number of rounds
  3823. + lea ekey+16(%ebp),%ebp // key pointer, biased one round key past the start of ekey
  3824. +
  3825. +// input four columns and xor in first round key
  3826. +
  3827. + mov (%ecx),%eax
  3828. + mov 4(%ecx),%ebx
  3829. + mov 8(%ecx),%esi
  3830. + mov 12(%ecx),%edi
  3831. + xor -16(%ebp),%eax // -16..-4(%ebp) is the first round key because of the bias above
  3832. + xor -12(%ebp),%ebx
  3833. + xor -8(%ebp),%esi
  3834. + xor -4(%ebp),%edi
  3835. +
  3836. + sub $8,%esp // space for register saves on stack
  3837. +
  3838. + sub $10,%edx
  3839. + je aes_15 // 10 rounds: enter the round chain at aes_15
  3840. + add $32,%ebp // two extra rounds: advance the key pointer by two round keys
  3841. + sub $2,%edx
  3842. + je aes_13 // 12 rounds: enter the round chain at aes_13
  3843. + add $32,%ebp // otherwise run all 14 rounds: two more round keys
  3844. +
  3845. + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
  3846. + fwd_rnd(aes_ft_tab,-48)
  3847. +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
  3848. + fwd_rnd(aes_ft_tab,-16)
  3849. +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
  3850. + fwd_rnd(aes_ft_tab,16)
  3851. + fwd_rnd(aes_ft_tab,32)
  3852. + fwd_rnd(aes_ft_tab,48)
  3853. + fwd_rnd(aes_ft_tab,64)
  3854. + fwd_rnd(aes_ft_tab,80)
  3855. + fwd_rnd(aes_ft_tab,96)
  3856. + fwd_rnd(aes_ft_tab,112)
  3857. + fwd_rnd(aes_ft_tab,128)
  3858. + fwd_rnd(aes_fl_tab,144) // last round uses a different table
  3859. +
  3860. +// move final values to the output array.
  3861. +
  3862. + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes
  3863. + add $8,%esp // release the scratch space
  3864. + mov %eax,(%ebp)
  3865. + mov %ebx,4(%ebp)
  3866. + mov %esi,8(%ebp)
  3867. + mov %edi,12(%ebp)
  3868. + pop %edi
  3869. + pop %esi
  3870. + pop %ebx
  3871. + pop %ebp
  3872. + ret
  3873. +
  3874. +
  3875. +// AES (Rijndael) Decryption Subroutine: decrypts the 16 bytes at in_blk into out_blk
  3876. +
  3877. + .align ALIGN32BYTES
  3878. +aes_decrypt:
  3879. + push %ebp // the ctx and in_blk offsets assume exactly this one push
  3880. + mov ctx(%esp),%ebp // pointer to context
  3881. + mov in_blk(%esp),%ecx // input block address
  3882. + push %ebx
  3883. + push %esi
  3884. + push %edi
  3885. + mov nrnd(%ebp),%edx // number of rounds
  3886. + lea dkey+16(%ebp),%ebp // key pointer, biased one round key past the start of dkey
  3887. +
  3888. +// input four columns and xor in first round key
  3889. +
  3890. + mov (%ecx),%eax
  3891. + mov 4(%ecx),%ebx
  3892. + mov 8(%ecx),%esi
  3893. + mov 12(%ecx),%edi
  3894. + xor -16(%ebp),%eax // -16..-4(%ebp) is the first round key because of the bias above
  3895. + xor -12(%ebp),%ebx
  3896. + xor -8(%ebp),%esi
  3897. + xor -4(%ebp),%edi
  3898. +
  3899. + sub $8,%esp // space for register saves on stack
  3900. +
  3901. + sub $10,%edx
  3902. + je aes_25 // 10 rounds: enter the round chain at aes_25
  3903. + add $32,%ebp // two extra rounds: advance the key pointer by two round keys
  3904. + sub $2,%edx
  3905. + je aes_23 // 12 rounds: enter the round chain at aes_23
  3906. + add $32,%ebp // otherwise run all 14 rounds: two more round keys
  3907. +
  3908. + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
  3909. + inv_rnd(aes_it_tab,-48)
  3910. +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
  3911. + inv_rnd(aes_it_tab,-16)
  3912. +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
  3913. + inv_rnd(aes_it_tab,16)
  3914. + inv_rnd(aes_it_tab,32)
  3915. + inv_rnd(aes_it_tab,48)
  3916. + inv_rnd(aes_it_tab,64)
  3917. + inv_rnd(aes_it_tab,80)
  3918. + inv_rnd(aes_it_tab,96)
  3919. + inv_rnd(aes_it_tab,112)
  3920. + inv_rnd(aes_it_tab,128)
  3921. + inv_rnd(aes_il_tab,144) // last round uses a different table
  3922. +
  3923. +// move final values to the output array.
  3924. +
  3925. + mov out_blk+20(%esp),%ebp // +20: three more pushes (12 bytes) plus the 8 scratch bytes
  3926. + add $8,%esp // release the scratch space
  3927. + mov %eax,(%ebp)
  3928. + mov %ebx,4(%ebp)
  3929. + mov %esi,8(%ebp)
  3930. + mov %edi,12(%ebp)
  3931. + pop %edi
  3932. + pop %esi
  3933. + pop %ebx
  3934. + pop %ebp
  3935. + ret
  3936. +
  3937. +// AES (Rijndael) Key Schedule Subroutine
  3938. +
  3939. +// input/output parameters (offsets from %ebp, set up after pushfl and push %ebp)
  3940. +
  3941. +#define aes_cx 12 // AES context
  3942. +#define in_key 16 // key input array address
  3943. +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
  3944. +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
  3945. +
  3946. +// offsets for locals (also relative to %ebp)
  3947. +
  3948. +#define cnt -4 // round counter used while building the decryption key schedule
  3949. +#define slen 8 // bytes of stack reserved for locals
  3950. +
  3951. +// This macro performs a column mixing operation on an input 32-bit
  3952. +// word to give a 32-bit result. It uses each of the 4 bytes in
  3953. +// the input column to index 4 different tables of 256 32-bit words
  3954. +// that are xored together to form the output value.
  3955. +
  3956. +#define mix_col(p1) /* in: %ebx, out: %eax; clobbers %ecx and leaves %ebx rotated by 16 bits */ \
  3957. + movzbl %bl,%ecx ;\
  3958. + mov p1(,%ecx,4),%eax ;\
  3959. + movzbl %bh,%ecx ;\
  3960. + ror $16,%ebx /* bring the upper two bytes into %bl and %bh */ ;\
  3961. + xor p1+tlen(,%ecx,4),%eax ;\
  3962. + movzbl %bl,%ecx ;\
  3963. + xor p1+2*tlen(,%ecx,4),%eax ;\
  3964. + movzbl %bh,%ecx ;\
  3965. + xor p1+3*tlen(,%ecx,4),%eax
  3966. +
  3967. +// Key Schedule Macros
  3968. +
  3969. +#define ksc4(p1) /* 4-word key step p1: %esi,%ebp,%edx,%ebx carry the previous four words; result stored at 16*p1(%edi) */ \
  3970. + rol $24,%ebx /* rotate left 24 (same as right 8) before the table lookup */ ;\
  3971. + mix_col(aes_fl_tab) ;\
  3972. + ror $8,%ebx /* together with the ror $16 inside mix_col this restores %ebx */ ;\
  3973. + xor 4*p1+aes_rcon_tab,%eax /* round constant number p1 */ ;\
  3974. + xor %eax,%esi ;\
  3975. + xor %esi,%ebp ;\
  3976. + mov %esi,16*p1(%edi) ;\
  3977. + mov %ebp,16*p1+4(%edi) ;\
  3978. + xor %ebp,%edx ;\
  3979. + xor %edx,%ebx ;\
  3980. + mov %edx,16*p1+8(%edi) ;\
  3981. + mov %ebx,16*p1+12(%edi)
  3982. +
  3983. +#define ksc6(p1) /* 6-word key step p1: words 0-1 chain through memory, words 2-5 through %esi,%ebp,%edx,%ebx */ \
  3984. + rol $24,%ebx /* rotate left 24 (same as right 8) before the table lookup */ ;\
  3985. + mix_col(aes_fl_tab) ;\
  3986. + ror $8,%ebx /* together with the ror $16 inside mix_col this restores %ebx */ ;\
  3987. + xor 4*p1+aes_rcon_tab,%eax /* round constant number p1 */ ;\
  3988. + xor 24*p1-24(%edi),%eax /* word 0 of the previous six-word group */ ;\
  3989. + mov %eax,24*p1(%edi) ;\
  3990. + xor 24*p1-20(%edi),%eax /* word 1 of the previous six-word group */ ;\
  3991. + mov %eax,24*p1+4(%edi) ;\
  3992. + xor %eax,%esi ;\
  3993. + xor %esi,%ebp ;\
  3994. + mov %esi,24*p1+8(%edi) ;\
  3995. + mov %ebp,24*p1+12(%edi) ;\
  3996. + xor %ebp,%edx ;\
  3997. + xor %edx,%ebx ;\
  3998. + mov %edx,24*p1+16(%edi) ;\
  3999. + mov %ebx,24*p1+20(%edi)
  4000. +
  4001. +#define ksc8(p1) /* 8-word key step p1: words 0-3 chain through memory, words 4-7 through %esi,%ebp,%edx,%ebx */ \
  4002. + rol $24,%ebx /* rotate left 24 (same as right 8) before the table lookup */ ;\
  4003. + mix_col(aes_fl_tab) ;\
  4004. + ror $8,%ebx /* together with the ror $16 inside mix_col this restores %ebx */ ;\
  4005. + xor 4*p1+aes_rcon_tab,%eax /* round constant number p1 */ ;\
  4006. + xor 32*p1-32(%edi),%eax /* word 0 of the previous eight-word group */ ;\
  4007. + mov %eax,32*p1(%edi) ;\
  4008. + xor 32*p1-28(%edi),%eax ;\
  4009. + mov %eax,32*p1+4(%edi) ;\
  4010. + xor 32*p1-24(%edi),%eax ;\
  4011. + mov %eax,32*p1+8(%edi) ;\
  4012. + xor 32*p1-20(%edi),%eax ;\
  4013. + mov %eax,32*p1+12(%edi) ;\
  4014. + push %ebx /* keep the carried word while %ebx feeds the second lookup */ ;\
  4015. + mov %eax,%ebx ;\
  4016. + mix_col(aes_fl_tab) /* second lookup: no rotation beforehand and no round constant */ ;\
  4017. + pop %ebx ;\
  4018. + xor %eax,%esi ;\
  4019. + xor %esi,%ebp ;\
  4020. + mov %esi,32*p1+16(%edi) ;\
  4021. + mov %ebp,32*p1+20(%edi) ;\
  4022. + xor %ebp,%edx ;\
  4023. + xor %edx,%ebx ;\
  4024. + mov %edx,32*p1+24(%edi) ;\
  4025. + mov %ebx,32*p1+28(%edi)
  4026. +
  4027. + .align ALIGN32BYTES
  4028. +aes_set_key:
  4029. + pushfl // save caller flags (cld below changes the direction flag)
  4030. + push %ebp
  4031. + mov %esp,%ebp // parameters and locals are addressed off %ebp
  4032. + sub $slen,%esp // reserve space for locals
  4033. + push %ebx
  4034. + push %esi
  4035. + push %edi
  4036. +
  4037. + mov aes_cx(%ebp),%edx // edx -> AES context
  4038. +
  4039. + mov key_ln(%ebp),%ecx // key length
  4040. + cmpl $128,%ecx
  4041. + jb aes_30 // below 128: taken as a byte count
  4042. + shr $3,%ecx // otherwise convert bits to bytes
  4043. +aes_30: cmpl $32,%ecx
  4044. + je aes_32
  4045. + cmpl $24,%ecx
  4046. + je aes_32
  4047. + mov $16,%ecx // any other length falls back to 16 bytes
  4048. +aes_32: shr $2,%ecx // bytes -> 32-bit words
  4049. + mov %ecx,nkey(%edx)
  4050. +
  4051. + lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
  4052. + mov %eax,nrnd(%edx)
  4053. +
  4054. + mov in_key(%ebp),%esi // key input array
  4055. + lea ekey(%edx),%edi // key position in AES context
  4056. + cld // string instructions below must count upwards
  4057. + push %ebp // %ebp is reused for a key word below; restored at aes_37
  4058. + mov %ecx,%eax // save key length in eax
  4059. + rep ; movsl // words in the key schedule
  4060. + mov -4(%esi),%ebx // put some values in registers
  4061. + mov -8(%esi),%edx // to allow faster code
  4062. + mov -12(%esi),%ebp
  4063. + mov -16(%esi),%esi
  4064. +
  4065. + cmpl $4,%eax // jump on key size
  4066. + je aes_36
  4067. + cmpl $6,%eax
  4068. + je aes_35
  4069. +
  4070. + ksc8(0)
  4071. + ksc8(1)
  4072. + ksc8(2)
  4073. + ksc8(3)
  4074. + ksc8(4)
  4075. + ksc8(5)
  4076. + ksc8(6)
  4077. + jmp aes_37
  4078. +aes_35: ksc6(0)
  4079. + ksc6(1)
  4080. + ksc6(2)
  4081. + ksc6(3)
  4082. + ksc6(4)
  4083. + ksc6(5)
  4084. + ksc6(6)
  4085. + ksc6(7)
  4086. + jmp aes_37
  4087. +aes_36: ksc4(0)
  4088. + ksc4(1)
  4089. + ksc4(2)
  4090. + ksc4(3)
  4091. + ksc4(4)
  4092. + ksc4(5)
  4093. + ksc4(6)
  4094. + ksc4(7)
  4095. + ksc4(8)
  4096. + ksc4(9)
  4097. +aes_37: pop %ebp // frame pointer is valid again
  4098. + mov aes_cx(%ebp),%edx // edx -> AES context
  4099. + cmpl $0,ed_flg(%ebp)
  4100. + jne aes_39 // nonzero flag: encryption schedule only
  4101. +
  4102. +// compile decryption key schedule from encryption schedule - reverse
  4103. +// order and do mix_column operation on round keys except first and last
  4104. +
  4105. + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
  4106. + shl $2,%eax
  4107. + lea dkey(%edx,%eax,4),%edi // %edi = dkey + 16*nrnd
  4108. + lea ekey(%edx),%esi // kf = cx->e_key
  4109. +
  4110. + movsl // copy first round key (unmodified)
  4111. + movsl
  4112. + movsl
  4113. + movsl
  4114. + sub $32,%edi // the copy advanced %edi by 16: step back to the preceding 16-byte slot
  4115. + movl $1,cnt(%ebp)
  4116. +aes_38: // do mix column on each column of
  4117. + lodsl // each round key
  4118. + mov %eax,%ebx
  4119. + mix_col(aes_im_tab)
  4120. + stosl
  4121. + lodsl
  4122. + mov %eax,%ebx
  4123. + mix_col(aes_im_tab)
  4124. + stosl
  4125. + lodsl
  4126. + mov %eax,%ebx
  4127. + mix_col(aes_im_tab)
  4128. + stosl
  4129. + lodsl
  4130. + mov %eax,%ebx
  4131. + mix_col(aes_im_tab)
  4132. + stosl
  4133. + sub $32,%edi // again step back to the preceding 16-byte slot
  4134. +
  4135. + incl cnt(%ebp)
  4136. + mov cnt(%ebp),%eax
  4137. + cmp nrnd(%edx),%eax
  4138. + jb aes_38 // repeat while cnt < nrnd
  4139. +
  4140. + movsl // copy last round key (unmodified)
  4141. + movsl
  4142. + movsl
  4143. + movsl
  4144. +aes_39: pop %edi
  4145. + pop %esi
  4146. + pop %ebx
  4147. + mov %ebp,%esp // drop the locals
  4148. + pop %ebp
  4149. + popfl // restore caller flags
  4150. + ret
  4151. +
  4152. +
  4153. +// finite field multiplies by {02}, {04} and {08} (x is not parenthesized: pass a plain constant)
  4154. +
  4155. +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) // shift left; xor 0x11b if bit 7 was set
  4156. +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) // one reduction term each for bits 6 and 7
  4157. +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) // one reduction term each for bits 5, 6 and 7
  4158. +
  4159. +// finite field multiplies required in table generation
  4160. +
  4161. +#define f3(x) (f2(x) ^ x) // {03} = {02} ^ {01}
  4162. +#define f9(x) (f8(x) ^ x) // {09} = {08} ^ {01}
  4163. +#define fb(x) (f8(x) ^ f2(x) ^ x) // {0b} = {08} ^ {02} ^ {01}
  4164. +#define fd(x) (f8(x) ^ f4(x) ^ x) // {0d} = {08} ^ {04} ^ {01}
  4165. +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) // {0e} = {08} ^ {04} ^ {02}
  4166. +
  4167. +// These defines generate the forward table entries
  4168. +
  4169. +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
  4170. +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
  4171. +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
  4172. +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
  4173. +
  4174. +// These defines generate the inverse table entries
  4175. +
  4176. +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
  4177. +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
  4178. +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
  4179. +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
  4180. +
  4181. +// These defines generate entries for the last round tables
  4182. +
  4183. +#define w0(x) (x)
  4184. +#define w1(x) (x << 8)
  4185. +#define w2(x) (x << 16)
  4186. +#define w3(x) (x << 24)
  4187. +
  4188. +// macro to generate inverse mix column tables (needed for the key schedule)
  4189. +
  4190. +#define im_data0(p1) \
  4191. + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
  4192. + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
  4193. + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
  4194. + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
  4195. +#define im_data1(p1) \
  4196. + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
  4197. + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
  4198. + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
  4199. + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
  4200. +#define im_data2(p1) \
  4201. + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
  4202. + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
  4203. + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
  4204. + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
  4205. +#define im_data3(p1) \
  4206. + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
  4207. + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
  4208. + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
  4209. + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
  4210. +#define im_data4(p1) \
  4211. + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
  4212. + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
  4213. + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
  4214. + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
  4215. +#define im_data5(p1) \
  4216. + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
  4217. + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
  4218. + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
  4219. + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
  4220. +#define im_data6(p1) \
  4221. + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
  4222. + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
  4223. + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
  4224. + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
  4225. +#define im_data7(p1) \
  4226. + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
  4227. + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
  4228. + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
  4229. + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
  4230. +
  4231. +// S-box data - 256 entries
  4232. +
  4233. +#define sb_data0(p1) \
  4234. + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
  4235. + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
  4236. + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
  4237. + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
  4238. +#define sb_data1(p1) \
  4239. + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
  4240. + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
  4241. + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
  4242. + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
  4243. +#define sb_data2(p1) \
  4244. + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
  4245. + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
  4246. + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
  4247. + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
  4248. +#define sb_data3(p1) \
  4249. + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
  4250. + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
  4251. + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
  4252. + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
  4253. +#define sb_data4(p1) \
  4254. + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
  4255. + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
  4256. + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
  4257. + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
  4258. +#define sb_data5(p1) \
  4259. + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
  4260. + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
  4261. + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
  4262. + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
  4263. +#define sb_data6(p1) \
  4264. + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
  4265. + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
  4266. + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
  4267. + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
  4268. +#define sb_data7(p1) \
  4269. + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
  4270. + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
  4271. + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
  4272. + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
  4273. +
  4274. +// Inverse S-box data - 256 entries
  4275. +
  4276. +#define ib_data0(p1) \
  4277. + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
  4278. + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
  4279. + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
  4280. + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
  4281. +#define ib_data1(p1) \
  4282. + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
  4283. + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
  4284. + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
  4285. + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
  4286. +#define ib_data2(p1) \
  4287. + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
  4288. + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
  4289. + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
  4290. + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
  4291. +#define ib_data3(p1) \
  4292. + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
  4293. + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
  4294. + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
  4295. + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
  4296. +#define ib_data4(p1) \
  4297. + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
  4298. + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
  4299. + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
  4300. + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
  4301. +#define ib_data5(p1) \
  4302. + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
  4303. + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
  4304. + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
  4305. + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
  4306. +#define ib_data6(p1) \
  4307. + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
  4308. + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
  4309. + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
  4310. + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
  4311. +#define ib_data7(p1) \
  4312. + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
  4313. + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
  4314. + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
  4315. + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
  4316. +
  4317. +// The rcon_table (needed for the key schedule)
  4318. +//
  4319. +// Here is Dr Brian Gladman's original source code:
  4320. +// _rcon_tab:
  4321. +// %assign x 1
  4322. +// %rep 29
  4323. +// dd x
  4324. +// %assign x f2(x)
  4325. +// %endrep
  4326. +//
  4327. +// Here is precomputed output (it's more portable this way):
  4328. +
  4329. + .align ALIGN32BYTES
  4330. +aes_rcon_tab:
  4331. + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
  4332. + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
  4333. + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
  4334. + .long 0xb3,0x7d,0xfa,0xef,0xc5
  4335. +
  4336. +// The forward xor tables
  4337. +
  4338. + .align ALIGN32BYTES
  4339. +aes_ft_tab:
  4340. + sb_data0(u0)
  4341. + sb_data1(u0)
  4342. + sb_data2(u0)
  4343. + sb_data3(u0)
  4344. + sb_data4(u0)
  4345. + sb_data5(u0)
  4346. + sb_data6(u0)
  4347. + sb_data7(u0)
  4348. +
  4349. + sb_data0(u1)
  4350. + sb_data1(u1)
  4351. + sb_data2(u1)
  4352. + sb_data3(u1)
  4353. + sb_data4(u1)
  4354. + sb_data5(u1)
  4355. + sb_data6(u1)
  4356. + sb_data7(u1)
  4357. +
  4358. + sb_data0(u2)
  4359. + sb_data1(u2)
  4360. + sb_data2(u2)
  4361. + sb_data3(u2)
  4362. + sb_data4(u2)
  4363. + sb_data5(u2)
  4364. + sb_data6(u2)
  4365. + sb_data7(u2)
  4366. +
  4367. + sb_data0(u3)
  4368. + sb_data1(u3)
  4369. + sb_data2(u3)
  4370. + sb_data3(u3)
  4371. + sb_data4(u3)
  4372. + sb_data5(u3)
  4373. + sb_data6(u3)
  4374. + sb_data7(u3)
  4375. +
  4376. + .align ALIGN32BYTES
  4377. +aes_fl_tab:
  4378. + sb_data0(w0)
  4379. + sb_data1(w0)
  4380. + sb_data2(w0)
  4381. + sb_data3(w0)
  4382. + sb_data4(w0)
  4383. + sb_data5(w0)
  4384. + sb_data6(w0)
  4385. + sb_data7(w0)
  4386. +
  4387. + sb_data0(w1)
  4388. + sb_data1(w1)
  4389. + sb_data2(w1)
  4390. + sb_data3(w1)
  4391. + sb_data4(w1)
  4392. + sb_data5(w1)
  4393. + sb_data6(w1)
  4394. + sb_data7(w1)
  4395. +
  4396. + sb_data0(w2)
  4397. + sb_data1(w2)
  4398. + sb_data2(w2)
  4399. + sb_data3(w2)
  4400. + sb_data4(w2)
  4401. + sb_data5(w2)
  4402. + sb_data6(w2)
  4403. + sb_data7(w2)
  4404. +
  4405. + sb_data0(w3)
  4406. + sb_data1(w3)
  4407. + sb_data2(w3)
  4408. + sb_data3(w3)
  4409. + sb_data4(w3)
  4410. + sb_data5(w3)
  4411. + sb_data6(w3)
  4412. + sb_data7(w3)
  4413. +
  4414. +// The inverse xor tables
  4415. +
  4416. + .align ALIGN32BYTES
  4417. +aes_it_tab:
  4418. + ib_data0(v0)
  4419. + ib_data1(v0)
  4420. + ib_data2(v0)
  4421. + ib_data3(v0)
  4422. + ib_data4(v0)
  4423. + ib_data5(v0)
  4424. + ib_data6(v0)
  4425. + ib_data7(v0)
  4426. +
  4427. + ib_data0(v1)
  4428. + ib_data1(v1)
  4429. + ib_data2(v1)
  4430. + ib_data3(v1)
  4431. + ib_data4(v1)
  4432. + ib_data5(v1)
  4433. + ib_data6(v1)
  4434. + ib_data7(v1)
  4435. +
  4436. + ib_data0(v2)
  4437. + ib_data1(v2)
  4438. + ib_data2(v2)
  4439. + ib_data3(v2)
  4440. + ib_data4(v2)
  4441. + ib_data5(v2)
  4442. + ib_data6(v2)
  4443. + ib_data7(v2)
  4444. +
  4445. + ib_data0(v3)
  4446. + ib_data1(v3)
  4447. + ib_data2(v3)
  4448. + ib_data3(v3)
  4449. + ib_data4(v3)
  4450. + ib_data5(v3)
  4451. + ib_data6(v3)
  4452. + ib_data7(v3)
  4453. +
  4454. + .align ALIGN32BYTES
  4455. +aes_il_tab:
  4456. + ib_data0(w0)
  4457. + ib_data1(w0)
  4458. + ib_data2(w0)
  4459. + ib_data3(w0)
  4460. + ib_data4(w0)
  4461. + ib_data5(w0)
  4462. + ib_data6(w0)
  4463. + ib_data7(w0)
  4464. +
  4465. + ib_data0(w1)
  4466. + ib_data1(w1)
  4467. + ib_data2(w1)
  4468. + ib_data3(w1)
  4469. + ib_data4(w1)
  4470. + ib_data5(w1)
  4471. + ib_data6(w1)
  4472. + ib_data7(w1)
  4473. +
  4474. + ib_data0(w2)
  4475. + ib_data1(w2)
  4476. + ib_data2(w2)
  4477. + ib_data3(w2)
  4478. + ib_data4(w2)
  4479. + ib_data5(w2)
  4480. + ib_data6(w2)
  4481. + ib_data7(w2)
  4482. +
  4483. + ib_data0(w3)
  4484. + ib_data1(w3)
  4485. + ib_data2(w3)
  4486. + ib_data3(w3)
  4487. + ib_data4(w3)
  4488. + ib_data5(w3)
  4489. + ib_data6(w3)
  4490. + ib_data7(w3)
  4491. +
  4492. +// The inverse mix column tables
  4493. +
  4494. + .align ALIGN32BYTES
  4495. +aes_im_tab:
  4496. + im_data0(v0)
  4497. + im_data1(v0)
  4498. + im_data2(v0)
  4499. + im_data3(v0)
  4500. + im_data4(v0)
  4501. + im_data5(v0)
  4502. + im_data6(v0)
  4503. + im_data7(v0)
  4504. +
  4505. + im_data0(v1)
  4506. + im_data1(v1)
  4507. + im_data2(v1)
  4508. + im_data3(v1)
  4509. + im_data4(v1)
  4510. + im_data5(v1)
  4511. + im_data6(v1)
  4512. + im_data7(v1)
  4513. +
  4514. + im_data0(v2)
  4515. + im_data1(v2)
  4516. + im_data2(v2)
  4517. + im_data3(v2)
  4518. + im_data4(v2)
  4519. + im_data5(v2)
  4520. + im_data6(v2)
  4521. + im_data7(v2)
  4522. +
  4523. + im_data0(v3)
  4524. + im_data1(v3)
  4525. + im_data2(v3)
  4526. + im_data3(v3)
  4527. + im_data4(v3)
  4528. + im_data5(v3)
  4529. + im_data6(v3)
  4530. + im_data7(v3)
  4531. diff -pruN linux-2.6.9_orig/drivers/misc/aes.c linux-2.6.9/drivers/misc/aes.c
  4532. --- linux-2.6.9_orig/drivers/misc/aes.c 1970-01-01 01:00:00.000000000 +0100
  4533. +++ linux-2.6.9/drivers/misc/aes.c 2004-10-25 14:26:31.882227336 +0200
  4534. @@ -0,0 +1,1479 @@
  4535. +// I retain copyright in this code but I encourage its free use provided
  4536. +// that I don't carry any responsibility for the results. I am especially
  4537. +// happy to see it used in free and open source software. If you do use
  4538. +// it I would appreciate an acknowledgement of its origin in the code or
  4539. +// the product that results and I would also appreciate knowing a little
  4540. +// about the use to which it is being put. I am grateful to Frank Yellin
  4541. +// for some ideas that are used in this implementation.
  4542. +//
  4543. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  4544. +//
  4545. +// This is an implementation of the AES encryption algorithm (Rijndael)
  4546. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  4547. +// to provide both fixed and dynamic block and key lengths and can also
  4548. +// run with either big or little endian internal byte order (see aes.h).
  4549. +// It inputs block and key lengths in bytes with the legal values being
  4550. +// 16, 24 and 32.
  4551. +
  4552. +/*
  4553. + * Modified by Jari Ruusu, May 1 2001
  4554. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  4555. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  4556. + * - Major name space cleanup: Names visible to outside now begin
  4557. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  4558. + * - Removed C++ and DLL support as part of name space cleanup.
  4559. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  4560. + * - Merged precomputed constant tables to aes.c file.
  4561. + * - Removed data alignment restrictions for portability reasons.
  4562. + * - Made block and key lengths accept bit count (128/192/256)
  4563. + * as well as byte count (16/24/32).
  4564. + * - Removed all error checks. This change also eliminated the need
  4565. + * to preinitialize the context struct to zero.
  4566. + * - Removed some totally unused constants.
  4567. + */
  4568. +/*
  4569. + * Modified by Jari Ruusu, April 21 2004
  4570. + * - Added back code that avoids byte swaps on big endian boxes.
  4571. + */
  4572. +
  4573. +#include "aes.h"
  4574. +
  4575. +// CONFIGURATION OPTIONS (see also aes.h)
  4576. +//
  4577. +// 1. Define UNROLL for full loop unrolling in encryption and decryption.
  4578. +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
  4579. +// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
  4580. +// 4. Define FF_TABLES to use tables for field multiplies and inverses.
  4581. +// Do not enable this without understanding stack space requirements.
  4582. +// 5. Define ARRAYS to use arrays to hold the local state block. If this
  4583. +// is not defined, individually declared 32-bit words are used.
  4584. +// 6. Define FAST_VARIABLE if a high speed variable block implementation
  4585. +// is needed (essentially three separate fixed block size code sequences)
  4586. +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
  4587. +// version using 1 table (2 kbytes of table space) or 4 tables (8
  4588. +// kbytes of table space) for higher speed.
  4589. +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
  4590. +// increase by using tables for the last rounds but with more table
  4591. +// space (2 or 8 kbytes extra).
  4592. +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
  4593. +// slower version is provided.
  4594. +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
  4595. +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
  4596. +
  4597. +#define UNROLL
  4598. +//#define PARTIAL_UNROLL
  4599. +
  4600. +#define FIXED_TABLES
  4601. +//#define FF_TABLES
  4602. +//#define ARRAYS
  4603. +#define FAST_VARIABLE
  4604. +
  4605. +//#define ONE_TABLE
  4606. +#define FOUR_TABLES
  4607. +
  4608. +//#define ONE_LR_TABLE
  4609. +#define FOUR_LR_TABLES
  4610. +
  4611. +//#define ONE_IM_TABLE
  4612. +#define FOUR_IM_TABLES
  4613. +
  4614. +#if defined(UNROLL) && defined (PARTIAL_UNROLL)
  4615. +#error both UNROLL and PARTIAL_UNROLL are defined
  4616. +#endif
  4617. +
  4618. +#if defined(ONE_TABLE) && defined (FOUR_TABLES)
  4619. +#error both ONE_TABLE and FOUR_TABLES are defined
  4620. +#endif
  4621. +
  4622. +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
  4623. +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
  4624. +#endif
  4625. +
  4626. +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
  4627. +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
  4628. +#endif
  4629. +
  4630. +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
  4631. +#error an illegal block size has been specified
  4632. +#endif
  4633. +
  4634. +/* INTERNAL_BYTE_ORDER: 0=unknown, 1=little endian, 2=big endian */
  4635. +#if defined(INTERNAL_BYTE_ORDER)
  4636. +#elif defined(__i386__)||defined(__i386)||defined(__x86_64__)||defined(__x86_64)||defined(__amd64__)||defined(__amd64)||defined(__AMD64__)||defined(__AMD64)
  4637. +# define INTERNAL_BYTE_ORDER 1
  4638. +# undef DATA_ALWAYS_ALIGNED
  4639. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  4640. +#elif defined(__ppc__)||defined(__ppc)||defined(__PPC__)||defined(__PPC)||defined(__powerpc__)||defined(__powerpc)||defined(__POWERPC__)||defined(__POWERPC)||defined(__PowerPC__)||defined(__PowerPC)||defined(__ppc64__)||defined(__ppc64)||defined(__PPC64__)||defined(__PPC64)||defined(__powerpc64__)||defined(__powerpc64)||defined(__s390__)||defined(__s390)
  4641. +# define INTERNAL_BYTE_ORDER 2
  4642. +# undef DATA_ALWAYS_ALIGNED
  4643. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  4644. +#elif defined(__alpha__)||defined(__alpha)||defined(__ia64__)||defined(__ia64)
  4645. +# define INTERNAL_BYTE_ORDER 1
  4646. +#elif defined(__hppa__)||defined(__hppa)||defined(__HPPA__)||defined(__HPPA)||defined(__parisc__)||defined(__parisc)||defined(__sparc__)||defined(__sparc)||defined(__sparc_v9__)||defined(__sparc_v9)||defined(__sparc64__)||defined(__sparc64)||defined(__mc68000__)||defined(__mc68000)
  4647. +# define INTERNAL_BYTE_ORDER 2
  4648. +#elif defined(CONFIGURE_DETECTS_BYTE_ORDER)
  4649. +# if WORDS_BIGENDIAN
  4650. +# define INTERNAL_BYTE_ORDER 2
  4651. +# else
  4652. +# define INTERNAL_BYTE_ORDER 1
  4653. +# endif
  4654. +#elif defined(__linux__) && defined(__KERNEL__)
  4655. +# include <asm/byteorder.h>
  4656. +# if defined(__BIG_ENDIAN)
  4657. +# define INTERNAL_BYTE_ORDER 2
  4658. +# else
  4659. +# define INTERNAL_BYTE_ORDER 1
  4660. +# endif
  4661. +#else
  4662. +# include <sys/param.h>
  4663. +# if (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN))
  4664. +# define INTERNAL_BYTE_ORDER 1
  4665. +# elif WORDS_BIGENDIAN || defined(__BIG_ENDIAN__) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN))
  4666. +# define INTERNAL_BYTE_ORDER 2
  4667. +# else
  4668. +# define INTERNAL_BYTE_ORDER 0
  4669. +# endif
  4670. +#endif
  4671. +
  4672. +#if defined(DATA_ALWAYS_ALIGNED) && (INTERNAL_BYTE_ORDER > 0)
  4673. +# define word_in(x) *(u_int32_t*)(x)
  4674. +# define word_out(x,v) *(u_int32_t*)(x) = (v)
  4675. +#elif defined(__linux__) && defined(__KERNEL__)
  4676. +# include <asm/unaligned.h>
  4677. +# define word_in(x) get_unaligned((u_int32_t*)(x))
  4678. +# define word_out(x,v) put_unaligned((v),(u_int32_t*)(x))
  4679. +#else
  4680. +/* unknown endianness and/or unable to handle unaligned data */
  4681. +# undef INTERNAL_BYTE_ORDER
  4682. +# define INTERNAL_BYTE_ORDER 1
  4683. +# define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
  4684. +# define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24)
  4685. +#endif
  4686. +
  4687. +// upr(x,n): rotates bytes within words by n positions, moving bytes
  4688. +// to higher index positions with wrap around into low positions
  4689. +// ups(x,n): same byte movement as upr(x,n) but without wrap around
  4690. +// bval(x,n): extracts the byte at index position n from a word
  4691. +// bytes2word(b0,b1,b2,b3): packs four bytes, b0 at index position 0
  4692. +
  4693. +#if (INTERNAL_BYTE_ORDER < 2)
  4694. +/* little endian: byte index 0 is the least significant byte */
  4695. +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
  4696. +#define ups(x,n) ((x) << 8 * (n))
  4697. +#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
  4698. +#define bytes2word(b0, b1, b2, b3) \
  4699. + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
  4700. +#else
  4701. +/* big endian: byte index 0 is the most significant byte */
  4702. +#define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n))))
  4703. +#define ups(x,n) ((x) >> 8 * (n))
  4704. +#define bval(x,n) ((unsigned char)((x) >> (24 - 8 * (n))))
  4705. +#define bytes2word(b0, b1, b2, b3) \
  4706. + ((u_int32_t)(b0) << 24 | (u_int32_t)(b1) << 16 | (u_int32_t)(b2) << 8 | (b3))
  4707. +#endif
  4708. +
  4709. +// Disable at least some poor combinations of options
  4710. +
  4711. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  4712. +#define FIXED_TABLES
  4713. +#undef UNROLL
  4714. +#undef ONE_LR_TABLE
  4715. +#undef FOUR_LR_TABLES
  4716. +#undef ONE_IM_TABLE
  4717. +#undef FOUR_IM_TABLES
  4718. +#elif !defined(FOUR_TABLES)
  4719. +#ifdef FOUR_LR_TABLES
  4720. +#undef FOUR_LR_TABLES
  4721. +#define ONE_LR_TABLE
  4722. +#endif
  4723. +#ifdef FOUR_IM_TABLES
  4724. +#undef FOUR_IM_TABLES
  4725. +#define ONE_IM_TABLE
  4726. +#endif
  4727. +#elif !defined(AES_BLOCK_SIZE)
  4728. +#if defined(UNROLL)
  4729. +#define PARTIAL_UNROLL
  4730. +#undef UNROLL
  4731. +#endif
  4732. +#endif
  4733. +
  4734. +// the finite field modular polynomial and elements
  4735. +
  4736. +#define ff_poly 0x011b
  4737. +#define ff_hi 0x80
  4738. +
  4739. +// multiply four bytes in GF(2^8) by 'x' {02} in parallel
  4740. +
  4741. +#define m1 0x80808080
  4742. +#define m2 0x7f7f7f7f
  4743. +#define m3 0x0000001b
  4744. +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
  4745. +
  4746. +// The following defines provide alternative definitions of FFmulX that might
  4747. +// give improved performance if a fast 32-bit multiply is not available. Note
  4748. +// that a temporary variable u needs to be defined where FFmulX is used.
  4749. +
  4750. +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
  4751. +// #define m4 0x1b1b1b1b
  4752. +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
  4753. +
  4754. +// perform column mix operation on four bytes in parallel
  4755. +
  4756. +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr(x ^ f2,3) ^ upr(x,2) ^ upr(x,1))
  4757. +
  4758. +#if defined(FIXED_TABLES)
  4759. +
  4760. +// the S-Box table
  4761. +
  4762. +static const unsigned char s_box[256] =
  4763. +{
  4764. + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
  4765. + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  4766. + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
  4767. + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  4768. + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
  4769. + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  4770. + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
  4771. + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  4772. + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
  4773. + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  4774. + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
  4775. + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  4776. + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
  4777. + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  4778. + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
  4779. + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  4780. + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
  4781. + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  4782. + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
  4783. + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  4784. + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
  4785. + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  4786. + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
  4787. + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  4788. + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
  4789. + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  4790. + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
  4791. + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  4792. + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
  4793. + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  4794. + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
  4795. + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  4796. +};
  4797. +
  4798. +// the inverse S-Box table
  4799. +
  4800. +static const unsigned char inv_s_box[256] =
  4801. +{
  4802. + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
  4803. + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  4804. + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
  4805. + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  4806. + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
  4807. + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  4808. + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
  4809. + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  4810. + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
  4811. + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  4812. + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
  4813. + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  4814. + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
  4815. + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  4816. + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
  4817. + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  4818. + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
  4819. + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  4820. + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
  4821. + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  4822. + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
  4823. + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  4824. + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
  4825. + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  4826. + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
  4827. + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  4828. + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
  4829. + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  4830. + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
  4831. + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  4832. + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
  4833. + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  4834. +};
  4835. +
  4836. +// used to ensure table is generated in the right format
  4837. +// depending on the internal byte order required
  4838. +
  4839. +#if (INTERNAL_BYTE_ORDER < 2)
  4840. +/* little endian */
  4841. +#define w0(p) 0x000000##p
  4842. +#else
  4843. +/* big endian */
  4844. +#define w0(p) 0x##p##000000
  4845. +#endif
  4846. +
  4847. +// Number of elements required in this table for different
  4848. +// block and key lengths is:
  4849. +//
  4850. +// Nk = 4 6 8
  4851. +// ----------
  4852. +// Nb = 4 | 10 8 7
  4853. +// 6 | 19 12 11
  4854. +// 8 | 29 19 14
  4855. +//
  4856. +// this table can be a table of bytes if the key schedule
  4857. +// code is adjusted accordingly
  4858. +
  4859. +static const u_int32_t rcon_tab[29] =
  4860. +{
  4861. + w0(01), w0(02), w0(04), w0(08),
  4862. + w0(10), w0(20), w0(40), w0(80),
  4863. + w0(1b), w0(36), w0(6c), w0(d8),
  4864. + w0(ab), w0(4d), w0(9a), w0(2f),
  4865. + w0(5e), w0(bc), w0(63), w0(c6),
  4866. + w0(97), w0(35), w0(6a), w0(d4),
  4867. + w0(b3), w0(7d), w0(fa), w0(ef),
  4868. + w0(c5)
  4869. +};
  4870. +
  4871. +#undef w0
  4872. +
  4873. +// used to ensure table is generated in the right format
  4874. +// depending on the internal byte order required
  4875. +
  4876. +#if (INTERNAL_BYTE_ORDER < 2)
  4877. +/* little endian */
  4878. +#define r0(p,q,r,s) 0x##p##q##r##s
  4879. +#define r1(p,q,r,s) 0x##q##r##s##p
  4880. +#define r2(p,q,r,s) 0x##r##s##p##q
  4881. +#define r3(p,q,r,s) 0x##s##p##q##r
  4882. +#define w0(p) 0x000000##p
  4883. +#define w1(p) 0x0000##p##00
  4884. +#define w2(p) 0x00##p##0000
  4885. +#define w3(p) 0x##p##000000
  4886. +#else
  4887. +/* big endian */
  4888. +#define r0(p,q,r,s) 0x##s##r##q##p
  4889. +#define r1(p,q,r,s) 0x##p##s##r##q
  4890. +#define r2(p,q,r,s) 0x##q##p##s##r
  4891. +#define r3(p,q,r,s) 0x##r##q##p##s
  4892. +#define w0(p) 0x##p##000000
  4893. +#define w1(p) 0x00##p##0000
  4894. +#define w2(p) 0x0000##p##00
  4895. +#define w3(p) 0x000000##p
  4896. +#endif
  4897. +
  4898. +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
  4899. +
  4900. +// data for forward tables (other than last round)
  4901. +
  4902. +#define f_table \
  4903. + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
  4904. + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
  4905. + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
  4906. + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
  4907. + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
  4908. + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
  4909. + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
  4910. + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
  4911. + r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
  4912. + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
  4913. + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
  4914. + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
  4915. + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
  4916. + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
  4917. + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
  4918. + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
  4919. + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
  4920. + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
  4921. + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
  4922. + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
  4923. + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
  4924. + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
  4925. + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
  4926. + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
  4927. + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
  4928. + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
  4929. + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
  4930. + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
  4931. + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
  4932. + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
  4933. + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
  4934. + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
  4935. + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
  4936. + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
  4937. + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
  4938. + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
  4939. + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
  4940. + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
  4941. + r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
  4942. + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
  4943. + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
  4944. + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
  4945. + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
  4946. + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
  4947. + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
  4948. + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
  4949. + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
  4950. + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
  4951. + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
  4952. + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
  4953. + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
  4954. + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
  4955. + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
  4956. + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
  4957. + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
  4958. + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
  4959. + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
  4960. + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
  4961. + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
  4962. + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
  4963. + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
  4964. + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
  4965. + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
  4966. + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
  4967. +
  4968. +// data for inverse tables (other than last round)
  4969. +
  4970. +#define i_table \
  4971. + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
  4972. + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
  4973. + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
  4974. + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
  4975. + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
  4976. + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
  4977. + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
  4978. + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
  4979. + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
  4980. + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
  4981. + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
  4982. + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
  4983. + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
  4984. + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
  4985. + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
  4986. + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
  4987. + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
  4988. + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
  4989. + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
  4990. + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
  4991. + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
  4992. + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
  4993. + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
  4994. + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
  4995. + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
  4996. + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
  4997. + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
  4998. + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
  4999. + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
  5000. + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
  5001. + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
  5002. + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
  5003. + r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
  5004. + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
  5005. + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
  5006. + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
  5007. + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
  5008. + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
  5009. + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
  5010. + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
  5011. + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
  5012. + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
  5013. + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
  5014. + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
  5015. + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
  5016. + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
  5017. + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
  5018. + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
  5019. + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
  5020. + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
  5021. + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
  5022. + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
  5023. + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
  5024. + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
  5025. + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
  5026. + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
  5027. + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
  5028. + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
  5029. + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
  5030. + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
  5031. + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
  5032. + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
  5033. + r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
  5034. + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
  5035. +
  5036. +// generate the forward round table(s) ft_tab from f_table in the desired endian format
  5037. +
  5038. +#undef r
  5039. +#define r r0 // every r(...) entry of f_table now expands through r0 (defined outside this section)
  5040. +
  5041. +#if defined(ONE_TABLE)
  5042. +static const u_int32_t ft_tab[256] = // single-table variant
  5043. + { f_table };
  5044. +#elif defined(FOUR_TABLES)
  5045. +static const u_int32_t ft_tab[4][256] = // four tables built from the same f_table data
  5046. +{ { f_table }, // ft_tab[0]: entries expanded with r0
  5047. +#undef r
  5048. +#define r r1
  5049. + { f_table }, // ft_tab[1]: same data re-expanded with r1
  5050. +#undef r
  5051. +#define r r2
  5052. + { f_table }, // ft_tab[2]: same data re-expanded with r2
  5053. +#undef r
  5054. +#define r r3
  5055. + { f_table } // ft_tab[3]: same data re-expanded with r3
  5056. +};
  5057. +#endif
  5058. +
  5059. +#undef r
  5060. +#define r r0 // start again with r0 for the inverse round table(s)
  5061. +#if defined(ONE_TABLE)
  5062. +static const u_int32_t it_tab[256] = // single inverse round table
  5063. + { i_table };
  5064. +#elif defined(FOUR_TABLES)
  5065. +static const u_int32_t it_tab[4][256] = // four inverse round tables built from the same i_table data
  5066. +{ { i_table }, // it_tab[0]: entries expanded with r0
  5067. +#undef r
  5068. +#define r r1
  5069. + { i_table }, // it_tab[1]: re-expanded with r1
  5070. +#undef r
  5071. +#define r r2
  5072. + { i_table }, // it_tab[2]: re-expanded with r2
  5073. +#undef r
  5074. +#define r r3
  5075. + { i_table } // it_tab[3]: re-expanded with r3
  5076. +};
  5077. +#endif
  5078. +
  5079. +#endif
  5080. +
  5081. +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
  5082. +
  5083. +// data for inverse tables (last round)
  5084. +
  5085. +#define li_table \
  5086. + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
  5087. + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
  5088. + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
  5089. + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
  5090. + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
  5091. + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
  5092. + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
  5093. + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
  5094. + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
  5095. + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
  5096. + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
  5097. + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
  5098. + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
  5099. + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
  5100. + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
  5101. + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
  5102. + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
  5103. + w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
  5104. + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
  5105. + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
  5106. + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
  5107. + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
  5108. + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
  5109. + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
  5110. + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
  5111. + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
  5112. + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
  5113. + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
  5114. + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
  5115. + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
  5116. + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
  5117. + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
  5118. +
  5119. +// generate the last round tables in the desired endian format (w0..w3 are defined outside this section)
  5120. +
  5121. +#undef r
  5122. +#define r(p,q,r,s) w0(q) // keep only the second field of each f_table entry and pass it to w0
  5123. +#if defined(ONE_LR_TABLE)
  5124. +static const u_int32_t fl_tab[256] = // single forward last-round table
  5125. + { f_table };
  5126. +#elif defined(FOUR_LR_TABLES)
  5127. +static const u_int32_t fl_tab[4][256] = // four forward last-round tables from the same f_table data
  5128. +{ { f_table }, // fl_tab[0]: second field through w0
  5129. +#undef r
  5130. +#define r(p,q,r,s) w1(q)
  5131. + { f_table }, // fl_tab[1]: second field through w1
  5132. +#undef r
  5133. +#define r(p,q,r,s) w2(q)
  5134. + { f_table }, // fl_tab[2]: second field through w2
  5135. +#undef r
  5136. +#define r(p,q,r,s) w3(q)
  5137. + { f_table } // fl_tab[3]: second field through w3
  5138. +};
  5139. +#endif
  5140. +
  5141. +#undef w
  5142. +#define w w0 // every w(..) entry of li_table now expands through w0
  5143. +#if defined(ONE_LR_TABLE)
  5144. +static const u_int32_t il_tab[256] = // single inverse last-round table
  5145. + { li_table };
  5146. +#elif defined(FOUR_LR_TABLES)
  5147. +static const u_int32_t il_tab[4][256] = // four inverse last-round tables from the same li_table data
  5148. +{ { li_table }, // il_tab[0]: entries expanded with w0
  5149. +#undef w
  5150. +#define w w1
  5151. + { li_table }, // il_tab[1]: re-expanded with w1
  5152. +#undef w
  5153. +#define w w2
  5154. + { li_table }, // il_tab[2]: re-expanded with w2
  5155. +#undef w
  5156. +#define w w3
  5157. + { li_table } // il_tab[3]: re-expanded with w3
  5158. +};
  5159. +#endif
  5160. +
  5161. +#endif
  5162. +
  5163. +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
  5164. +
  5165. +#define m_table \
  5166. + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
  5167. + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
  5168. + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
  5169. + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
  5170. + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
  5171. + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
  5172. + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
  5173. + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
  5174. + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
  5175. + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
  5176. + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
  5177. + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
  5178. + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
  5179. + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
  5180. + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
  5181. + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
  5182. + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
  5183. + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
  5184. + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
  5185. + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
  5186. + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
  5187. + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
  5188. + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
  5189. + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
  5190. + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
  5191. + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
  5192. + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
  5193. + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
  5194. + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
  5195. + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
  5196. + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
  5197. + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
  5198. + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
  5199. + r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
  5200. + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
  5201. + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
  5202. + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
  5203. + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
  5204. + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
  5205. + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
  5206. + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
  5207. + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
  5208. + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
  5209. + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
  5210. + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
  5211. + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
  5212. + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
  5213. + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
  5214. + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
  5215. + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
  5216. + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
  5217. + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
  5218. + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
  5219. + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
  5220. + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
  5221. + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
  5222. + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
  5223. + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
  5224. + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
  5225. + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
  5226. + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
  5227. + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
  5228. + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
  5229. + r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
  5230. +
  5231. +#undef r
  5232. +#define r r0 // every r(...) entry of m_table now expands through r0 (defined outside this section)
  5233. +
  5234. +#if defined(ONE_IM_TABLE)
  5235. +static const u_int32_t im_tab[256] = // single table used by inv_mcol
  5236. + { m_table };
  5237. +#elif defined(FOUR_IM_TABLES)
  5238. +static const u_int32_t im_tab[4][256] = // four tables used by inv_mcol, built from the same m_table data
  5239. +{ { m_table }, // im_tab[0]: entries expanded with r0
  5240. +#undef r
  5241. +#define r r1
  5242. + { m_table }, // im_tab[1]: re-expanded with r1
  5243. +#undef r
  5244. +#define r r2
  5245. + { m_table }, // im_tab[2]: re-expanded with r2
  5246. +#undef r
  5247. +#define r r3
  5248. + { m_table } // im_tab[3]: re-expanded with r3
  5249. +};
  5250. +#endif
  5251. +
  5252. +#endif
  5253. +
  5254. +#else
  5255. +
  5256. +static int tab_gen = 0; // set to 1 by aes_set_blk/aes_set_key after they have called gen_tabs()
  5257. +
  5258. +static unsigned char s_box[256]; // the S box
  5259. +static unsigned char inv_s_box[256]; // the inverse S box
  5260. +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
  5261. +
  5262. +#if defined(ONE_TABLE) // forward and inverse round tables, filled by gen_tabs()
  5263. +static u_int32_t ft_tab[256];
  5264. +static u_int32_t it_tab[256];
  5265. +#elif defined(FOUR_TABLES)
  5266. +static u_int32_t ft_tab[4][256];
  5267. +static u_int32_t it_tab[4][256];
  5268. +#endif
  5269. +
  5270. +#if defined(ONE_LR_TABLE) // forward and inverse last-round tables, filled by gen_tabs()
  5271. +static u_int32_t fl_tab[256];
  5272. +static u_int32_t il_tab[256];
  5273. +#elif defined(FOUR_LR_TABLES)
  5274. +static u_int32_t fl_tab[4][256];
  5275. +static u_int32_t il_tab[4][256];
  5276. +#endif
  5277. +
  5278. +#if defined(ONE_IM_TABLE) // table(s) behind the inv_mcol macro, filled by gen_tabs()
  5279. +static u_int32_t im_tab[256];
  5280. +#elif defined(FOUR_IM_TABLES)
  5281. +static u_int32_t im_tab[4][256];
  5282. +#endif
  5283. +
  5284. +// Generate the tables for the dynamic table option
  5285. +
  5286. +#if !defined(FF_TABLES)
  5287. +
  5288. +// It will generally be sensible to use tables to compute finite
  5289. +// field multiplies and inverses, but where memory is scarce this
  5290. +// code might sometimes be better.
  5291. +
  5292. +// return 2 to the power (n - 1), where n is the bit number (counting from 0)
  5293. +// of the highest bit set in x, for x in the range 1 < x < 0x00000200; x = 0
  5294. +// or 1 yields 0. Halving keeps the result within a byte (bit 8 set gives
  5295. +// 0x80), so that locals within FFinv can be bytes rather than words
  5296. +static unsigned char hibit(const u_int32_t x)
  5297. +{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); // halve x and begin smearing its top bit downwards
  5298. +
  5299. + r |= (r >> 2);
  5300. + r |= (r >> 4); // r now has every bit at or below the top bit of (x >> 1) set
  5301. + return (r + 1) >> 1; // r + 1 is twice that top bit (computed as int, so 0xff + 1 does not wrap); halve it
  5302. +}
  5303. +
  5304. +// return the multiplicative inverse of the finite field element x (0 is returned for 0)
  5305. +
  5306. +static unsigned char FFinv(const unsigned char x)
  5307. +{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; // p2/n2: low byte and hibit() of the modular polynomial 0x011b
  5308. +
  5309. + if(x < 2) return x; // 0 and 1 are returned unchanged
  5310. +
  5311. + for(;;) // polynomial extended-Euclid style reduction: the pairs (p1,v1) and (p2,v2) are reduced against each other in turn
  5312. + {
  5313. + if(!n1) return v1; // hibit() gave 0, i.e. p1 is down to 0 or 1: v1 is the answer
  5314. +
  5315. + while(n2 >= n1)
  5316. + {
  5317. + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); // n2/n1 is a power of two, so "* n2" acts as a shift; results are truncated to a byte
  5318. + }
  5319. +
  5320. + if(!n2) return v2; // same exit test for the other pair
  5321. +
  5322. + while(n1 >= n2)
  5323. + {
  5324. + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); // mirror image of the step above
  5325. + }
  5326. + }
  5327. +}
  5328. +
  5329. +// define the finite field multiplies required for Rijndael (each macro may evaluate x more than once)
  5330. +
  5331. +#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0)) // x * 02: shift left, XOR in 0x1b if bit 7 was set
  5332. +#define FFmul03(x) ((x) ^ FFmul02(x)) // x * (01 + 02)
  5333. +#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x)))) // x * (01 + 08)
  5334. +#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x)))) // x * (01 + 02 + 08)
  5335. +#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x)))) // x * (01 + 04 + 08)
  5336. +#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x))) // x * (02 + 04 + 08)
  5337. +
  5338. +#else
  5339. +
  5340. +#define FFinv(x) ((x) ? pow[255 - log[x]]: 0) // inverse via the log/power tables; 0 maps to 0
  5341. +
  5342. +#define FFmul02(x) (x ? pow[log[x] + 0x19] : 0) // multiply by adding logs; pow and log are the local arrays of gen_tabs()
  5343. +#define FFmul03(x) (x ? pow[log[x] + 0x01] : 0) // 0x01 is log[0x03], 0x03 being the generator of the tables
  5344. +#define FFmul09(x) (x ? pow[log[x] + 0xc7] : 0) // the added constants are presumably the logs of the multipliers in the same way
  5345. +#define FFmul0b(x) (x ? pow[log[x] + 0x68] : 0) // NOTE(review): x is not parenthesised in the FFmul macros; pass simple expressions only
  5346. +#define FFmul0d(x) (x ? pow[log[x] + 0xee] : 0)
  5347. +#define FFmul0e(x) (x ? pow[log[x] + 0xdf] : 0)
  5348. +
  5349. +#endif
  5350. +
  5351. +// The forward and inverse affine transformations used in the S-box; both use and overwrite a u_int32_t variable named w in the caller's scope
  5352. +
  5353. +#define fwd_affine(x) /* x ^ rotl(x,1) ^ rotl(x,2) ^ rotl(x,3) ^ rotl(x,4) ^ 0x63; w ^ (w >> 8) folds the shifted-out bits back in */ \
  5354. + (w = (u_int32_t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))
  5355. +
  5356. +#define inv_affine(x) /* rotl(x,1) ^ rotl(x,3) ^ rotl(x,6) ^ 0x05, using the same shift-and-fold technique */ \
  5357. + (w = (u_int32_t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
  5358. +
  5359. +static void gen_tabs(void) // fill s_box, inv_s_box, rcon_tab and whichever ft/it/fl/il/im tables are configured
  5360. +{ u_int32_t i, w; // w is both the scratch variable of fwd_affine/inv_affine and the table word being stored
  5361. +
  5362. +#if defined(FF_TABLES)
  5363. +
  5364. + unsigned char pow[512], log[256]; // used by the table-based FFinv/FFmul macros; pow is double length so summed logs need no wrap-around
  5365. +
  5366. + // log and power tables for GF(2^8) finite field with
  5367. + // 0x011b as modular polynomial - the simplest primitive
  5368. + // root is 0x03, used here to generate the tables
  5369. +
  5370. + i = 0; w = 1;
  5371. + do
  5372. + {
  5373. + pow[i] = (unsigned char)w;
  5374. + pow[i + 255] = (unsigned char)w; // second copy of the same power, 255 entries further on
  5375. + log[w] = (unsigned char)i++;
  5376. + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); // next power of 0x03; ff_hi/ff_poly (defined elsewhere) presumably are the top-bit mask and the reduction polynomial
  5377. + }
  5378. + while (w != 1); // stop once the powers cycle back to 1
  5379. +
  5380. +#endif
  5381. +
  5382. + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i) // round constants: 1, then repeated doubling with the same reduction
  5383. + {
  5384. + rcon_tab[i] = bytes2word(w, 0, 0, 0);
  5385. + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
  5386. + }
  5387. +
  5388. + for(i = 0; i < 256; ++i)
  5389. + { unsigned char b;
  5390. +
  5391. + s_box[i] = b = fwd_affine(FFinv((unsigned char)i)); // S-box entry: field inverse followed by the forward affine map
  5392. +
  5393. + w = bytes2word(b, 0, 0, 0); // forward last-round word: the S-box byte on its own
  5394. +#if defined(ONE_LR_TABLE)
  5395. + fl_tab[i] = w;
  5396. +#elif defined(FOUR_LR_TABLES)
  5397. + fl_tab[0][i] = w;
  5398. + fl_tab[1][i] = upr(w,1); // tables 1..3 hold upr(w, 1..3) of table 0's word (upr is defined elsewhere)
  5399. + fl_tab[2][i] = upr(w,2);
  5400. + fl_tab[3][i] = upr(w,3);
  5401. +#endif
  5402. + w = bytes2word(FFmul02(b), b, b, FFmul03(b)); // forward round word: the S-box byte times 02, 01, 01, 03
  5403. +#if defined(ONE_TABLE)
  5404. + ft_tab[i] = w;
  5405. +#elif defined(FOUR_TABLES)
  5406. + ft_tab[0][i] = w;
  5407. + ft_tab[1][i] = upr(w,1);
  5408. + ft_tab[2][i] = upr(w,2);
  5409. + ft_tab[3][i] = upr(w,3);
  5410. +#endif
  5411. + inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); // inverse S-box entry: inverse affine map followed by the field inverse
  5412. +
  5413. + w = bytes2word(b, 0, 0, 0); // inverse last-round word: the inverse S-box byte on its own
  5414. +#if defined(ONE_LR_TABLE)
  5415. + il_tab[i] = w;
  5416. +#elif defined(FOUR_LR_TABLES)
  5417. + il_tab[0][i] = w;
  5418. + il_tab[1][i] = upr(w,1);
  5419. + il_tab[2][i] = upr(w,2);
  5420. + il_tab[3][i] = upr(w,3);
  5421. +#endif
  5422. + w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); // inverse round word: the inverse S-box byte times 0e, 09, 0d, 0b
  5423. +#if defined(ONE_TABLE)
  5424. + it_tab[i] = w;
  5425. +#elif defined(FOUR_TABLES)
  5426. + it_tab[0][i] = w;
  5427. + it_tab[1][i] = upr(w,1);
  5428. + it_tab[2][i] = upr(w,2);
  5429. + it_tab[3][i] = upr(w,3);
  5430. +#endif
  5431. +#if defined(ONE_IM_TABLE)
  5432. + im_tab[b] = w; // indexed by b, not i: maps the byte b itself to its 0e, 09, 0d, 0b multiples
  5433. +#elif defined(FOUR_IM_TABLES)
  5434. + im_tab[0][b] = w;
  5435. + im_tab[1][b] = upr(w,1);
  5436. + im_tab[2][b] = upr(w,2);
  5437. + im_tab[3][b] = upr(w,3);
  5438. +#endif
  5439. +
  5440. + }
  5441. +}
  5442. +
  5443. +#endif
  5444. +
  5445. +#define no_table(x,box,vf,rf,c) /* word whose four bytes are box[] lookups of the bytes selected by vf/rf for rows 0..3 */ bytes2word( \
  5446. + box[bval(vf(x,0,c),rf(0,c))], \
  5447. + box[bval(vf(x,1,c),rf(1,c))], \
  5448. + box[bval(vf(x,2,c),rf(2,c))], \
  5449. + box[bval(vf(x,3,c),rf(3,c))])
  5450. +
  5451. +#define one_table(x,op,tab,vf,rf,c) /* XOR of four lookups in one table; the lookup for row n is passed through op(..,n) for n = 1..3 */ \
  5452. + ( tab[bval(vf(x,0,c),rf(0,c))] \
  5453. + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
  5454. + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
  5455. + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
  5456. +
  5457. +#define four_tables(x,tab,vf,rf,c) /* XOR of four lookups, row n using its own table tab[n] */ \
  5458. + ( tab[0][bval(vf(x,0,c),rf(0,c))] \
  5459. + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
  5460. + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
  5461. + ^ tab[3][bval(vf(x,3,c),rf(3,c))])
  5462. +
  5463. +#define vf1(x,r,c) (x) // value selector: always the same word, whatever the row and column
  5464. +#define rf1(r,c) (r) // byte selector: byte number r for row r
  5465. +#define rf2(r,c) ((r-c)&3) // byte selector: byte number (r - c) mod 4, i.e. the bytes are taken rotated by c
  5466. +
  5467. +#if defined(FOUR_LR_TABLES) // ls_box(x,c): S-box lookup on each byte of word x, bytes picked with rotation c
  5468. +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
  5469. +#elif defined(ONE_LR_TABLE)
  5470. +#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
  5471. +#else
  5472. +#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
  5473. +#endif
  5474. +
  5475. +#if defined(FOUR_IM_TABLES) // inv_mcol(x): table driven when an im_tab variant is configured
  5476. +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
  5477. +#elif defined(ONE_IM_TABLE)
  5478. +#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
  5479. +#else
  5480. +#define inv_mcol(x) /* table-less form: needs u_int32_t f2, f4, f8, f9 declared by the caller; FFmulX is defined elsewhere */ \
  5481. + (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
  5482. + f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
  5483. +#endif
  5484. +
  5485. +// Subroutine to set the block size (if variable). n_bytes may be given in
  5486. +// bytes (16, 24, 32) or in bits (128, 192, 256); any other value selects 16 bytes.
  5487. +
  5488. +#if defined(AES_BLOCK_SIZE)
  5489. +#define nc (AES_BLOCK_SIZE / 4) // fixed block size: the number of 4-byte columns is a constant
  5490. +#else
  5491. +#define nc (cx->aes_Ncol) // variable block size: the column count is read from the context, which must be named cx
  5492. +
  5493. +void aes_set_blk(aes_context *cx, int n_bytes) // only compiled when AES_BLOCK_SIZE is not defined
  5494. +{
  5495. +#if !defined(FIXED_TABLES)
  5496. + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the lookup tables on first use
  5497. +#endif
  5498. +
  5499. + switch(n_bytes) {
  5500. + case 32: /* bytes */
  5501. + case 256: /* bits */
  5502. + nc = 8; // nc expands to cx->aes_Ncol here
  5503. + break;
  5504. + case 24: /* bytes */
  5505. + case 192: /* bits */
  5506. + nc = 6;
  5507. + break;
  5508. + case 16: /* bytes */
  5509. + case 128: /* bits */
  5510. + default: // unrecognised sizes fall back to a 16-byte block
  5511. + nc = 4;
  5512. + break;
  5513. + }
  5514. +}
  5515. +
  5516. +#endif
  5517. +
  5518. +// Initialise the key schedule from the user supplied key. The key
  5519. +// length may be given in bytes (16, 24 or 32) or in bits (128, 192 or
  5520. +// 256); any other value is treated as 16 bytes. These lengths
  5521. +// correspond to Nk values of 4, 6 and 8 respectively.
  5522. +
  5523. +#define mx(t,f) (*t++ = inv_mcol(*f),f++) // store inv_mcol of one word; f is stepped separately because inv_mcol may expand its argument several times
  5524. +#define cp(t,f) *t++ = *f++ // copy one word and advance both pointers
  5525. +
  5526. +#if AES_BLOCK_SIZE == 16 // an undefined AES_BLOCK_SIZE compares as 0 in #if and so selects the final #else branch
  5527. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
  5528. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
  5529. +#elif AES_BLOCK_SIZE == 24
  5530. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
  5531. + cp(d,s); cp(d,s)
  5532. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
  5533. + mx(d,s); mx(d,s)
  5534. +#elif AES_BLOCK_SIZE == 32
  5535. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
  5536. + cp(d,s); cp(d,s); cp(d,s); cp(d,s)
  5537. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
  5538. + mx(d,s); mx(d,s); mx(d,s); mx(d,s)
  5539. +#else
  5540. +
  5541. +#define cpy(d,s) /* variable block size: the cases fall through, so 8, 6 or 4 words are copied */ \
  5542. +switch(nc) \
  5543. +{ case 8: cp(d,s); cp(d,s); \
  5544. + case 6: cp(d,s); cp(d,s); \
  5545. + case 4: cp(d,s); cp(d,s); \
  5546. + cp(d,s); cp(d,s); \
  5547. +}
  5548. +
  5549. +#define mix(d,s) /* as cpy, but every word is passed through inv_mcol */ \
  5550. +switch(nc) \
  5551. +{ case 8: mx(d,s); mx(d,s); \
  5552. + case 6: mx(d,s); mx(d,s); \
  5553. + case 4: mx(d,s); mx(d,s); \
  5554. + mx(d,s); mx(d,s); \
  5555. +}
  5556. +
  5557. +#endif
  5558. +
  5559. +void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f) // f != 0: build the encryption schedule only
  5560. +{ u_int32_t *kf, *kt, rci;
  5561. +
  5562. +#if !defined(FIXED_TABLES)
  5563. + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the lookup tables on first use
  5564. +#endif
  5565. +
  5566. + switch(n_bytes) { // key length is accepted in bytes or in bits
  5567. + case 32: /* bytes */
  5568. + case 256: /* bits */
  5569. + cx->aes_Nkey = 8;
  5570. + break;
  5571. + case 24: /* bytes */
  5572. + case 192: /* bits */
  5573. + cx->aes_Nkey = 6;
  5574. + break;
  5575. + case 16: /* bytes */
  5576. + case 128: /* bits */
  5577. + default: // unrecognised lengths fall back to a 4-word (16-byte) key
  5578. + cx->aes_Nkey = 4;
  5579. + break;
  5580. + }
  5581. +
  5582. + cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; // rounds = max(key words, block columns) + 6
  5583. +
  5584. + cx->aes_e_key[0] = word_in(in_key ); // the first four key words are loaded for every key size
  5585. + cx->aes_e_key[1] = word_in(in_key + 4);
  5586. + cx->aes_e_key[2] = word_in(in_key + 8);
  5587. + cx->aes_e_key[3] = word_in(in_key + 12);
  5588. +
  5589. + kf = cx->aes_e_key;
  5590. + kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; // nc * (rounds + 1) schedule words, less one key length because each pass writes Nkey words ahead of kf
  5591. + rci = 0; // index of the next round constant
  5592. +
  5593. + switch(cx->aes_Nkey)
  5594. + {
  5595. + case 4: do
  5596. + { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++]; // first word of each group: S-box of the previous word with byte rotation 3, plus a round constant
  5597. + kf[5] = kf[1] ^ kf[4];
  5598. + kf[6] = kf[2] ^ kf[5];
  5599. + kf[7] = kf[3] ^ kf[6];
  5600. + kf += 4;
  5601. + }
  5602. + while(kf < kt);
  5603. + break;
  5604. +
  5605. + case 6: cx->aes_e_key[4] = word_in(in_key + 16); // load the remaining two key words
  5606. + cx->aes_e_key[5] = word_in(in_key + 20);
  5607. + do
  5608. + { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
  5609. + kf[ 7] = kf[1] ^ kf[ 6];
  5610. + kf[ 8] = kf[2] ^ kf[ 7];
  5611. + kf[ 9] = kf[3] ^ kf[ 8];
  5612. + kf[10] = kf[4] ^ kf[ 9];
  5613. + kf[11] = kf[5] ^ kf[10];
  5614. + kf += 6;
  5615. + }
  5616. + while(kf < kt);
  5617. + break;
  5618. +
  5619. + case 8: cx->aes_e_key[4] = word_in(in_key + 16); // load the remaining four key words
  5620. + cx->aes_e_key[5] = word_in(in_key + 20);
  5621. + cx->aes_e_key[6] = word_in(in_key + 24);
  5622. + cx->aes_e_key[7] = word_in(in_key + 28);
  5623. + do
  5624. + { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
  5625. + kf[ 9] = kf[1] ^ kf[ 8];
  5626. + kf[10] = kf[2] ^ kf[ 9];
  5627. + kf[11] = kf[3] ^ kf[10];
  5628. + kf[12] = kf[4] ^ ls_box(kf[11],0); // 8-word keys add a second S-box step mid-group, with no rotation and no round constant
  5629. + kf[13] = kf[5] ^ kf[12];
  5630. + kf[14] = kf[6] ^ kf[13];
  5631. + kf[15] = kf[7] ^ kf[14];
  5632. + kf += 8;
  5633. + }
  5634. + while (kf < kt);
  5635. + break;
  5636. + }
  5637. +
  5638. + if(!f) // f == 0: also derive the decryption schedule from the encryption schedule
  5639. + { u_int32_t i;
  5640. +
  5641. + kt = cx->aes_d_key + nc * cx->aes_Nrnd; // last round-key slot of the decryption schedule
  5642. + kf = cx->aes_e_key;
  5643. +
  5644. + cpy(kt, kf); kt -= 2 * nc; // first encryption round key goes last; cpy advanced kt by nc, so this steps back to the previous slot
  5645. +
  5646. + for(i = 1; i < cx->aes_Nrnd; ++i)
  5647. + {
  5648. +#if defined(ONE_TABLE) || defined(FOUR_TABLES)
  5649. +#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
  5650. + u_int32_t f2, f4, f8, f9; // scratch words required by the table-less inv_mcol macro
  5651. +#endif
  5652. + mix(kt, kf); // with round tables the inner round keys are passed through inv_mcol
  5653. +#else
  5654. + cpy(kt, kf); // without round tables the inner round keys are copied unchanged
  5655. +#endif
  5656. + kt -= 2 * nc;
  5657. + }
  5658. +
  5659. + cpy(kt, kf); // last encryption round key becomes the first decryption round key
  5660. + }
  5661. +}
  5662. +
  5663. +// naming used by the round macros: y = output state, x = input state,
  5664. +// r = row, c = column; s(x,c) names column c of state x
  5665. +
  5666. +#if defined(ARRAYS)
  5667. +#define s(x,c) x[c] // state held in an array
  5668. +#else
  5669. +#define s(x,c) x##c // state held in separate variables named by token pasting, e.g. x0, x1, ...
  5670. +#endif
  5671. +
  5672. +// I am grateful to Frank Yellin for the following constructions
  5673. +// which, given the column (c) of the output state variable that
  5674. +// is being computed, return the input state variables which are
  5675. +// needed for each row (r) of the state
  5676. +
  5677. +// For the fixed block size options, compilers reduce these two
  5678. +// expressions to fixed variable references. For variable block
  5679. +// size code conditional clauses will sometimes be returned
  5680. +
  5681. +#define unused 77 // Sunset Strip - dummy value that stands in for state variables which do not exist at the configured block size
  5682. +
  5683. +#define fwd_var(x,r,c) /* input column for row r of output column c: (c + shift) mod nc, shift = 0,1,2,3 for nc 4 or 6 and 0,1,3,4 for nc 8 */ \
  5684. + ( r==0 ? \
  5685. + ( c==0 ? s(x,0) \
  5686. + : c==1 ? s(x,1) \
  5687. + : c==2 ? s(x,2) \
  5688. + : c==3 ? s(x,3) \
  5689. + : c==4 ? s(x,4) \
  5690. + : c==5 ? s(x,5) \
  5691. + : c==6 ? s(x,6) \
  5692. + : s(x,7)) \
  5693. + : r==1 ? \
  5694. + ( c==0 ? s(x,1) \
  5695. + : c==1 ? s(x,2) \
  5696. + : c==2 ? s(x,3) \
  5697. + : c==3 ? nc==4 ? s(x,0) : s(x,4) \
  5698. + : c==4 ? s(x,5) \
  5699. + : c==5 ? nc==8 ? s(x,6) : s(x,0) \
  5700. + : c==6 ? s(x,7) \
  5701. + : s(x,0)) \
  5702. + : r==2 ? \
  5703. + ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
  5704. + : c==1 ? nc==8 ? s(x,4) : s(x,3) \
  5705. + : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  5706. + : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  5707. + : c==4 ? nc==8 ? s(x,7) : s(x,0) \
  5708. + : c==5 ? nc==8 ? s(x,0) : s(x,1) \
  5709. + : c==6 ? s(x,1) \
  5710. + : s(x,2)) \
  5711. + : \
  5712. + ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
  5713. + : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  5714. + : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  5715. + : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
  5716. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  5717. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  5718. + : c==6 ? s(x,2) \
  5719. + : s(x,3)))
  5720. +
  5721. +#define inv_var(x,r,c) /* input column for row r of output column c when decrypting: (c - shift) mod nc, shift = 0,1,2,3 for nc 4 or 6 and 0,1,3,4 for nc 8 */ \
  5722. + ( r==0 ? \
  5723. + ( c==0 ? s(x,0) \
  5724. + : c==1 ? s(x,1) \
  5725. + : c==2 ? s(x,2) \
  5726. + : c==3 ? s(x,3) \
  5727. + : c==4 ? s(x,4) \
  5728. + : c==5 ? s(x,5) \
  5729. + : c==6 ? s(x,6) \
  5730. + : s(x,7)) \
  5731. + : r==1 ? \
  5732. + ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
  5733. + : c==1 ? s(x,0) \
  5734. + : c==2 ? s(x,1) \
  5735. + : c==3 ? s(x,2) \
  5736. + : c==4 ? s(x,3) \
  5737. + : c==5 ? s(x,4) \
  5738. + : c==6 ? s(x,5) \
  5739. + : s(x,6)) \
  5740. + : r==2 ? \
  5741. + ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  5742. + : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  5743. + : c==2 ? nc==8 ? s(x,7) : s(x,0) \
  5744. + : c==3 ? nc==8 ? s(x,0) : s(x,1) \
  5745. + : c==4 ? nc==8 ? s(x,1) : s(x,2) \
  5746. + : c==5 ? nc==8 ? s(x,2) : s(x,3) \
  5747. + : c==6 ? s(x,3) \
  5748. + : s(x,4)) \
  5749. + : \
  5750. + ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
  5751. + : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  5752. + : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  5753. + : c==3 ? nc==8 ? s(x,7) : s(x,0) \
  5754. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  5755. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  5756. + : c==6 ? s(x,2) \
  5757. + : s(x,3)))
  5758. +
  5759. +#define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c] // load column c from byte buffer x and XOR in key word k[c]
  5760. +#define so(y,x,c) word_out(y + 4 * c, s(x,c)) // write column c of state x to byte buffer y
  5761. +
  5762. +#if defined(FOUR_TABLES) // normal rounds: compute output column c from input state x and round key k
  5763. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
  5764. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
  5765. +#elif defined(ONE_TABLE)
  5766. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
  5767. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
  5768. +#else
  5769. +#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c] // table-less: S-box, then fwd_mcol (defined elsewhere), then key
  5770. +#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]) // table-less: the key is XORed in before inv_mcol is applied
  5771. +#endif
  5772. +
  5773. +#if defined(FOUR_LR_TABLES) // last round: same shape as above but using the last-round tables
  5774. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
  5775. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
  5776. +#elif defined(ONE_LR_TABLE)
  5777. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c) // note: ups (defined elsewhere) rather than upr is used with the single last-round table
  5778. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
  5779. +#else
  5780. +#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
  5781. +#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
  5782. +#endif
  5783. +
  5784. +#if AES_BLOCK_SIZE == 16
  5785. +
  5786. +#if defined(ARRAYS)
  5787. +#define locals(y,x) x[4],y[4]
  5788. +#else
  5789. +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
  5790. +// the following defines prevent the compiler requiring the declaration
  5791. +// of generated but unused variables in the fwd_var and inv_var macros
  5792. +#define b04 unused
  5793. +#define b05 unused
  5794. +#define b06 unused
  5795. +#define b07 unused
  5796. +#define b14 unused
  5797. +#define b15 unused
  5798. +#define b16 unused
  5799. +#define b17 unused
  5800. +#endif
  5801. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  5802. + s(y,2) = s(x,2); s(y,3) = s(x,3);
  5803. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
  5804. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
  5805. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
  5806. +
  5807. +#elif AES_BLOCK_SIZE == 24
  5808. +
  5809. +#if defined(ARRAYS)
  5810. +#define locals(y,x) x[6],y[6]
  5811. +#else
  5812. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
  5813. + y##0,y##1,y##2,y##3,y##4,y##5
  5814. +#define b06 unused
  5815. +#define b07 unused
  5816. +#define b16 unused
  5817. +#define b17 unused
  5818. +#endif
  5819. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  5820. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  5821. + s(y,4) = s(x,4); s(y,5) = s(x,5);
  5822. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
  5823. + si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
  5824. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
  5825. + so(y,x,3); so(y,x,4); so(y,x,5)
  5826. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
  5827. + rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
  5828. +#else
  5829. +
  5830. +#if defined(ARRAYS)
  5831. +#define locals(y,x) x[8],y[8]
  5832. +#else
  5833. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
  5834. + y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
  5835. +#endif
  5836. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
  5837. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  5838. + s(y,4) = s(x,4); s(y,5) = s(x,5); \
  5839. + s(y,6) = s(x,6); s(y,7) = s(x,7);
  5840. +
  5841. +#if AES_BLOCK_SIZE == 32
  5842. +
  5843. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
  5844. + si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
  5845. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
  5846. + so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
  5847. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
  5848. + rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
  5849. +#else
  5850. +
  5851. +#define state_in(y,x,k) \
  5852. +switch(nc) \
  5853. +{ case 8: si(y,x,k,7); si(y,x,k,6); \
  5854. + case 6: si(y,x,k,5); si(y,x,k,4); \
  5855. + case 4: si(y,x,k,3); si(y,x,k,2); \
  5856. + si(y,x,k,1); si(y,x,k,0); \
  5857. +}
  5858. +
  5859. +#define state_out(y,x) \
  5860. +switch(nc) \
  5861. +{ case 8: so(y,x,7); so(y,x,6); \
  5862. + case 6: so(y,x,5); so(y,x,4); \
  5863. + case 4: so(y,x,3); so(y,x,2); \
  5864. + so(y,x,1); so(y,x,0); \
  5865. +}
  5866. +
  5867. +#if defined(FAST_VARIABLE)
  5868. +
  5869. +#define round(rm,y,x,k) \
  5870. +switch(nc) \
  5871. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  5872. + rm(y,x,k,5); rm(y,x,k,4); \
  5873. + rm(y,x,k,3); rm(y,x,k,2); \
  5874. + rm(y,x,k,1); rm(y,x,k,0); \
  5875. + break; \
  5876. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  5877. + rm(y,x,k,3); rm(y,x,k,2); \
  5878. + rm(y,x,k,1); rm(y,x,k,0); \
  5879. + break; \
  5880. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  5881. + rm(y,x,k,1); rm(y,x,k,0); \
  5882. + break; \
  5883. +}
  5884. +#else
  5885. +
  5886. +#define round(rm,y,x,k) \
  5887. +switch(nc) \
  5888. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  5889. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  5890. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  5891. + rm(y,x,k,1); rm(y,x,k,0); \
  5892. +}
  5893. +
  5894. +#endif
  5895. +
  5896. +#endif
  5897. +#endif
  5898. +
  5899. +void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  5900. +{ u_int32_t locals(b0, b1);
  5901. + const u_int32_t *kp = cx->aes_e_key;
  5902. +
  5903. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  5904. + u_int32_t f2;
  5905. +#endif
  5906. +
  5907. + state_in(b0, in_blk, kp); kp += nc;
  5908. +
  5909. +#if defined(UNROLL)
  5910. +
  5911. + switch(cx->aes_Nrnd)
  5912. + {
  5913. + case 14: round(fwd_rnd, b1, b0, kp );
  5914. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5915. + case 12: round(fwd_rnd, b1, b0, kp );
  5916. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5917. + case 10: round(fwd_rnd, b1, b0, kp );
  5918. + round(fwd_rnd, b0, b1, kp + nc);
  5919. + round(fwd_rnd, b1, b0, kp + 2 * nc);
  5920. + round(fwd_rnd, b0, b1, kp + 3 * nc);
  5921. + round(fwd_rnd, b1, b0, kp + 4 * nc);
  5922. + round(fwd_rnd, b0, b1, kp + 5 * nc);
  5923. + round(fwd_rnd, b1, b0, kp + 6 * nc);
  5924. + round(fwd_rnd, b0, b1, kp + 7 * nc);
  5925. + round(fwd_rnd, b1, b0, kp + 8 * nc);
  5926. + round(fwd_lrnd, b0, b1, kp + 9 * nc);
  5927. + }
  5928. +
  5929. +#elif defined(PARTIAL_UNROLL)
  5930. + { u_int32_t rnd;
  5931. +
  5932. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
  5933. + {
  5934. + round(fwd_rnd, b1, b0, kp);
  5935. + round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
  5936. + }
  5937. +
  5938. + round(fwd_rnd, b1, b0, kp);
  5939. + round(fwd_lrnd, b0, b1, kp + nc);
  5940. + }
  5941. +#else
  5942. + { u_int32_t rnd;
  5943. +
  5944. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
  5945. + {
  5946. + round(fwd_rnd, b1, b0, kp);
  5947. + l_copy(b0, b1); kp += nc;
  5948. + }
  5949. +
  5950. + round(fwd_lrnd, b0, b1, kp);
  5951. + }
  5952. +#endif
  5953. +
  5954. + state_out(out_blk, b0);
  5955. +}
  5956. +
  5957. +void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  5958. +{ u_int32_t locals(b0, b1);
  5959. + const u_int32_t *kp = cx->aes_d_key;
  5960. +
  5961. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  5962. + u_int32_t f2, f4, f8, f9;
  5963. +#endif
  5964. +
  5965. + state_in(b0, in_blk, kp); kp += nc;
  5966. +
  5967. +#if defined(UNROLL)
  5968. +
  5969. + switch(cx->aes_Nrnd)
  5970. + {
  5971. + case 14: round(inv_rnd, b1, b0, kp );
  5972. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5973. + case 12: round(inv_rnd, b1, b0, kp );
  5974. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5975. + case 10: round(inv_rnd, b1, b0, kp );
  5976. + round(inv_rnd, b0, b1, kp + nc);
  5977. + round(inv_rnd, b1, b0, kp + 2 * nc);
  5978. + round(inv_rnd, b0, b1, kp + 3 * nc);
  5979. + round(inv_rnd, b1, b0, kp + 4 * nc);
  5980. + round(inv_rnd, b0, b1, kp + 5 * nc);
  5981. + round(inv_rnd, b1, b0, kp + 6 * nc);
  5982. + round(inv_rnd, b0, b1, kp + 7 * nc);
  5983. + round(inv_rnd, b1, b0, kp + 8 * nc);
  5984. + round(inv_lrnd, b0, b1, kp + 9 * nc);
  5985. + }
  5986. +
  5987. +#elif defined(PARTIAL_UNROLL)
  5988. + { u_int32_t rnd;
  5989. +
  5990. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
  5991. + {
  5992. + round(inv_rnd, b1, b0, kp);
  5993. + round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
  5994. + }
  5995. +
  5996. + round(inv_rnd, b1, b0, kp);
  5997. + round(inv_lrnd, b0, b1, kp + nc);
  5998. + }
  5999. +#else
  6000. + { u_int32_t rnd;
  6001. +
  6002. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
  6003. + {
  6004. + round(inv_rnd, b1, b0, kp);
  6005. + l_copy(b0, b1); kp += nc;
  6006. + }
  6007. +
  6008. + round(inv_lrnd, b0, b1, kp);
  6009. + }
  6010. +#endif
  6011. +
  6012. + state_out(out_blk, b0);
  6013. +}
  6014. diff -pruN linux-2.6.9_orig/drivers/misc/aes.h linux-2.6.9/drivers/misc/aes.h
  6015. --- linux-2.6.9_orig/drivers/misc/aes.h 1970-01-01 01:00:00.000000000 +0100
  6016. +++ linux-2.6.9/drivers/misc/aes.h 2004-10-25 14:26:31.883227184 +0200
  6017. @@ -0,0 +1,113 @@
  6018. +// I retain copyright in this code but I encourage its free use provided
  6019. +// that I don't carry any responsibility for the results. I am especially
  6020. +// happy to see it used in free and open source software. If you do use
  6021. +// it I would appreciate an acknowledgement of its origin in the code or
  6022. +// the product that results and I would also appreciate knowing a little
  6023. +// about the use to which it is being put. I am grateful to Frank Yellin
  6024. +// for some ideas that are used in this implementation.
  6025. +//
  6026. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  6027. +//
  6028. +// This is an implementation of the AES encryption algorithm (Rijndael)
  6029. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  6030. +// to provide both fixed and dynamic block and key lengths and can also
  6031. +// run with either big or little endian internal byte order (see aes.h).
  6032. +// It inputs block and key lengths in bytes with the legal values being
  6033. +// 16, 24 and 32.
  6034. +
  6035. +/*
  6036. + * Modified by Jari Ruusu, May 1 2001
  6037. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  6038. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  6039. + * - Major name space cleanup: Names visible to outside now begin
  6040. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  6041. + * - Removed C++ and DLL support as part of name space cleanup.
  6042. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  6043. + * - Merged precomputed constant tables to aes.c file.
  6044. + * - Removed data alignment restrictions for portability reasons.
  6045. + * - Made block and key lengths accept bit count (128/192/256)
  6046. + * as well as byte count (16/24/32).
  6047. + * - Removed all error checks. This change also eliminated the need
  6048. + * to preinitialize the context struct to zero.
  6049. + * - Removed some totally unused constants.
  6050. + */
  6051. +
  6052. +#ifndef _AES_H
  6053. +#define _AES_H
  6054. +
  6055. +#include <linux/types.h>
  6056. +#include <linux/linkage.h>
  6057. +#include <linux/config.h>
  6058. +#include <linux/module.h>
  6059. +
  6060. +// CONFIGURATION OPTIONS (see also aes.c)
  6061. +//
  6062. +// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or
  6063. +// leave this undefined for dynamically variable block size (this will
  6064. +// result in much slower code).
  6065. +// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). If
  6066. +// left undefined a slower version providing variable block length is compiled
  6067. +
  6068. +#define AES_BLOCK_SIZE 16
  6069. +
  6070. +// The number of key schedule words for different block and key lengths
  6071. +// allowing for method of computation which requires the length to be a
  6072. +// multiple of the key length
  6073. +//
  6074. +// Nk = 4 6 8
  6075. +// -------------
  6076. +// Nb = 4 | 60 60 64
  6077. +// 6 | 96 90 96
  6078. +// 8 | 120 120 120
  6079. +
  6080. +#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32)
  6081. +#define AES_KS_LENGTH 120
  6082. +#define AES_RC_LENGTH 29
  6083. +#else
  6084. +#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE) // parenthesized: safe inside larger expressions
  6085. +#define AES_RC_LENGTH ((9 * AES_BLOCK_SIZE) / 8 - 8) // parenthesized: safe inside larger expressions
  6086. +#endif
  6087. +
  6088. +typedef struct
  6089. +{
  6090. + u_int32_t aes_Nkey; // the number of words in the key input block
  6091. + u_int32_t aes_Nrnd; // the number of cipher rounds
  6092. + u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule
  6093. + u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule
  6094. +#if !defined(AES_BLOCK_SIZE)
  6095. + u_int32_t aes_Ncol; // the number of columns in the cipher state
  6096. +#endif
  6097. +} aes_context;
  6098. +
  6099. +// avoid global name conflict with mainline kernel
  6100. +#define aes_set_key _aes_set_key
  6101. +#define aes_encrypt _aes_encrypt
  6102. +#define aes_decrypt _aes_decrypt
  6103. +
  6104. +// THE CIPHER INTERFACE
  6105. +
  6106. +#if !defined(AES_BLOCK_SIZE)
  6107. +extern void aes_set_blk(aes_context *, const int);
  6108. +#endif
  6109. +
  6110. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  6111. + asmlinkage
  6112. +#endif
  6113. +extern void aes_set_key(aes_context *, const unsigned char [], const int, const int);
  6114. +
  6115. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  6116. + asmlinkage
  6117. +#endif
  6118. +extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []);
  6119. +
  6120. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  6121. + asmlinkage
  6122. +#endif
  6123. +extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []);
  6124. +
  6125. +// The block length inputs to aes_set_blk and aes_set_key are in numbers
  6126. +// of bytes or bits. The calls to subroutines must be made in the above
  6127. +// order but multiple calls can be made without repeating earlier calls
  6128. +// if their parameters have not changed.
  6129. +
  6130. +#endif // _AES_H
  6131. diff -pruN linux-2.6.9_orig/drivers/misc/crypto-ksym.c linux-2.6.9/drivers/misc/crypto-ksym.c
  6132. --- linux-2.6.9_orig/drivers/misc/crypto-ksym.c 1970-01-01 01:00:00.000000000 +0100
  6133. +++ linux-2.6.9/drivers/misc/crypto-ksym.c 2004-10-25 14:26:31.883227184 +0200
  6134. @@ -0,0 +1,7 @@
  6135. +#include <linux/module.h>
  6136. +#include "aes.h"
  6137. +#include "md5.h"
  6138. +EXPORT_SYMBOL(aes_set_key);
  6139. +EXPORT_SYMBOL(aes_encrypt);
  6140. +EXPORT_SYMBOL(aes_decrypt);
  6141. +EXPORT_SYMBOL(md5_transform_CPUbyteorder);
  6142. diff -pruN linux-2.6.9_orig/drivers/misc/md5-amd64.S linux-2.6.9/drivers/misc/md5-amd64.S
  6143. --- linux-2.6.9_orig/drivers/misc/md5-amd64.S 1970-01-01 01:00:00.000000000 +0100
  6144. +++ linux-2.6.9/drivers/misc/md5-amd64.S 2004-10-25 14:26:31.884227032 +0200
  6145. @@ -0,0 +1,200 @@
  6146. +//
  6147. +// md5-amd64.S
  6148. +//
  6149. +// Written by Jari Ruusu, October 1 2003
  6150. +//
  6151. +// Copyright 2003 by Jari Ruusu.
  6152. +// Redistribution of this file is permitted under the GNU Public License.
  6153. +//
  6154. +
  6155. +// Modified by Jari Ruusu, June 12 2004
  6156. +// - Converted 32 bit x86 code to 64 bit AMD64 code
  6157. +
  6158. +// A MD5 transform implementation for AMD64 compatible processors.
  6159. +// This code does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11
  6160. +// registers or the arithmetic status flags. However, the rbx, rbp and
  6161. +// r12-r15 registers are preserved across calls.
  6162. +
  6163. +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t *in)
  6164. +
  6165. +#if defined(USE_UNDERLINE)
  6166. +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder
  6167. +#endif
  6168. +#if !defined(ALIGN64BYTES)
  6169. +# define ALIGN64BYTES 64
  6170. +#endif
  6171. +
  6172. + .file "md5-amd64.S"
  6173. + .globl md5_transform_CPUbyteorder
  6174. +
  6175. +// rdi = pointer to hash[4] array which is read and written
  6176. +// rsi = pointer to in[16] array which is read only
  6177. +
  6178. + .text
  6179. + .align ALIGN64BYTES
  6180. +md5_transform_CPUbyteorder:
  6181. + movl 12(%rdi),%eax
  6182. + movl 8(%rdi),%ecx
  6183. + movl (%rdi),%r8d
  6184. + movl 4(%rdi),%r9d
  6185. + movl (%rsi),%r10d
  6186. + prefetcht0 60(%rsi)
  6187. + movl %eax,%edx
  6188. + xorl %ecx,%eax
  6189. +
  6190. +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \
  6191. + addl $p4c,p1w ;\
  6192. + andl p2x,%eax ;\
  6193. + addl %r10d,p1w ;\
  6194. + xorl p3z,%eax ;\
  6195. + movl p6Nin*4(%rsi),%r10d ;\
  6196. + addl %eax,p1w ;\
  6197. + movl p7Nz,%eax ;\
  6198. + roll $p5s,p1w ;\
  6199. + xorl p8Ny,%eax ;\
  6200. + addl p2x,p1w
  6201. +
  6202. + REPEAT1(%r8d,%r9d,%edx,0xd76aa478, 7, 1,%ecx,%r9d)
  6203. + REPEAT1(%edx,%r8d,%ecx,0xe8c7b756,12, 2,%r9d,%r8d)
  6204. + REPEAT1(%ecx,%edx,%r9d,0x242070db,17, 3,%r8d,%edx)
  6205. + REPEAT1(%r9d,%ecx,%r8d,0xc1bdceee,22, 4,%edx,%ecx)
  6206. + REPEAT1(%r8d,%r9d,%edx,0xf57c0faf, 7, 5,%ecx,%r9d)
  6207. + REPEAT1(%edx,%r8d,%ecx,0x4787c62a,12, 6,%r9d,%r8d)
  6208. + REPEAT1(%ecx,%edx,%r9d,0xa8304613,17, 7,%r8d,%edx)
  6209. + REPEAT1(%r9d,%ecx,%r8d,0xfd469501,22, 8,%edx,%ecx)
  6210. + REPEAT1(%r8d,%r9d,%edx,0x698098d8, 7, 9,%ecx,%r9d)
  6211. + REPEAT1(%edx,%r8d,%ecx,0x8b44f7af,12,10,%r9d,%r8d)
  6212. + REPEAT1(%ecx,%edx,%r9d,0xffff5bb1,17,11,%r8d,%edx)
  6213. + REPEAT1(%r9d,%ecx,%r8d,0x895cd7be,22,12,%edx,%ecx)
  6214. + REPEAT1(%r8d,%r9d,%edx,0x6b901122, 7,13,%ecx,%r9d)
  6215. + REPEAT1(%edx,%r8d,%ecx,0xfd987193,12,14,%r9d,%r8d)
  6216. + REPEAT1(%ecx,%edx,%r9d,0xa679438e,17,15,%r8d,%edx)
  6217. +
  6218. + addl $0x49b40821,%r9d
  6219. + andl %ecx,%eax
  6220. + addl %r10d,%r9d
  6221. + xorl %r8d,%eax
  6222. + movl 1*4(%rsi),%r10d
  6223. + addl %eax,%r9d
  6224. + movl %ecx,%eax
  6225. + roll $22,%r9d
  6226. + addl %ecx,%r9d
  6227. +
  6228. +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \
  6229. + xorl p2x,%eax ;\
  6230. + addl $p5c,p1w ;\
  6231. + andl p4z,%eax ;\
  6232. + addl %r10d,p1w ;\
  6233. + xorl p3y,%eax ;\
  6234. + movl p7Nin*4(%rsi),%r10d ;\
  6235. + addl %eax,p1w ;\
  6236. + movl p8Ny,%eax ;\
  6237. + roll $p6s,p1w ;\
  6238. + addl p2x,p1w
  6239. +
  6240. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xf61e2562, 5, 6,%r9d)
  6241. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc040b340, 9,11,%r8d)
  6242. + REPEAT2(%ecx,%edx,%r8d,%r9d,0x265e5a51,14, 0,%edx)
  6243. + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe9b6c7aa,20, 5,%ecx)
  6244. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xd62f105d, 5,10,%r9d)
  6245. + REPEAT2(%edx,%r8d,%r9d,%ecx,0x02441453, 9,15,%r8d)
  6246. + REPEAT2(%ecx,%edx,%r8d,%r9d,0xd8a1e681,14, 4,%edx)
  6247. + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe7d3fbc8,20, 9,%ecx)
  6248. + REPEAT2(%r8d,%r9d,%ecx,%edx,0x21e1cde6, 5,14,%r9d)
  6249. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc33707d6, 9, 3,%r8d)
  6250. + REPEAT2(%ecx,%edx,%r8d,%r9d,0xf4d50d87,14, 8,%edx)
  6251. + REPEAT2(%r9d,%ecx,%edx,%r8d,0x455a14ed,20,13,%ecx)
  6252. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xa9e3e905, 5, 2,%r9d)
  6253. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xfcefa3f8, 9, 7,%r8d)
  6254. + REPEAT2(%ecx,%edx,%r8d,%r9d,0x676f02d9,14,12,%edx)
  6255. +
  6256. + xorl %ecx,%eax
  6257. + addl $0x8d2a4c8a,%r9d
  6258. + andl %r8d,%eax
  6259. + addl %r10d,%r9d
  6260. + xorl %edx,%eax
  6261. + movl 5*4(%rsi),%r10d
  6262. + addl %eax,%r9d
  6263. + movl %ecx,%eax
  6264. + roll $20,%r9d
  6265. + xorl %edx,%eax
  6266. + addl %ecx,%r9d
  6267. +
  6268. +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \
  6269. + addl $p3c,p1w ;\
  6270. + xorl p2x,%eax ;\
  6271. + addl %r10d,p1w ;\
  6272. + movl p5Nin*4(%rsi),%r10d ;\
  6273. + addl %eax,p1w ;\
  6274. + movl p6Ny,%eax ;\
  6275. + roll $p4s,p1w ;\
  6276. + xorl p7Nz,%eax ;\
  6277. + addl p2x,p1w
  6278. +
  6279. + REPEAT3(%r8d,%r9d,0xfffa3942, 4, 8,%r9d,%ecx)
  6280. + REPEAT3(%edx,%r8d,0x8771f681,11,11,%r8d,%r9d)
  6281. + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%r8d)
  6282. + REPEAT3(%r9d,%ecx,0xfde5380c,23, 1,%ecx,%edx)
  6283. + REPEAT3(%r8d,%r9d,0xa4beea44, 4, 4,%r9d,%ecx)
  6284. + REPEAT3(%edx,%r8d,0x4bdecfa9,11, 7,%r8d,%r9d)
  6285. + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%r8d)
  6286. + REPEAT3(%r9d,%ecx,0xbebfbc70,23,13,%ecx,%edx)
  6287. + REPEAT3(%r8d,%r9d,0x289b7ec6, 4, 0,%r9d,%ecx)
  6288. + REPEAT3(%edx,%r8d,0xeaa127fa,11, 3,%r8d,%r9d)
  6289. + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%r8d)
  6290. + REPEAT3(%r9d,%ecx,0x04881d05,23, 9,%ecx,%edx)
  6291. + REPEAT3(%r8d,%r9d,0xd9d4d039, 4,12,%r9d,%ecx)
  6292. + REPEAT3(%edx,%r8d,0xe6db99e5,11,15,%r8d,%r9d)
  6293. + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%r8d)
  6294. +
  6295. + addl $0xc4ac5665,%r9d
  6296. + xorl %ecx,%eax
  6297. + addl %r10d,%r9d
  6298. + movl (%rsi),%r10d
  6299. + addl %eax,%r9d
  6300. + movl %edx,%eax
  6301. + roll $23,%r9d
  6302. + notl %eax
  6303. + addl %ecx,%r9d
  6304. +
  6305. +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \
  6306. + addl $p4c,p1w ;\
  6307. + orl p2x,%eax ;\
  6308. + addl %r10d,p1w ;\
  6309. + xorl p3y,%eax ;\
  6310. + movl p6Nin*4(%rsi),%r10d ;\
  6311. + addl %eax,p1w ;\
  6312. + movl p7Nz,%eax ;\
  6313. + roll $p5s,p1w ;\
  6314. + notl %eax ;\
  6315. + addl p2x,p1w
  6316. +
  6317. + REPEAT4(%r8d,%r9d,%ecx,0xf4292244, 6, 7,%ecx)
  6318. + REPEAT4(%edx,%r8d,%r9d,0x432aff97,10,14,%r9d)
  6319. + REPEAT4(%ecx,%edx,%r8d,0xab9423a7,15, 5,%r8d)
  6320. + REPEAT4(%r9d,%ecx,%edx,0xfc93a039,21,12,%edx)
  6321. + REPEAT4(%r8d,%r9d,%ecx,0x655b59c3, 6, 3,%ecx)
  6322. + REPEAT4(%edx,%r8d,%r9d,0x8f0ccc92,10,10,%r9d)
  6323. + REPEAT4(%ecx,%edx,%r8d,0xffeff47d,15, 1,%r8d)
  6324. + REPEAT4(%r9d,%ecx,%edx,0x85845dd1,21, 8,%edx)
  6325. + REPEAT4(%r8d,%r9d,%ecx,0x6fa87e4f, 6,15,%ecx)
  6326. + REPEAT4(%edx,%r8d,%r9d,0xfe2ce6e0,10, 6,%r9d)
  6327. + REPEAT4(%ecx,%edx,%r8d,0xa3014314,15,13,%r8d)
  6328. + REPEAT4(%r9d,%ecx,%edx,0x4e0811a1,21, 4,%edx)
  6329. + REPEAT4(%r8d,%r9d,%ecx,0xf7537e82, 6,11,%ecx)
  6330. + REPEAT4(%edx,%r8d,%r9d,0xbd3af235,10, 2,%r9d)
  6331. + REPEAT4(%ecx,%edx,%r8d,0x2ad7d2bb,15, 9,%r8d)
  6332. +
  6333. + addl $0xeb86d391,%r9d
  6334. + orl %ecx,%eax
  6335. + addl %r10d,%r9d
  6336. + xorl %edx,%eax
  6337. + addl %eax,%r9d
  6338. + roll $21,%r9d
  6339. + addl %ecx,%r9d
  6340. +
  6341. + addl %r8d,(%rdi)
  6342. + addl %r9d,4(%rdi)
  6343. + addl %ecx,8(%rdi)
  6344. + addl %edx,12(%rdi)
  6345. + ret
  6346. diff -pruN linux-2.6.9_orig/drivers/misc/md5-x86.S linux-2.6.9/drivers/misc/md5-x86.S
  6347. --- linux-2.6.9_orig/drivers/misc/md5-x86.S 1970-01-01 01:00:00.000000000 +0100
  6348. +++ linux-2.6.9/drivers/misc/md5-x86.S 2004-10-25 14:26:31.885226880 +0200
  6349. @@ -0,0 +1,207 @@
  6350. +//
  6351. +// md5-x86.S
  6352. +//
  6353. +// Written by Jari Ruusu, October 1 2003
  6354. +//
  6355. +// Copyright 2003 by Jari Ruusu.
  6356. +// Redistribution of this file is permitted under the GNU Public License.
  6357. +//
  6358. +
  6359. +// A MD5 transform implementation for x86 compatible processors. This
  6360. +// version uses i386 instruction set but instruction scheduling is optimized
  6361. +// for Pentium-2. This code does not preserve the eax, ecx or edx registers
  6362. +// or the arithmetic status flags. However, the ebx, esi, edi, and ebp
  6363. +// registers are preserved across calls.
  6364. +
  6365. +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t *in)
  6366. +
  6367. +#if defined(USE_UNDERLINE)
  6368. +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder
  6369. +#endif
  6370. +#if !defined(ALIGN32BYTES)
  6371. +# define ALIGN32BYTES 32
  6372. +#endif
  6373. +
  6374. + .file "md5-x86.S"
  6375. + .globl md5_transform_CPUbyteorder
  6376. + .text
  6377. + .align ALIGN32BYTES
  6378. +
  6379. +md5_transform_CPUbyteorder:
  6380. + push %ebp
  6381. + mov 4+4(%esp),%eax // pointer to 'hash' input
  6382. + mov 8+4(%esp),%ebp // pointer to 'in' array
  6383. + push %ebx
  6384. + push %esi
  6385. + push %edi
  6386. +
  6387. + mov (%eax),%esi
  6388. + mov 4(%eax),%edi
  6389. + mov 8(%eax),%ecx
  6390. + mov 12(%eax),%eax
  6391. + mov (%ebp),%ebx
  6392. + mov %eax,%edx
  6393. + xor %ecx,%eax
  6394. +
  6395. +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \
  6396. + add $p4c,p1w ;\
  6397. + and p2x,%eax ;\
  6398. + add %ebx,p1w ;\
  6399. + xor p3z,%eax ;\
  6400. + mov p6Nin*4(%ebp),%ebx ;\
  6401. + add %eax,p1w ;\
  6402. + mov p7Nz,%eax ;\
  6403. + rol $p5s,p1w ;\
  6404. + xor p8Ny,%eax ;\
  6405. + add p2x,p1w
  6406. +
  6407. + REPEAT1(%esi,%edi,%edx,0xd76aa478, 7, 1,%ecx,%edi)
  6408. + REPEAT1(%edx,%esi,%ecx,0xe8c7b756,12, 2,%edi,%esi)
  6409. + REPEAT1(%ecx,%edx,%edi,0x242070db,17, 3,%esi,%edx)
  6410. + REPEAT1(%edi,%ecx,%esi,0xc1bdceee,22, 4,%edx,%ecx)
  6411. + REPEAT1(%esi,%edi,%edx,0xf57c0faf, 7, 5,%ecx,%edi)
  6412. + REPEAT1(%edx,%esi,%ecx,0x4787c62a,12, 6,%edi,%esi)
  6413. + REPEAT1(%ecx,%edx,%edi,0xa8304613,17, 7,%esi,%edx)
  6414. + REPEAT1(%edi,%ecx,%esi,0xfd469501,22, 8,%edx,%ecx)
  6415. + REPEAT1(%esi,%edi,%edx,0x698098d8, 7, 9,%ecx,%edi)
  6416. + REPEAT1(%edx,%esi,%ecx,0x8b44f7af,12,10,%edi,%esi)
  6417. + REPEAT1(%ecx,%edx,%edi,0xffff5bb1,17,11,%esi,%edx)
  6418. + REPEAT1(%edi,%ecx,%esi,0x895cd7be,22,12,%edx,%ecx)
  6419. + REPEAT1(%esi,%edi,%edx,0x6b901122, 7,13,%ecx,%edi)
  6420. + REPEAT1(%edx,%esi,%ecx,0xfd987193,12,14,%edi,%esi)
  6421. + REPEAT1(%ecx,%edx,%edi,0xa679438e,17,15,%esi,%edx)
  6422. +
  6423. + add $0x49b40821,%edi
  6424. + and %ecx,%eax
  6425. + add %ebx,%edi
  6426. + xor %esi,%eax
  6427. + mov 1*4(%ebp),%ebx
  6428. + add %eax,%edi
  6429. + mov %ecx,%eax
  6430. + rol $22,%edi
  6431. + add %ecx,%edi
  6432. +
  6433. +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \
  6434. + xor p2x,%eax ;\
  6435. + add $p5c,p1w ;\
  6436. + and p4z,%eax ;\
  6437. + add %ebx,p1w ;\
  6438. + xor p3y,%eax ;\
  6439. + mov p7Nin*4(%ebp),%ebx ;\
  6440. + add %eax,p1w ;\
  6441. + mov p8Ny,%eax ;\
  6442. + rol $p6s,p1w ;\
  6443. + add p2x,p1w
  6444. +
  6445. + REPEAT2(%esi,%edi,%ecx,%edx,0xf61e2562, 5, 6,%edi)
  6446. + REPEAT2(%edx,%esi,%edi,%ecx,0xc040b340, 9,11,%esi)
  6447. + REPEAT2(%ecx,%edx,%esi,%edi,0x265e5a51,14, 0,%edx)
  6448. + REPEAT2(%edi,%ecx,%edx,%esi,0xe9b6c7aa,20, 5,%ecx)
  6449. + REPEAT2(%esi,%edi,%ecx,%edx,0xd62f105d, 5,10,%edi)
  6450. + REPEAT2(%edx,%esi,%edi,%ecx,0x02441453, 9,15,%esi)
  6451. + REPEAT2(%ecx,%edx,%esi,%edi,0xd8a1e681,14, 4,%edx)
  6452. + REPEAT2(%edi,%ecx,%edx,%esi,0xe7d3fbc8,20, 9,%ecx)
  6453. + REPEAT2(%esi,%edi,%ecx,%edx,0x21e1cde6, 5,14,%edi)
  6454. + REPEAT2(%edx,%esi,%edi,%ecx,0xc33707d6, 9, 3,%esi)
  6455. + REPEAT2(%ecx,%edx,%esi,%edi,0xf4d50d87,14, 8,%edx)
  6456. + REPEAT2(%edi,%ecx,%edx,%esi,0x455a14ed,20,13,%ecx)
  6457. + REPEAT2(%esi,%edi,%ecx,%edx,0xa9e3e905, 5, 2,%edi)
  6458. + REPEAT2(%edx,%esi,%edi,%ecx,0xfcefa3f8, 9, 7,%esi)
  6459. + REPEAT2(%ecx,%edx,%esi,%edi,0x676f02d9,14,12,%edx)
  6460. +
  6461. + xor %ecx,%eax
  6462. + add $0x8d2a4c8a,%edi
  6463. + and %esi,%eax
  6464. + add %ebx,%edi
  6465. + xor %edx,%eax
  6466. + mov 5*4(%ebp),%ebx
  6467. + add %eax,%edi
  6468. + mov %ecx,%eax
  6469. + rol $20,%edi
  6470. + xor %edx,%eax
  6471. + add %ecx,%edi
  6472. +
  6473. +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \
  6474. + add $p3c,p1w ;\
  6475. + xor p2x,%eax ;\
  6476. + add %ebx,p1w ;\
  6477. + mov p5Nin*4(%ebp),%ebx ;\
  6478. + add %eax,p1w ;\
  6479. + mov p6Ny,%eax ;\
  6480. + rol $p4s,p1w ;\
  6481. + xor p7Nz,%eax ;\
  6482. + add p2x,p1w
  6483. +
  6484. + REPEAT3(%esi,%edi,0xfffa3942, 4, 8,%edi,%ecx)
  6485. + REPEAT3(%edx,%esi,0x8771f681,11,11,%esi,%edi)
  6486. + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%esi)
  6487. + REPEAT3(%edi,%ecx,0xfde5380c,23, 1,%ecx,%edx)
  6488. + REPEAT3(%esi,%edi,0xa4beea44, 4, 4,%edi,%ecx)
  6489. + REPEAT3(%edx,%esi,0x4bdecfa9,11, 7,%esi,%edi)
  6490. + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%esi)
  6491. + REPEAT3(%edi,%ecx,0xbebfbc70,23,13,%ecx,%edx)
  6492. + REPEAT3(%esi,%edi,0x289b7ec6, 4, 0,%edi,%ecx)
  6493. + REPEAT3(%edx,%esi,0xeaa127fa,11, 3,%esi,%edi)
  6494. + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%esi)
  6495. + REPEAT3(%edi,%ecx,0x04881d05,23, 9,%ecx,%edx)
  6496. + REPEAT3(%esi,%edi,0xd9d4d039, 4,12,%edi,%ecx)
  6497. + REPEAT3(%edx,%esi,0xe6db99e5,11,15,%esi,%edi)
  6498. + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%esi)
  6499. +
  6500. + add $0xc4ac5665,%edi
  6501. + xor %ecx,%eax
  6502. + add %ebx,%edi
  6503. + mov (%ebp),%ebx
  6504. + add %eax,%edi
  6505. + mov %edx,%eax
  6506. + rol $23,%edi
  6507. + not %eax
  6508. + add %ecx,%edi
  6509. +
  6510. +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \
  6511. + add $p4c,p1w ;\
  6512. + or p2x,%eax ;\
  6513. + add %ebx,p1w ;\
  6514. + xor p3y,%eax ;\
  6515. + mov p6Nin*4(%ebp),%ebx ;\
  6516. + add %eax,p1w ;\
  6517. + mov p7Nz,%eax ;\
  6518. + rol $p5s,p1w ;\
  6519. + not %eax ;\
  6520. + add p2x,p1w
  6521. +
  6522. + REPEAT4(%esi,%edi,%ecx,0xf4292244, 6, 7,%ecx)
  6523. + REPEAT4(%edx,%esi,%edi,0x432aff97,10,14,%edi)
  6524. + REPEAT4(%ecx,%edx,%esi,0xab9423a7,15, 5,%esi)
  6525. + REPEAT4(%edi,%ecx,%edx,0xfc93a039,21,12,%edx)
  6526. + REPEAT4(%esi,%edi,%ecx,0x655b59c3, 6, 3,%ecx)
  6527. + REPEAT4(%edx,%esi,%edi,0x8f0ccc92,10,10,%edi)
  6528. + REPEAT4(%ecx,%edx,%esi,0xffeff47d,15, 1,%esi)
  6529. + REPEAT4(%edi,%ecx,%edx,0x85845dd1,21, 8,%edx)
  6530. + REPEAT4(%esi,%edi,%ecx,0x6fa87e4f, 6,15,%ecx)
  6531. + REPEAT4(%edx,%esi,%edi,0xfe2ce6e0,10, 6,%edi)
  6532. + REPEAT4(%ecx,%edx,%esi,0xa3014314,15,13,%esi)
  6533. + REPEAT4(%edi,%ecx,%edx,0x4e0811a1,21, 4,%edx)
  6534. + REPEAT4(%esi,%edi,%ecx,0xf7537e82, 6,11,%ecx)
  6535. + REPEAT4(%edx,%esi,%edi,0xbd3af235,10, 2,%edi)
  6536. + REPEAT4(%ecx,%edx,%esi,0x2ad7d2bb,15, 9,%esi)
  6537. +
  6538. + add $0xeb86d391,%edi
  6539. + or %ecx,%eax
  6540. + add %ebx,%edi
  6541. + xor %edx,%eax
  6542. + mov 4+16(%esp),%ebp // pointer to 'hash' output
  6543. + add %eax,%edi
  6544. + rol $21,%edi
  6545. + add %ecx,%edi
  6546. +
  6547. + add %esi,(%ebp)
  6548. + add %edi,4(%ebp)
  6549. + add %ecx,8(%ebp)
  6550. + add %edx,12(%ebp)
  6551. +
  6552. + pop %edi
  6553. + pop %esi
  6554. + pop %ebx
  6555. + pop %ebp
  6556. + ret
  6557. diff -pruN linux-2.6.9_orig/drivers/misc/md5.c linux-2.6.9/drivers/misc/md5.c
  6558. --- linux-2.6.9_orig/drivers/misc/md5.c 1970-01-01 01:00:00.000000000 +0100
  6559. +++ linux-2.6.9/drivers/misc/md5.c 2004-10-25 14:26:31.885226880 +0200
  6560. @@ -0,0 +1,106 @@
  6561. +/*
  6562. + * MD5 Message Digest Algorithm (RFC1321).
  6563. + *
  6564. + * Derived from cryptoapi implementation, originally based on the
  6565. + * public domain implementation written by Colin Plumb in 1993.
  6566. + *
  6567. + * Copyright (c) Cryptoapi developers.
  6568. + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  6569. + *
  6570. + * This program is free software; you can redistribute it and/or modify it
  6571. + * under the terms of the GNU General Public License as published by the Free
  6572. + * Software Foundation; either version 2 of the License, or (at your option)
  6573. + * any later version.
  6574. + */
  6575. +
  6576. +#include "md5.h"
  6577. +
  6578. +#define MD5_F1(x, y, z) (z ^ (x & (y ^ z)))
  6579. +#define MD5_F2(x, y, z) MD5_F1(z, x, y)
  6580. +#define MD5_F3(x, y, z) (x ^ y ^ z)
  6581. +#define MD5_F4(x, y, z) (y ^ (x | ~z))
  6582. +#define MD5_STEP(f, w, x, y, z, in, s) \
  6583. + (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
  6584. +/* Fold one 16-word block in[0..15] into the 4-word state hash[0..3], in place; no byte swapping is done here. */
  6585. +void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in)
  6586. +{
  6587. + u_int32_t a, b, c, d;
  6588. + /* Work on local copies; hash[] itself is only updated at the very end. */
  6589. + a = hash[0];
  6590. + b = hash[1];
  6591. + c = hash[2];
  6592. + d = hash[3];
  6593. + /* Steps 1-16: MD5_F1, input words used in order in[0]..in[15]. */
  6594. + MD5_STEP(MD5_F1, a, b, c, d, in[0] + 0xd76aa478, 7);
  6595. + MD5_STEP(MD5_F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
  6596. + MD5_STEP(MD5_F1, c, d, a, b, in[2] + 0x242070db, 17);
  6597. + MD5_STEP(MD5_F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
  6598. + MD5_STEP(MD5_F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
  6599. + MD5_STEP(MD5_F1, d, a, b, c, in[5] + 0x4787c62a, 12);
  6600. + MD5_STEP(MD5_F1, c, d, a, b, in[6] + 0xa8304613, 17);
  6601. + MD5_STEP(MD5_F1, b, c, d, a, in[7] + 0xfd469501, 22);
  6602. + MD5_STEP(MD5_F1, a, b, c, d, in[8] + 0x698098d8, 7);
  6603. + MD5_STEP(MD5_F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
  6604. + MD5_STEP(MD5_F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
  6605. + MD5_STEP(MD5_F1, b, c, d, a, in[11] + 0x895cd7be, 22);
  6606. + MD5_STEP(MD5_F1, a, b, c, d, in[12] + 0x6b901122, 7);
  6607. + MD5_STEP(MD5_F1, d, a, b, c, in[13] + 0xfd987193, 12);
  6608. + MD5_STEP(MD5_F1, c, d, a, b, in[14] + 0xa679438e, 17);
  6609. + MD5_STEP(MD5_F1, b, c, d, a, in[15] + 0x49b40821, 22);
  6610. + /* Steps 17-32: MD5_F2, word index starts at 1 and advances by 5 modulo 16. */
  6611. + MD5_STEP(MD5_F2, a, b, c, d, in[1] + 0xf61e2562, 5);
  6612. + MD5_STEP(MD5_F2, d, a, b, c, in[6] + 0xc040b340, 9);
  6613. + MD5_STEP(MD5_F2, c, d, a, b, in[11] + 0x265e5a51, 14);
  6614. + MD5_STEP(MD5_F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
  6615. + MD5_STEP(MD5_F2, a, b, c, d, in[5] + 0xd62f105d, 5);
  6616. + MD5_STEP(MD5_F2, d, a, b, c, in[10] + 0x02441453, 9);
  6617. + MD5_STEP(MD5_F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
  6618. + MD5_STEP(MD5_F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
  6619. + MD5_STEP(MD5_F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
  6620. + MD5_STEP(MD5_F2, d, a, b, c, in[14] + 0xc33707d6, 9);
  6621. + MD5_STEP(MD5_F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
  6622. + MD5_STEP(MD5_F2, b, c, d, a, in[8] + 0x455a14ed, 20);
  6623. + MD5_STEP(MD5_F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
  6624. + MD5_STEP(MD5_F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
  6625. + MD5_STEP(MD5_F2, c, d, a, b, in[7] + 0x676f02d9, 14);
  6626. + MD5_STEP(MD5_F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
  6627. + /* Steps 33-48: MD5_F3, word index starts at 5 and advances by 3 modulo 16. */
  6628. + MD5_STEP(MD5_F3, a, b, c, d, in[5] + 0xfffa3942, 4);
  6629. + MD5_STEP(MD5_F3, d, a, b, c, in[8] + 0x8771f681, 11);
  6630. + MD5_STEP(MD5_F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
  6631. + MD5_STEP(MD5_F3, b, c, d, a, in[14] + 0xfde5380c, 23);
  6632. + MD5_STEP(MD5_F3, a, b, c, d, in[1] + 0xa4beea44, 4);
  6633. + MD5_STEP(MD5_F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
  6634. + MD5_STEP(MD5_F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
  6635. + MD5_STEP(MD5_F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
  6636. + MD5_STEP(MD5_F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
  6637. + MD5_STEP(MD5_F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
  6638. + MD5_STEP(MD5_F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
  6639. + MD5_STEP(MD5_F3, b, c, d, a, in[6] + 0x04881d05, 23);
  6640. + MD5_STEP(MD5_F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
  6641. + MD5_STEP(MD5_F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
  6642. + MD5_STEP(MD5_F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
  6643. + MD5_STEP(MD5_F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
  6644. + /* Steps 49-64: MD5_F4, word index starts at 0 and advances by 7 modulo 16. */
  6645. + MD5_STEP(MD5_F4, a, b, c, d, in[0] + 0xf4292244, 6);
  6646. + MD5_STEP(MD5_F4, d, a, b, c, in[7] + 0x432aff97, 10);
  6647. + MD5_STEP(MD5_F4, c, d, a, b, in[14] + 0xab9423a7, 15);
  6648. + MD5_STEP(MD5_F4, b, c, d, a, in[5] + 0xfc93a039, 21);
  6649. + MD5_STEP(MD5_F4, a, b, c, d, in[12] + 0x655b59c3, 6);
  6650. + MD5_STEP(MD5_F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
  6651. + MD5_STEP(MD5_F4, c, d, a, b, in[10] + 0xffeff47d, 15);
  6652. + MD5_STEP(MD5_F4, b, c, d, a, in[1] + 0x85845dd1, 21);
  6653. + MD5_STEP(MD5_F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
  6654. + MD5_STEP(MD5_F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
  6655. + MD5_STEP(MD5_F4, c, d, a, b, in[6] + 0xa3014314, 15);
  6656. + MD5_STEP(MD5_F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
  6657. + MD5_STEP(MD5_F4, a, b, c, d, in[4] + 0xf7537e82, 6);
  6658. + MD5_STEP(MD5_F4, d, a, b, c, in[11] + 0xbd3af235, 10);
  6659. + MD5_STEP(MD5_F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
  6660. + MD5_STEP(MD5_F4, b, c, d, a, in[9] + 0xeb86d391, 21);
  6661. + /* Add this block's result into the caller's running hash state. */
  6662. + hash[0] += a;
  6663. + hash[1] += b;
  6664. + hash[2] += c;
  6665. + hash[3] += d;
  6666. +}
  6667. diff -pruN linux-2.6.9_orig/drivers/misc/md5.h linux-2.6.9/drivers/misc/md5.h
  6668. --- linux-2.6.9_orig/drivers/misc/md5.h 1970-01-01 01:00:00.000000000 +0100
  6669. +++ linux-2.6.9/drivers/misc/md5.h 2004-10-25 14:26:31.898224904 +0200
  6670. @@ -0,0 +1,11 @@
  6671. +/* md5.h - prototype of the MD5 block transform md5_transform_CPUbyteorder() */
  6672. +/* NOTE(review): no include guard; harmless for a lone prototype, but confirm before adding definitions here. */
  6673. +#include <linux/types.h>
  6674. +#include <linux/linkage.h>
  6675. +#include <linux/config.h>
  6676. +#include <linux/module.h>
  6677. +/* On x86/x86_64 configs the prototype below is prefixed with asmlinkage. NOTE(review): presumably to match an assembler implementation -- confirm. */
  6678. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  6679. + asmlinkage
  6680. +#endif
  6681. +extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *);
  6682. diff -pruN linux-2.6.9_orig/include/linux/loop.h linux-2.6.9/include/linux/loop.h
  6683. --- linux-2.6.9_orig/include/linux/loop.h 2004-10-18 23:55:36.000000000 +0200
  6684. +++ linux-2.6.9/include/linux/loop.h 2004-10-25 14:26:31.898224904 +0200
  6685. @@ -16,54 +16,57 @@
  6686. #ifdef __KERNEL__
  6687. #include <linux/bio.h>
  6688. #include <linux/blkdev.h>
  6689. +#include <linux/module.h>
  6690. #include <linux/spinlock.h>
  6691. -/* Possible states of device */
  6692. -enum {
  6693. - Lo_unbound,
  6694. - Lo_bound,
  6695. - Lo_rundown,
  6696. -};
  6697. -
  6698. struct loop_func_table;
  6699. struct loop_device {
  6700. int lo_number;
  6701. - int lo_refcnt;
  6702. + int lo_flags;
  6703. loff_t lo_offset;
  6704. loff_t lo_sizelimit;
  6705. - int lo_flags;
  6706. int (*transfer)(struct loop_device *, int cmd,
  6707. - struct page *raw_page, unsigned raw_off,
  6708. - struct page *loop_page, unsigned loop_off,
  6709. - int size, sector_t real_block);
  6710. + char *raw_buf, char *loop_buf, int size,
  6711. + sector_t real_block); /* plain buffer pointers replace the former page+offset pairs */
  6712. + struct loop_func_table *lo_encryption;
  6713. char lo_file_name[LO_NAME_SIZE];
  6714. char lo_crypt_name[LO_NAME_SIZE];
  6715. char lo_encrypt_key[LO_KEY_SIZE];
  6716. int lo_encrypt_key_size;
  6717. - struct loop_func_table *lo_encryption;
  6718. - __u32 lo_init[2];
  6719. uid_t lo_key_owner; /* Who set the key */
  6720. - int (*ioctl)(struct loop_device *, int cmd,
  6721. - unsigned long arg);
  6722. + __u32 lo_init[2];
  6723. + int (*ioctl)(struct loop_device *, int cmd,
  6724. + unsigned long arg);
  6725. struct file * lo_backing_file;
  6726. struct block_device *lo_device;
  6727. - unsigned lo_blocksize;
  6728. - void *key_data;
  6729. + void *key_data;
  6730. int old_gfp_mask;
  6731. spinlock_t lo_lock;
  6732. - struct bio *lo_bio;
  6733. - struct bio *lo_biotail;
  6734. - int lo_state;
  6735. struct semaphore lo_sem;
  6736. - struct semaphore lo_ctl_mutex;
  6737. - struct semaphore lo_bh_mutex;
  6738. atomic_t lo_pending;
  6739. request_queue_t *lo_queue;
  6740. + /* Fields below are new in this patch. NOTE(review): their roles are not documented in this header -- confirm against the loop driver. */
  6741. + struct bio *lo_bio_que0;
  6742. + struct bio *lo_bio_que1;
  6743. + struct bio *lo_bio_que2;
  6744. + struct bio *lo_bio_free0;
  6745. + struct bio *lo_bio_free1;
  6746. + atomic_t lo_bio_barr;
  6747. + int lo_bio_flsh;
  6748. + int lo_bio_need;
  6749. + wait_queue_head_t lo_bio_wait;
  6750. + sector_t lo_offs_sec;
  6751. + sector_t lo_iv_remove;
  6752. + unsigned long lo_bio_flag;
  6753. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB /* NOTE(review): plain #if, so an undefined symbol evaluates as 0 here */
  6754. + void (*lo_keyscrub_fn)(void *);
  6755. + void *lo_keyscrub_ptr;
  6756. +#endif /* CONFIG_BLK_DEV_LOOP_KEYSCRUB */
  6757. };
  6758. #endif /* __KERNEL__ */
  6759. @@ -71,7 +74,8 @@ struct loop_device {
  6760. /*
  6761. * Loop flags
  6762. */
  6763. -#define LO_FLAGS_READ_ONLY 1
  6764. +#define LO_FLAGS_DO_BMAP 1
  6765. +#define LO_FLAGS_READ_ONLY 2 /* was 1 before this patch; NOTE(review): defined outside the __KERNEL__ section, so confirm any userspace users of the old value */
  6766. #include <asm/posix_types.h> /* for __kernel_old_dev_t */
  6767. #include <asm/types.h> /* for __u64 */
  6768. @@ -121,26 +125,25 @@ struct loop_info64 {
  6769. #define LO_CRYPT_IDEA 6
  6770. #define LO_CRYPT_DUMMY 9
  6771. #define LO_CRYPT_SKIPJACK 10
  6772. +#define LO_CRYPT_AES 16
  6773. #define LO_CRYPT_CRYPTOAPI 18
  6774. #define MAX_LO_CRYPT 20
  6775. #ifdef __KERNEL__
  6776. /* Support for loadable transfer modules */
  6777. struct loop_func_table {
  6778. - int number; /* filter type */
  6779. - int (*transfer)(struct loop_device *lo, int cmd,
  6780. - struct page *raw_page, unsigned raw_off,
  6781. - struct page *loop_page, unsigned loop_off,
  6782. - int size, sector_t real_block);
  6783. - int (*init)(struct loop_device *, const struct loop_info64 *);
  6784. + int number; /* filter type */
  6785. + int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
  6786. + char *loop_buf, int size, sector_t real_block); /* same buffer-pointer form as the transfer member of struct loop_device */
  6787. + int (*init)(struct loop_device *, struct loop_info64 *); /* the loop_info64 argument is no longer const-qualified */
  6788. /* release is called from loop_unregister_transfer or clr_fd */
  6789. - int (*release)(struct loop_device *);
  6790. + int (*release)(struct loop_device *);
  6791. int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
  6792. struct module *owner;
  6793. -};
  6794. +};
  6795. int loop_register_transfer(struct loop_func_table *funcs);
  6796. -int loop_unregister_transfer(int number);
  6797. +int loop_unregister_transfer(int number);
  6798. #endif
  6799. /*
  6800. @@ -155,4 +158,5 @@ int loop_unregister_transfer(int number)
  6801. #define LOOP_GET_STATUS64 0x4C05
  6802. #define LOOP_CHANGE_FD 0x4C06
  6803. +#define LOOP_MULTI_KEY_SETUP 0x4C4D /* new code added by this patch; NOTE(review): presumably an ioctl request number like the LOOP_* values above -- confirm */
  6804. #endif