mirror of the now-defunct rocklinux.org
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

6550 lines
207 KiB

  1. # --- ROCK-COPYRIGHT-NOTE-BEGIN ---
  2. #
  3. # This copyright note is auto-generated by ./scripts/Create-CopyPatch.
  4. # Please add additional copyright information _after_ the line containing
  5. # the ROCK-COPYRIGHT-NOTE-END tag. Otherwise it might get removed by
  6. # the ./scripts/Create-CopyPatch script. Do not edit this copyright text!
  7. #
  8. # ROCK Linux: rock-src/package/blindcoder/loop-aes/linux24_cryptoloop.diff
  9. # ROCK Linux is Copyright (C) 1998 - 2005 Clifford Wolf
  10. #
  11. # This patch file is dual-licensed. It is available under the license the
  12. # patched project is licensed under, as long as it is an OpenSource license
  13. # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms
  14. # of the GNU General Public License as published by the Free Software
  15. # Foundation; either version 2 of the License, or (at your option) any later
  16. # version.
  17. #
  18. # --- ROCK-COPYRIGHT-NOTE-END ---
  19. diff -pruN linux-2.4.28_orig/Documentation/Configure.help linux-2.4.28/Documentation/Configure.help
  20. --- linux-2.4.28_orig/Documentation/Configure.help 2004-11-17 12:54:20.000000000 +0100
  21. +++ linux-2.4.28/Documentation/Configure.help 2005-01-11 09:33:37.470527296 +0100
  22. @@ -620,6 +620,21 @@ CONFIG_BLK_STATS
  23. If unsure, say N.
  24. +AES encrypted loop device support
  25. +CONFIG_BLK_DEV_LOOP_AES
  26. + If you want to use the AES encryption algorithm to encrypt loop devices,
  27. + say Y here. If you don't know what to do here, say N.
  28. +
  29. +loop encryption key scrubbing support
  30. +CONFIG_BLK_DEV_LOOP_KEYSCRUB
  31. + Loop encryption key scrubbing moves and inverts key bits in
  32. + kernel RAM so that the thin oxide which forms the storage
  33. + capacitor dielectric of DRAM cells is not permitted to develop
  34. + a detectable property. For more info, see Peter Gutmann's paper:
  35. + http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
  36. +
  37. + Paranoid tinfoil hat crowd say Y here, everyone else say N.
  38. +
  39. ATA/IDE/MFM/RLL support
  40. CONFIG_IDE
  41. If you say Y here, your kernel will be able to manage low cost mass
  42. diff -pruN linux-2.4.28_orig/drivers/block/Config.in linux-2.4.28/drivers/block/Config.in
  43. --- linux-2.4.28_orig/drivers/block/Config.in 2004-08-08 01:26:04.000000000 +0200
  44. +++ linux-2.4.28/drivers/block/Config.in 2005-01-11 09:33:37.471527144 +0100
  45. @@ -42,6 +42,10 @@ dep_tristate 'Micro Memory MM5415 Batter
  46. dep_tristate 'Promise SATA SX8 support' CONFIG_BLK_DEV_SX8 $CONFIG_PCI
  47. tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
  48. +if [ "$CONFIG_BLK_DEV_LOOP" != "n" ]; then
  49. + bool ' AES encrypted loop device support' CONFIG_BLK_DEV_LOOP_AES
  50. + bool ' loop encryption key scrubbing support' CONFIG_BLK_DEV_LOOP_KEYSCRUB
  51. +fi
  52. dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
  53. tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
  54. diff -pruN linux-2.4.28_orig/drivers/block/loop.c linux-2.4.28/drivers/block/loop.c
  55. --- linux-2.4.28_orig/drivers/block/loop.c 2003-08-25 13:44:41.000000000 +0200
  56. +++ linux-2.4.28/drivers/block/loop.c 2005-01-11 09:33:37.475526536 +0100
  57. @@ -2,7 +2,7 @@
  58. * linux/drivers/block/loop.c
  59. *
  60. * Written by Theodore Ts'o, 3/29/93
  61. - *
  62. + *
  63. * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
  64. * permitted under the GNU General Public License.
  65. *
  66. @@ -21,12 +21,12 @@
  67. * Loadable modules and other fixes by AK, 1998
  68. *
  69. * Make real block number available to downstream transfer functions, enables
  70. - * CBC (and relatives) mode encryption requiring unique IVs per data block.
  71. + * CBC (and relatives) mode encryption requiring unique IVs per data block.
  72. * Reed H. Petty, rhp@draper.net
  73. *
  74. * Maximum number of loop devices now dynamic via max_loop module parameter.
  75. * Russell Kroll <rkroll@exploits.org> 19990701
  76. - *
  77. + *
  78. * Maximum number of loop devices when compiled-in now selectable by passing
  79. * max_loop=<1-255> to the kernel on boot.
  80. * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
  81. @@ -39,20 +39,30 @@
  82. * Support up to 256 loop devices
  83. * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  84. *
  85. - * Still To Fix:
  86. - * - Advisory locking is ignored here.
  87. - * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  88. + * AES transfer added. IV is now passed as (512 byte) sector number.
  89. + * Jari Ruusu, May 18 2001
  90. + *
  91. + * External encryption module locking bug fixed.
  92. + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
  93. + *
  94. + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
  95. + * Jari Ruusu, September 2 2001
  96. + *
  97. + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
  98. + * Jari Ruusu, May 23 2002
  99. *
  100. - * WARNING/FIXME:
  101. - * - The block number as IV passing to low level transfer functions is broken:
  102. - * it passes the underlying device's block number instead of the
  103. - * offset. This makes it change for a given block when the file is
  104. - * moved/restored/copied and also doesn't work over NFS.
  105. - * AV, Feb 12, 2000: we pass the logical block number now. It fixes the
  106. - * problem above. Encryption modules that used to rely on the old scheme
  107. - * should just call ->i_mapping->bmap() to calculate the physical block
  108. - * number.
  109. - */
  110. + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and
  111. + * 64 bit sizelimits). Added support for removing offset from IV computations.
  112. + * Jari Ruusu, September 21 2003
  113. + *
  114. + * Added support for MD5 IV computation and multi-key operation.
  115. + * Jari Ruusu, October 8 2003
  116. + *
  117. + *
  118. + * Still To Fix:
  119. + * - Advisory locking is ignored here.
  120. + * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  121. + */
  122. #include <linux/config.h>
  123. #include <linux/module.h>
  124. @@ -71,10 +81,14 @@
  125. #include <linux/smp_lock.h>
  126. #include <linux/swap.h>
  127. #include <linux/slab.h>
  128. +#include <linux/spinlock.h>
  129. #include <asm/uaccess.h>
  130. +#include <asm/byteorder.h>
  131. #include <linux/loop.h>
  132. +#include "../misc/aes.h"
  133. +#include "../misc/md5.h"
  134. #define MAJOR_NR LOOP_MAJOR
  135. @@ -82,21 +96,31 @@ static int max_loop = 8;
  136. static struct loop_device *loop_dev;
  137. static int *loop_sizes;
  138. static int *loop_blksizes;
  139. +static int *loop_hardsizes;
  140. static devfs_handle_t devfs_handle; /* For the directory */
  141. +#if defined(__x86_64__) && defined(CONFIG_IA32_EMULATION)
  142. +# include <asm/ioctl32.h>
  143. +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
  144. +#endif
  145. +#if (defined(__sparc__) || defined(__sparc64__)) && defined(CONFIG_SPARC32_COMPAT)
  146. + extern int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
  147. + extern int unregister_ioctl32_conversion(unsigned int cmd);
  148. + extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
  149. +# define IOCTL32_COMPATIBLE_PTR ((void*)sys_ioctl)
  150. +#endif
  151. +
  152. /*
  153. * Transfer functions
  154. */
  155. static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
  156. char *loop_buf, int size, int real_block)
  157. {
  158. - if (raw_buf != loop_buf) {
  159. - if (cmd == READ)
  160. - memcpy(loop_buf, raw_buf, size);
  161. - else
  162. - memcpy(raw_buf, loop_buf, size);
  163. - }
  164. + /* Only the file backed loop path calls this, and that path expects */
  165. + /* a no-op: no data is copied; we only yield if a resched is pending. */
  166. + if (current->need_resched)
  167. + {set_current_state(TASK_RUNNING);schedule();}
  168. return 0;
  169. }
  170. @@ -118,12 +142,13 @@ static int transfer_xor(struct loop_devi
  171. keysize = lo->lo_encrypt_key_size;
  172. for (i = 0; i < size; i++)
  173. *out++ = *in++ ^ key[(i & 511) % keysize];
  174. + if (current->need_resched)
  175. + {set_current_state(TASK_RUNNING);schedule();}
  176. return 0;
  177. }
  178. static int none_status(struct loop_device *lo, struct loop_info *info)
  179. {
  180. - lo->lo_flags |= LO_FLAGS_BH_REMAP;
  181. return 0;
  182. }
  183. @@ -134,336 +159,949 @@ static int xor_status(struct loop_device
  184. return 0;
  185. }
  186. -struct loop_func_table none_funcs = {
  187. +struct loop_func_table none_funcs = {
  188. number: LO_CRYPT_NONE,
  189. transfer: transfer_none,
  190. init: none_status,
  191. };
  192. -struct loop_func_table xor_funcs = {
  193. +struct loop_func_table xor_funcs = {
  194. number: LO_CRYPT_XOR,
  195. transfer: transfer_xor,
  196. - init: xor_status
  197. + init: xor_status,
  198. };
  199. -/* xfer_funcs[0] is special - its release function is never called */
  200. -struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  201. - &none_funcs,
  202. - &xor_funcs
  203. -};
  204. +#if CONFIG_BLK_DEV_LOOP_AES
  205. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  206. +# define KEY_ALLOC_COUNT 128
  207. +#else
  208. +# define KEY_ALLOC_COUNT 64
  209. +#endif
  210. -#define MAX_DISK_SIZE 1024*1024*1024
  211. +typedef struct { /* per-loop-device AES key state, stored in lo->key_data */
  212. + aes_context *keyPtr[KEY_ALLOC_COUNT]; /* transfer_aes() picks keyPtr[sector & keyMask] */
  213. + unsigned keyMask; /* 0 after allocMultiKey(); 0x3F once multiKeySetup() succeeds */
  214. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  215. + u_int32_t *partialMD5; /* points at the currently live half of partialMD5buf */
  216. + u_int32_t partialMD5buf[8]; /* two 4-word slots; keyScrubWork() alternates between them */
  217. + rwlock_t rwlock; /* read-held per sector in transfer_aes(), write-held while scrubbing */
  218. + unsigned reversed; /* 0 or 4: word index of the live slot in partialMD5buf */
  219. + unsigned blocked; /* nonzero while multiKeySetup() rewrites keys; scrub is skipped */
  220. + struct timer_list timer; /* scrub timer, armed by keyScrubTimerInit() */
  221. +#else
  222. + u_int32_t partialMD5[4]; /* MD5 state that seeds the per-sector IV hash */
  223. +#endif
  224. +} AESmultiKey;
  225. -static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
  226. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  227. +static void keyScrubWork(AESmultiKey *m) /* move live keys to their shadow slots and bit-invert the old copies */
  228. {
  229. - if (S_ISREG(lo_dentry->d_inode->i_mode))
  230. - return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
  231. - if (blk_size[MAJOR(lodev)])
  232. - return blk_size[MAJOR(lodev)][MINOR(lodev)] -
  233. - (lo->lo_offset >> BLOCK_SIZE_BITS);
  234. - return MAX_DISK_SIZE;
  235. + aes_context *a0, *a1;
  236. + u_int32_t *p;
  237. + int x, y, z;
  238. +
  239. + z = m->keyMask + 1; /* number of keys in use; shadow of key x lives at x + z */
  240. + for(x = 0; x < z; x++) {
  241. + a0 = m->keyPtr[x];
  242. + a1 = m->keyPtr[x + z];
  243. + memcpy(a1, a0, sizeof(aes_context)); /* copy live key into the shadow slot */
  244. + m->keyPtr[x] = a1; /* shadow becomes the live key ... */
  245. + m->keyPtr[x + z] = a0; /* ... and the old location becomes the shadow */
  246. + p = (u_int32_t *) a0;
  247. + y = sizeof(aes_context) / sizeof(u_int32_t);
  248. + while(y > 0) {
  249. + *p ^= 0xFFFFFFFF; /* invert every word of the old copy */
  250. + p++;
  251. + y--;
  252. + }
  253. + }
  254. +
  255. + x = m->reversed; /* x is 0 or 4 */
  256. + m->reversed ^= 4;
  257. + y = m->reversed; /* y is 4 or 0 */
  258. + p = &m->partialMD5buf[x];
  259. + memcpy(&m->partialMD5buf[y], p, 16); /* same move-then-invert for the MD5 state */
  260. + m->partialMD5 = &m->partialMD5buf[y];
  261. + p[0] ^= 0xFFFFFFFF;
  262. + p[1] ^= 0xFFFFFFFF;
  263. + p[2] ^= 0xFFFFFFFF;
  264. + p[3] ^= 0xFFFFFFFF;
  265. +
  266. + /* try to flush dirty cache data to RAM */
  267. +#if defined(CONFIG_X86_64) || (defined(CONFIG_X86) && !defined(CONFIG_M386) && !defined(CONFIG_CPU_386))
  268. + __asm__ __volatile__ ("wbinvd": : :"memory");
  269. +#else
  270. + mb();
  271. +#endif
  272. }
  273. -static void figure_loop_size(struct loop_device *lo)
  274. +/* Scrub callback; called only from loop thread process context. Runs keyScrubWork() under the write lock unless 'blocked' is set. */
  275. +static void keyScrubThreadFn(AESmultiKey *m)
  276. {
  277. - loop_sizes[lo->lo_number] = compute_loop_size(lo,
  278. - lo->lo_backing_file->f_dentry,
  279. - lo->lo_device);
  280. + write_lock(&m->rwlock);
  281. + if(!m->blocked) keyScrubWork(m); /* multiKeySetup() sets 'blocked' while it rewrites keys */
  282. + write_unlock(&m->rwlock);
  283. }
  284. -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
  285. - loff_t pos)
  286. +static void keyScrubTimerFn(unsigned long); /* file-scope forward declaration: C forbids 'static' on block-scope function declarations */
  287. +static void keyScrubTimerInit(struct loop_device *lo) /* arm the key scrub timer to fire one second (HZ jiffies) from now */
  288. {
  289. - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
  290. - struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
  291. - struct address_space_operations *aops = mapping->a_ops;
  292. - struct page *page;
  293. - char *kaddr, *data;
  294. - unsigned long index;
  295. - unsigned size, offset;
  296. - int len;
  297. + AESmultiKey *m;
  298. + unsigned long expire;
  299. - down(&mapping->host->i_sem);
  300. - index = pos >> PAGE_CACHE_SHIFT;
  301. - offset = pos & (PAGE_CACHE_SIZE - 1);
  302. - len = bh->b_size;
  303. - data = bh->b_data;
  304. - while (len > 0) {
  305. - int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
  306. - int transfer_result;
  307. -
  308. - size = PAGE_CACHE_SIZE - offset;
  309. - if (size > len)
  310. - size = len;
  311. + m = (AESmultiKey *)lo->key_data;
  312. + expire = jiffies + HZ;
  313. + init_timer(&m->timer);
  314. + m->timer.expires = expire;
  315. + m->timer.data = (unsigned long)lo; /* handed back to keyScrubTimerFn() as its argument */
  316. + m->timer.function = keyScrubTimerFn;
  317. + add_timer(&m->timer);
  318. +}
  319. - page = grab_cache_page(mapping, index);
  320. - if (!page)
  321. - goto fail;
  322. - kaddr = kmap(page);
  323. - if (aops->prepare_write(file, page, offset, offset+size))
  324. - goto unlock;
  325. - flush_dcache_page(page);
  326. - transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV);
  327. - if (transfer_result) {
  328. - /*
  329. - * The transfer failed, but we still write the data to
  330. - * keep prepare/commit calls balanced.
  331. - */
  332. - printk(KERN_ERR "loop: transfer error block %ld\n", index);
  333. - memset(kaddr + offset, 0, size);
  334. - }
  335. - if (aops->commit_write(file, page, offset, offset+size))
  336. - goto unlock;
  337. - if (transfer_result)
  338. - goto unlock;
  339. - kunmap(page);
  340. - data += size;
  341. - len -= size;
  342. - offset = 0;
  343. - index++;
  344. - pos += size;
  345. - UnlockPage(page);
  346. - page_cache_release(page);
  347. - }
  348. - up(&mapping->host->i_sem);
  349. - return 0;
  350. +/* Timer callback; called only from timer handler context. 'd' is the loop_device pointer stored in timer.data. */
  351. +static void keyScrubTimerFn(unsigned long d)
  352. +{
  353. + struct loop_device *lo = (struct loop_device *)d;
  354. + extern void loop_add_keyscrub_fn(struct loop_device *, void (*)(void *), void *);
  355. -unlock:
  356. - kunmap(page);
  357. - UnlockPage(page);
  358. - page_cache_release(page);
  359. -fail:
  360. - up(&mapping->host->i_sem);
  361. - return -1;
  362. + /* rw lock needs process context, so make loop thread do scrubbing */
  363. + loop_add_keyscrub_fn(lo, (void (*)(void*))keyScrubThreadFn, lo->key_data);
  364. + /* start timer again, so this handler runs again after another HZ jiffies */
  365. + keyScrubTimerInit(lo);
  366. }
  367. +#endif
  368. -struct lo_read_data {
  369. - struct loop_device *lo;
  370. - char *data;
  371. - int bsize;
  372. -};
  373. -
  374. -static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
  375. +static AESmultiKey *allocMultiKey(void) /* allocate a zeroed AESmultiKey plus its first chunk(s) of key contexts; returns 0 on failure */
  376. {
  377. - char *kaddr;
  378. - unsigned long count = desc->count;
  379. - struct lo_read_data *p = (struct lo_read_data*)desc->buf;
  380. - struct loop_device *lo = p->lo;
  381. - int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
  382. + AESmultiKey *m;
  383. + aes_context *a;
  384. + int x = 0, n;
  385. +
  386. + m = (AESmultiKey *) kmalloc(sizeof(AESmultiKey), GFP_KERNEL);
  387. + if(!m) return 0;
  388. + memset(m, 0, sizeof(AESmultiKey)); /* leaves keyMask == 0 and every keyPtr[] entry null */
  389. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  390. + m->partialMD5 = &m->partialMD5buf[0];
  391. + rwlock_init(&m->rwlock);
  392. + init_timer(&m->timer);
  393. + again:
  394. +#endif
  395. - if (size > count)
  396. - size = count;
  397. + n = PAGE_SIZE / sizeof(aes_context); /* contexts per allocation chunk (about one page) */
  398. + if(!n) n = 1;
  399. - kaddr = kmap(page);
  400. - if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
  401. - size = 0;
  402. - printk(KERN_ERR "loop: transfer error block %ld\n",page->index);
  403. - desc->error = -EINVAL;
  404. - }
  405. - kunmap(page);
  406. -
  407. - desc->count = count - size;
  408. - desc->written += size;
  409. - p->data += size;
  410. - return size;
  411. -}
  412. -
  413. -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
  414. - loff_t pos)
  415. -{
  416. - struct lo_read_data cookie;
  417. - read_descriptor_t desc;
  418. - struct file *file;
  419. -
  420. - cookie.lo = lo;
  421. - cookie.data = bh->b_data;
  422. - cookie.bsize = bsize;
  423. - desc.written = 0;
  424. - desc.count = bh->b_size;
  425. - desc.buf = (char*)&cookie;
  426. - desc.error = 0;
  427. - spin_lock_irq(&lo->lo_lock);
  428. - file = lo->lo_backing_file;
  429. - spin_unlock_irq(&lo->lo_lock);
  430. - do_generic_file_read(file, &pos, &desc, lo_read_actor);
  431. - return desc.error;
  432. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  433. + if(!a) {
  434. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  435. + if(x) kfree(m->keyPtr[0]); /* second pass failed: release the chunk from the first pass */
  436. +#endif
  437. + kfree(m);
  438. + return 0;
  439. + }
  440. +
  441. + while((x < KEY_ALLOC_COUNT) && n) { /* hand out consecutive contexts of this chunk */
  442. + m->keyPtr[x] = a;
  443. + a++;
  444. + x++;
  445. + n--;
  446. + }
  447. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  448. + if(x < 2) goto again; /* keyScrubWork() needs keyPtr[0] and its shadow keyPtr[1] */
  449. +#endif
  450. + return m;
  451. }
  452. -static inline int loop_get_bs(struct loop_device *lo)
  453. +static void clearAndFreeMultiKey(AESmultiKey *m) /* zero and free every key chunk, then the AESmultiKey itself */
  454. {
  455. - int bs = 0;
  456. + aes_context *a;
  457. + int x, n;
  458. - if (blksize_size[MAJOR(lo->lo_device)])
  459. - bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
  460. - if (!bs)
  461. - bs = BLOCK_SIZE;
  462. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  463. + /* stop scrub timer. loop thread was killed earlier */
  464. + del_timer_sync(&m->timer);
  465. + /* make sure allocated keys are in original order */
  466. + if(m->reversed) keyScrubWork(m);
  467. +#endif
  468. + n = PAGE_SIZE / sizeof(aes_context); /* same chunk size the allocators used */
  469. + if(!n) n = 1;
  470. +
  471. + x = 0;
  472. + while(x < KEY_ALLOC_COUNT) {
  473. + a = m->keyPtr[x]; /* first context of a chunk == the kmalloc'ed pointer */
  474. + if(!a) break; /* chunks past this point were never allocated */
  475. + memset(a, 0, sizeof(aes_context) * n); /* wipe key material before freeing */
  476. + kfree(a);
  477. + x += n; /* step to the first slot of the next chunk */
  478. + }
  479. +
  480. + memset(m, 0, sizeof(AESmultiKey));
  481. + kfree(m);
  482. +}
  483. +
  484. +static int multiKeySetup(struct loop_device *lo, unsigned char *k, int version3) /* load 64 user-space keys (plus a v3 IV seed); returns 0 or -errno */
  485. +{
  486. + AESmultiKey *m;
  487. + aes_context *a;
  488. + int x, y, n, err = 0;
  489. + union {
  490. + u_int32_t w[16];
  491. + unsigned char b[64];
  492. + } un;
  493. +
  494. + if(lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN))
  495. + return -EPERM;
  496. +
  497. + m = (AESmultiKey *)lo->key_data;
  498. + if(!m) return -ENXIO;
  499. +
  500. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  501. + /* temporarily prevent loop thread from messing with keys */
  502. + write_lock(&m->rwlock);
  503. + m->blocked = 1;
  504. + /* make sure allocated keys are in original order */
  505. + if(m->reversed) keyScrubWork(m);
  506. + write_unlock(&m->rwlock);
  507. +#endif
  508. + n = PAGE_SIZE / sizeof(aes_context);
  509. + if(!n) n = 1;
  510. - return bs;
  511. + x = 0;
  512. + while(x < KEY_ALLOC_COUNT) {
  513. + if(!m->keyPtr[x]) { /* slot not backed yet: allocate the next chunk of contexts */
  514. + a = (aes_context *) kmalloc(sizeof(aes_context) * n, GFP_KERNEL);
  515. + if(!a) {
  516. + err = -ENOMEM;
  517. + goto error_out;
  518. + }
  519. + y = x;
  520. + while((y < (x + n)) && (y < KEY_ALLOC_COUNT)) {
  521. + m->keyPtr[y] = a;
  522. + a++;
  523. + y++;
  524. + }
  525. + }
  526. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  527. + if(x >= 64) { /* slots 64..127 are scrub shadows: allocate only, no key is read for them */
  528. + x++;
  529. + continue;
  530. + }
  531. +#endif
  532. + if(copy_from_user(&un.b[0], k, 32)) { /* user buffer holds one 32-byte record per key */
  533. + err = -EFAULT;
  534. + goto error_out;
  535. + }
  536. + aes_set_key(m->keyPtr[x], &un.b[0], lo->lo_encrypt_key_size, 0);
  537. + k += 32;
  538. + x++;
  539. + }
  540. +
  541. + m->partialMD5[0] = 0x67452301;
  542. + m->partialMD5[1] = 0xefcdab89;
  543. + m->partialMD5[2] = 0x98badcfe;
  544. + m->partialMD5[3] = 0x10325476;
  545. + if(version3) {
  546. + /* only first 128 bits of iv-key is used */
  547. + if(copy_from_user(&un.b[0], k, 16)) {
  548. + err = -EFAULT;
  549. + goto error_out;
  550. + }
  551. +#if defined(__BIG_ENDIAN)
  552. + un.w[0] = cpu_to_le32(un.w[0]);
  553. + un.w[1] = cpu_to_le32(un.w[1]);
  554. + un.w[2] = cpu_to_le32(un.w[2]);
  555. + un.w[3] = cpu_to_le32(un.w[3]);
  556. +#endif
  557. + memset(&un.b[16], 0, 48);
  558. + md5_transform_CPUbyteorder(&m->partialMD5[0], &un.w[0]);
  559. + lo->lo_flags |= 0x080000; /* multi-key-v3 (info exported to user space) */
  560. + }
  561. +
  562. + m->keyMask = 0x3F; /* range 0...63 */
  563. + lo->lo_flags |= 0x100000; /* multi-key (info exported to user space) */
  564. +error_out:
  565. + memset(&un, 0, sizeof(un)); /* wipe key material from the stack on error exits too, not only on success */
  566. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  567. + /* re-enable loop thread key scrubbing */
  568. + write_lock(&m->rwlock);
  569. + m->blocked = 0;
  570. + write_unlock(&m->rwlock);
  571. +#endif
  572. + return err;
  573. }
  574. -static inline unsigned long loop_get_iv(struct loop_device *lo,
  575. - unsigned long sector)
  576. +void loop_compute_sector_iv(int devSect, u_int32_t *ivout) /* fill ivout[0..3]: little-endian sector number, then three zero words */
  577. {
  578. - int bs = loop_get_bs(lo);
  579. - unsigned long offset, IV;
  580. + ivout[0] = cpu_to_le32(devSect);
  581. + ivout[3] = ivout[2] = ivout[1] = 0;
  582. +}
  583. - IV = sector / (bs >> 9) + lo->lo_offset / bs;
  584. - offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
  585. - if (offset >= bs)
  586. - IV++;
  587. +void loop_compute_md5_iv_v3(int devSect, u_int32_t *ivout, u_int32_t *data) /* fold 496 bytes at 'data' plus devSect into the MD5 state already in ivout[0..3] */
  588. +{
  589. + int x;
  590. +#if defined(__BIG_ENDIAN)
  591. + int y, e;
  592. +#endif
  593. + u_int32_t buf[16];
  594. - return IV;
  595. +#if defined(__BIG_ENDIAN)
  596. + y = 7;
  597. + e = 16;
  598. + do {
  599. + if (!y) { /* eighth and last pass: only 12 data words, rest is sector number + padding */
  600. + e = 12;
  601. + /* md5_transform_CPUbyteorder wants data in CPU byte order */
  602. + /* devSect is already in CPU byte order -- no need to convert */
  603. + /* 32 bits of sector number + 24 zero bits */
  604. + buf[12] = devSect;
  605. + buf[13] = 0x80000000;
  606. + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
  607. + buf[14] = 4024;
  608. + buf[15] = 0;
  609. + }
  610. + x = 0;
  611. + do {
  612. + buf[x ] = cpu_to_le32(data[0]);
  613. + buf[x + 1] = cpu_to_le32(data[1]);
  614. + buf[x + 2] = cpu_to_le32(data[2]);
  615. + buf[x + 3] = cpu_to_le32(data[3]);
  616. + x += 4;
  617. + data += 4;
  618. + } while (x < e);
  619. + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
  620. + } while (--y >= 0);
  621. + ivout[0] = cpu_to_le32(ivout[0]);
  622. + ivout[1] = cpu_to_le32(ivout[1]);
  623. + ivout[2] = cpu_to_le32(ivout[2]);
  624. + ivout[3] = cpu_to_le32(ivout[3]);
  625. +#else
  626. + x = 6;
  627. + do { /* seven full 64-byte blocks (448 bytes) taken straight from 'data' */
  628. + md5_transform_CPUbyteorder(&ivout[0], data);
  629. + data += 16;
  630. + } while (--x >= 0);
  631. + memcpy(buf, data, 48); /* remaining 48 data bytes share the last block with sector number + padding */
  632. + /* md5_transform_CPUbyteorder wants data in CPU byte order */
  633. + /* devSect is already in CPU byte order -- no need to convert */
  634. + /* 32 bits of sector number + 24 zero bits */
  635. + buf[12] = devSect;
  636. + buf[13] = 0x80000000;
  637. + /* 4024 bits == 31 * 128 bit plaintext blocks + 56 bits of sector number */
  638. + buf[14] = 4024;
  639. + buf[15] = 0;
  640. + md5_transform_CPUbyteorder(&ivout[0], &buf[0]);
  641. +#endif
  642. }
  643. -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
  644. +/* this function exists for compatibility with old external cipher modules: it seeds ivout with fixed initial MD5 state and then calls the v3 routine */
  645. +void loop_compute_md5_iv(int devSect, u_int32_t *ivout, u_int32_t *data)
  646. {
  647. - loff_t pos;
  648. - int ret;
  649. + ivout[0] = 0x67452301;
  650. + ivout[1] = 0xefcdab89;
  651. + ivout[2] = 0x98badcfe;
  652. + ivout[3] = 0x10325476;
  653. + loop_compute_md5_iv_v3(devSect, ivout, data);
  654. +}
  655. +
  656. +/* Some external modules do not know if md5_transform_CPUbyteorder() */
  657. +/* is asmlinkage or not, so here is a plain C wrapper that just forwards to it. */
  658. +void md5_transform_CPUbyteorder_C(u_int32_t *hash, u_int32_t const *in)
  659. +{
  660. + md5_transform_CPUbyteorder(hash, in);
  661. +}
  662. +
  663. +static int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf, /* READ: decrypt raw_buf into loop_buf; otherwise encrypt loop_buf into raw_buf */
  664. + char *loop_buf, int size, int devSect)
  665. +{
  666. + aes_context *a;
  667. + AESmultiKey *m;
  668. + int x;
  669. + unsigned y;
  670. + u_int32_t iv[8];
  671. +
  672. + if(!size || (size & 511)) { /* only whole, non-empty 512-byte sectors are accepted */
  673. + return -EINVAL;
  674. + }
  675. + m = (AESmultiKey *)lo->key_data;
  676. + y = m->keyMask; /* nonzero selects the multi-key (MD5 IV) code paths below */
  677. + if(cmd == READ) {
  678. + while(size) {
  679. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  680. + read_lock(&m->rwlock);
  681. +#endif
  682. + a = m->keyPtr[((unsigned)devSect) & y];
  683. + if(y) {
  684. + memcpy(&iv[0], raw_buf, 16); /* block 0 is decrypted last; its ciphertext chains into block 1 */
  685. + raw_buf += 16;
  686. + loop_buf += 16;
  687. + } else {
  688. + loop_compute_sector_iv(devSect, &iv[0]);
  689. + }
  690. + x = 15;
  691. + do { /* 16 passes, two 16-byte blocks per pass; iv[0..3] and iv[4..7] alternate as chaining value */
  692. + memcpy(&iv[4], raw_buf, 16);
  693. + aes_decrypt(a, raw_buf, loop_buf);
  694. + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[0];
  695. + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[1];
  696. + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[2];
  697. + *((u_int32_t *)(&loop_buf[12])) ^= iv[3];
  698. + if(y && !x) { /* last pass, multi-key: step back to block 0 and derive its IV from the 31 decrypted blocks */
  699. + raw_buf -= 496;
  700. + loop_buf -= 496;
  701. + memcpy(&iv[4], &m->partialMD5[0], 16);
  702. + loop_compute_md5_iv_v3(devSect, &iv[4], (u_int32_t *)(&loop_buf[16]));
  703. + } else {
  704. + raw_buf += 16;
  705. + loop_buf += 16;
  706. + memcpy(&iv[0], raw_buf, 16);
  707. + }
  708. + aes_decrypt(a, raw_buf, loop_buf);
  709. + *((u_int32_t *)(&loop_buf[ 0])) ^= iv[4];
  710. + *((u_int32_t *)(&loop_buf[ 4])) ^= iv[5];
  711. + *((u_int32_t *)(&loop_buf[ 8])) ^= iv[6];
  712. + *((u_int32_t *)(&loop_buf[12])) ^= iv[7];
  713. + if(y && !x) {
  714. + raw_buf += 512; /* from block 0 straight to the start of the next sector */
  715. + loop_buf += 512;
  716. + } else {
  717. + raw_buf += 16;
  718. + loop_buf += 16;
  719. + }
  720. + } while(--x >= 0);
  721. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  722. + read_unlock(&m->rwlock);
  723. +#endif
  724. + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
  725. + size -= 512;
  726. + devSect++;
  727. + }
  728. + } else {
  729. + while(size) {
  730. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  731. + read_lock(&m->rwlock);
  732. +#endif
  733. + a = m->keyPtr[((unsigned)devSect) & y];
  734. + if(y) {
  735. + /* on 2.4 and later kernels, real raw_buf is not doing */
  736. + /* any writes now so it can be used as temp buffer */
  737. + memcpy(raw_buf, loop_buf, 512);
  738. + memcpy(&iv[0], &m->partialMD5[0], 16);
  739. + loop_compute_md5_iv_v3(devSect, &iv[0], (u_int32_t *)(&raw_buf[16])); /* IV hashes plaintext bytes 16..511 plus the sector number */
  740. + x = 15;
  741. + do { /* encrypt in place in raw_buf; each ciphertext block becomes the next chaining value */
  742. + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
  743. + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
  744. + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
  745. + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
  746. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  747. + memcpy(&iv[0], raw_buf, 16);
  748. + raw_buf += 16;
  749. + iv[0] ^= *((u_int32_t *)(&raw_buf[ 0]));
  750. + iv[1] ^= *((u_int32_t *)(&raw_buf[ 4]));
  751. + iv[2] ^= *((u_int32_t *)(&raw_buf[ 8]));
  752. + iv[3] ^= *((u_int32_t *)(&raw_buf[12]));
  753. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  754. + memcpy(&iv[0], raw_buf, 16);
  755. + raw_buf += 16;
  756. + } while(--x >= 0);
  757. + loop_buf += 512;
  758. + } else {
  759. + loop_compute_sector_iv(devSect, &iv[0]);
  760. + x = 15;
  761. + do { /* single-key path: read plaintext from loop_buf, write ciphertext to raw_buf */
  762. + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
  763. + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
  764. + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
  765. + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
  766. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  767. + memcpy(&iv[0], raw_buf, 16);
  768. + loop_buf += 16;
  769. + raw_buf += 16;
  770. + iv[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
  771. + iv[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
  772. + iv[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
  773. + iv[3] ^= *((u_int32_t *)(&loop_buf[12]));
  774. + aes_encrypt(a, (unsigned char *)(&iv[0]), raw_buf);
  775. + memcpy(&iv[0], raw_buf, 16);
  776. + loop_buf += 16;
  777. + raw_buf += 16;
  778. + } while(--x >= 0);
  779. + }
  780. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  781. + read_unlock(&m->rwlock);
  782. +#endif
  783. + if(current->need_resched) {set_current_state(TASK_RUNNING);schedule();}
  784. + size -= 512;
  785. + devSect++;
  786. + }
  787. + }
  788. + return(0);
  789. +}
  790. +
  791. +static int keySetup_aes(struct loop_device *lo, struct loop_info *info) /* allocate key state and install the single key from info; 0 or -ENOMEM */
  792. +{
  793. + AESmultiKey *m;
  794. + union {
  795. + u_int32_t w[8]; /* needed for 4 byte alignment for b[] */
  796. + unsigned char b[32];
  797. + } un;
  798. +
  799. + lo->key_data = m = allocMultiKey();
  800. + if(!m) return(-ENOMEM);
  801. + memcpy(&un.b[0], &info->lo_encrypt_key[0], 32);
  802. + aes_set_key(m->keyPtr[0], &un.b[0], info->lo_encrypt_key_size, 0);
  803. + memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key)); /* wipe the caller's copy of the key */
  804. + memset(&un.b[0], 0, 32); /* and the aligned stack copy */
  805. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  806. + keyScrubTimerInit(lo);
  807. +#endif
  808. + return(0);
  809. +}
  810. - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
  811. +static int keyClean_aes(struct loop_device *lo) /* wipe and free lo->key_data if present; always returns 0 */
  812. +{
  813. + if(lo->key_data) {
  814. + clearAndFreeMultiKey((AESmultiKey *)lo->key_data);
  815. + lo->key_data = 0; /* a second call then becomes a no-op */
  816. + }
  817. + return(0);
  818. +}
  819. - if (rw == WRITE)
  820. - ret = lo_send(lo, bh, loop_get_bs(lo), pos);
  821. - else
  822. - ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
  823. +static int handleIoctl_aes(struct loop_device *lo, int cmd, unsigned long arg) /* dispatch the two multi-key ioctls; -EINVAL for any other cmd */
  824. +{
  825. + int err;
  826. +
  827. + switch (cmd) {
  828. + case LOOP_MULTI_KEY_SETUP:
  829. + err = multiKeySetup(lo, (unsigned char *)arg, 0); /* arg is the user-space key buffer; 0 = not version3 */
  830. + break;
  831. + case LOOP_MULTI_KEY_SETUP_V3:
  832. + err = multiKeySetup(lo, (unsigned char *)arg, 1); /* version3: an IV seed is read after the keys */
  833. + break;
  834. + default:
  835. + err = -EINVAL;
  836. + }
  837. + return err;
  838. +}
  839. +
  840. +static struct loop_func_table funcs_aes = { /* AES transfer hooks; placed in xfer_funcs[] below */
  841. + number: 16, /* 16 == AES */
  842. + transfer: transfer_aes,
  843. + init: keySetup_aes,
  844. + release: keyClean_aes,
  845. + ioctl: handleIoctl_aes
  846. +};
  847. +
  848. +EXPORT_SYMBOL(loop_compute_sector_iv);
  849. +EXPORT_SYMBOL(loop_compute_md5_iv_v3);
  850. +EXPORT_SYMBOL(loop_compute_md5_iv);
  851. +EXPORT_SYMBOL(md5_transform_CPUbyteorder_C);
  852. +#endif /* CONFIG_BLK_DEV_LOOP_AES */
  853. +
  854. +/* Table of transfer function sets. xfer_funcs[0] is special - its release function is never called */
  855. +struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  856. + &none_funcs,
  857. + &xor_funcs,
  858. +#if CONFIG_BLK_DEV_LOOP_AES
  859. + [LO_CRYPT_AES] = &funcs_aes, /* present only when AES support is configured */
  860. +#endif
  861. +};
  862. +
  863. +/*
  864. + * First number of 'lo_prealloc' is the default number of RAM pages
  865. + * to pre-allocate for each device backed loop. Every (configured)
  866. + * device backed loop pre-allocates this amount of RAM pages unless
  867. + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
  868. + * overrides are defined in pairs: loop_index,number_of_pages
  869. + */
  870. +static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 }; /* [0] = default, then four (loop_index, pages) pairs; loop_prealloc_init() ignores a pair whose page count is 0 */
  871. +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
  872. +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
  873. +
  874. +#ifdef MODULE
  875. +MODULE_PARM(lo_prealloc, "1-9i");
  876. +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
  877. +#else
  878. +static int __init lo_prealloc_setup(char *str) /* non-module build: parses the "lo_prealloc=" boot option into lo_prealloc[]; always returns 1 */
  879. +{
  880. + int x, y, z;
  881. +
  882. + for (x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) { /* at most one value per array slot */
  883. + z = get_option(&str, &y);
  884. + if (z > 0)
  885. + lo_prealloc[x] = y; /* store only when get_option() reported a parsed value */
  886. + if (z < 2)
  887. + break; /* presumably a result below 2 means no further value follows - confirm against get_option() */
  888. + }
  889. + return 1;
  890. +}
  891. +__setup("lo_prealloc=", lo_prealloc_setup);
  892. +#endif
  893. - return ret;
  894. +/*
  895. + * This is the nice value of the loop helper thread, in the range
  896. + * from 0 (low priority) to -20 (high priority); loop_thread() clamps it to that range.
  897. + */
  898. +#if defined(DEF_NICE) && defined(DEF_COUNTER)
  899. +static int lo_nice = -20; /* old scheduler default */
  900. +#else
  901. +static int lo_nice = -1; /* O(1) scheduler default */
  902. +#endif
  903. +
  904. +#ifdef MODULE
  905. +MODULE_PARM(lo_nice, "1i");
  906. +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
  907. +#else
  908. +static int __init lo_nice_setup(char *str) /* non-module build: parses the "lo_nice=" boot option; always returns 1 */
  909. +{
  910. + int y;
  911. +
  912. + if (get_option(&str, &y) == 1) /* lo_nice is changed only when get_option() returns exactly 1 */
  913. + lo_nice = y; /* stored unclamped; loop_thread() limits it to -20..0 */
  914. + return 1;
  915. }
  916. +__setup("lo_nice=", lo_nice_setup);
  917. +#endif
  918. +
  919. +typedef struct { /* one queue scan order for loop_get_bh(): queues are tried as q0, then q1, then q2 */
  920. + struct buffer_head **q0; /* address of the first queue pointer to try */
  921. + struct buffer_head **q1;
  922. + struct buffer_head **q2;
  923. + int x0; /* list number reported through *list_nr when the buffer came from q0 */
  924. + int x1; /* same for q1 */
  925. + int x2; /* same for q2 */
  926. +} que_look_up_table;
  927. -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate);
  928. -static void loop_put_buffer(struct buffer_head *bh)
  929. +static void loop_prealloc_cleanup(struct loop_device *lo) /* frees every buffer head on lo->lo_bh_free together with its page; leaves the list empty */
  930. {
  931. - /*
  932. - * check b_end_io, may just be a remapped bh and not an allocated one
  933. - */
  934. - if (bh && bh->b_end_io == loop_end_io_transfer) {
  935. + struct buffer_head *bh;
  936. +
  937. + while ((bh = lo->lo_bh_free)) { /* pop entries until the free list is empty */
  938. __free_page(bh->b_page);
  939. + lo->lo_bh_free = bh->b_reqnext; /* unlink before the buffer head itself is released */
  940. + bh->b_reqnext = NULL;
  941. kmem_cache_free(bh_cachep, bh);
  942. }
  943. }
  944. -/*
  945. - * Add buffer_head to back of pending list
  946. - */
  947. -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
  948. +static int loop_prealloc_init(struct loop_device *lo, int y) /* fills lo->lo_bh_free with y buffer heads, one page each; y == 0 selects the lo_prealloc[] setting for this loop; returns 0 on success, 1 on allocation failure */
  949. +{
  950. + struct buffer_head *bh;
  951. + int x;
  952. +
  953. + if(!y) {
  954. + y = lo_prealloc[0]; /* global default */
  955. + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) { /* walk the (loop_index, pages) override pairs */
  956. + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) { /* an override counts only if its page count is non-zero */
  957. + y = lo_prealloc[x + 1];
  958. + break;
  959. + }
  960. + }
  961. + }
  962. + lo->lo_bh_flsh = (y * 3) / 4; /* three quarters of the pool; loop_thread() compares its flush counter against this */
  963. +
  964. + for (x = 0; x < y; x++) {
  965. + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
  966. + if (!bh) {
  967. + loop_prealloc_cleanup(lo); /* give back everything allocated so far */
  968. + return 1;
  969. + }
  970. + bh->b_page = alloc_page(GFP_KERNEL);
  971. + if (!bh->b_page) {
  972. + bh->b_reqnext = NULL;
  973. + kmem_cache_free(bh_cachep, bh); /* this head has no page and is not on the list yet, so free it directly */
  974. + loop_prealloc_cleanup(lo);
  975. + return 1;
  976. + }
  977. + bh->b_reqnext = lo->lo_bh_free; /* push onto the head of the free list */
  978. + lo->lo_bh_free = bh;
  979. + }
  980. + return 0;
  981. +}
  982. +
  983. +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) /* appends bh to the tail of queue *q under lo_lock, then wakes a waiter on lo_bh_wait if there is one */
  984. {
  985. unsigned long flags;
  986. spin_lock_irqsave(&lo->lo_lock, flags);
  987. - if (lo->lo_bhtail) {
  988. - lo->lo_bhtail->b_reqnext = bh;
  989. - lo->lo_bhtail = bh;
  990. - } else
  991. - lo->lo_bh = lo->lo_bhtail = bh;
  992. + if (*q) { /* non-empty: *q is the last element and its b_reqnext is the first (loop_get_bh() dequeues last->b_reqnext) */
  993. + bh->b_reqnext = (*q)->b_reqnext; /* new tail links to the first element */
  994. + (*q)->b_reqnext = bh;
  995. + } else {
  996. + bh->b_reqnext = bh; /* a single element links to itself */
  997. + }
  998. + *q = bh; /* bh is now the last element */
  999. spin_unlock_irqrestore(&lo->lo_lock, flags);
  1000. - up(&lo->lo_bh_mutex);
  1001. + if (waitqueue_active(&lo->lo_bh_wait)) /* wake only when something is actually waiting */
  1002. + wake_up_interruptible(&lo->lo_bh_wait);
  1003. }
  1004. -/*
  1005. - * Grab first pending buffer
  1006. - */
  1007. -static struct buffer_head *loop_get_bh(struct loop_device *lo)
  1008. +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) /* puts bh back at the head of queue *q under lo_lock; unlike loop_add_queue_last() it issues no wake-up */
  1009. {
  1010. - struct buffer_head *bh;
  1011. + spin_lock_irq(&lo->lo_lock); /* NOTE(review): plain _irq variant; the only caller visible here is loop_thread() - confirm no caller runs with interrupts disabled */
  1012. + if (*q) {
  1013. + bh->b_reqnext = (*q)->b_reqnext; /* bh takes the place of the old first element */
  1014. + (*q)->b_reqnext = bh; /* *q (the last element) is left unchanged, so bh becomes the first */
  1015. + } else {
  1016. + bh->b_reqnext = bh; /* sole element links to itself */
  1017. + *q = bh;
  1018. + }
  1019. + spin_unlock_irq(&lo->lo_lock);
  1020. +}
  1021. +
  1022. +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr,
  1023. + que_look_up_table *qt) /* dequeues the first buffer head from the first non-empty queue in the order qt gives; stores that queue's number in *list_nr; returns NULL (and leaves *list_nr untouched) when all three queues are empty */
  1024. +{
  1025. + struct buffer_head *bh = NULL, *last;
  1026. spin_lock_irq(&lo->lo_lock);
  1027. - if ((bh = lo->lo_bh)) {
  1028. - if (bh == lo->lo_bhtail)
  1029. - lo->lo_bhtail = NULL;
  1030. - lo->lo_bh = bh->b_reqnext;
  1031. + if ((last = *qt->q0)) { /* the queue pointer addresses the last element */
  1032. + bh = last->b_reqnext; /* the first element follows the last one */
  1033. + if (bh == last)
  1034. + *qt->q0 = NULL; /* that was the only element: queue is now empty */
  1035. + else
  1036. + last->b_reqnext = bh->b_reqnext; /* unlink the first element */
  1037. + bh->b_reqnext = NULL;
  1038. + *list_nr = qt->x0;
  1039. + } else if ((last = *qt->q1)) { /* same dequeue steps for the second choice */
  1040. + bh = last->b_reqnext;
  1041. + if (bh == last)
  1042. + *qt->q1 = NULL;
  1043. + else
  1044. + last->b_reqnext = bh->b_reqnext;
  1045. bh->b_reqnext = NULL;
  1046. + *list_nr = qt->x1;
  1047. + } else if ((last = *qt->q2)) { /* and for the third choice */
  1048. + bh = last->b_reqnext;
  1049. + if (bh == last)
  1050. + *qt->q2 = NULL;
  1051. + else
  1052. + last->b_reqnext = bh->b_reqnext;
  1053. + bh->b_reqnext = NULL;
  1054. + *list_nr = qt->x2;
  1055. }
  1056. spin_unlock_irq(&lo->lo_lock);
  1057. -
  1058. return bh;
  1059. }
  1060. -/*
  1061. - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
  1062. - * and lo->transfer stuff has already been done. if not, it was a READ
  1063. - * so queue it for the loop thread and let it do the transfer out of
  1064. - * b_end_io context (we don't want to do decrypt of a page with irqs
  1065. - * disabled)
  1066. - */
  1067. -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
  1068. +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b) /* returns buffer head b to the lo->lo_bh_free list and wakes the waiter if lo_bh_need is set */
  1069. +{
  1070. + unsigned long flags;
  1071. + int wk;
  1072. +
  1073. + spin_lock_irqsave(&lo->lo_lock, flags);
  1074. + b->b_reqnext = lo->lo_bh_free; /* push onto the head of the free list */
  1075. + lo->lo_bh_free = b;
  1076. + wk = lo->lo_bh_need; /* sampled under the lock; loop_get_buffer() sets it when the thread found the list empty */
  1077. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  1078. +
  1079. + if (wk && waitqueue_active(&lo->lo_bh_wait))
  1080. + wake_up_interruptible(&lo->lo_bh_wait);
  1081. +}
  1082. +
  1083. +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate) /* b_end_io handler that loop_get_buffer() installs for writes; loop_end_io_transfer_rd() also calls it for failed reads */
  1084. {
  1085. struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
  1086. + struct buffer_head *rbh = bh->b_private; /* original request, stored in b_private by loop_get_buffer() */
  1087. - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
  1088. - struct buffer_head *rbh = bh->b_private;
  1089. + rbh->b_reqnext = NULL;
  1090. + rbh->b_end_io(rbh, uptodate); /* complete the original request with the same status */
  1091. + loop_put_buffer(lo, bh); /* hand the pre-allocated buffer back to the free list */
  1092. + if (atomic_dec_and_test(&lo->lo_pending))
  1093. + wake_up_interruptible(&lo->lo_bh_wait); /* count reached zero: wake the waiter so it can notice */
  1094. +}
  1095. - rbh->b_end_io(rbh, uptodate);
  1096. - if (atomic_dec_and_test(&lo->lo_pending))
  1097. - up(&lo->lo_bh_mutex);
  1098. - loop_put_buffer(bh);
  1099. - } else
  1100. - loop_add_bh(lo, bh);
  1101. +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate) /* b_end_io handler that loop_get_buffer() installs for reads */
  1102. +{
  1103. + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
  1104. +
  1105. + if (!uptodate)
  1106. + loop_end_io_transfer_wr(bh, uptodate); /* failed read: complete the original request and free the buffer right away */
  1107. + else
  1108. + loop_add_queue_last(lo, bh, &lo->lo_bh_que0); /* successful read: queue on que0 so loop_thread() does the decrypt */
  1109. }
  1110. static struct buffer_head *loop_get_buffer(struct loop_device *lo,
  1111. - struct buffer_head *rbh)
  1112. + struct buffer_head *rbh, int from_thread, int rw) /* takes one pre-allocated buffer head off lo->lo_bh_free and sets it up as the backing-device request for rbh; returns NULL when the list is empty (never sleeps or allocates) */
  1113. {
  1114. struct buffer_head *bh;
  1115. + struct page *p;
  1116. + unsigned long flags;
  1117. - /*
  1118. - * for xfer_funcs that can operate on the same bh, do that
  1119. - */
  1120. - if (lo->lo_flags & LO_FLAGS_BH_REMAP) {
  1121. - bh = rbh;
  1122. - goto out_bh;
  1123. + spin_lock_irqsave(&lo->lo_lock, flags);
  1124. + bh = lo->lo_bh_free;
  1125. + if (bh) {
  1126. + lo->lo_bh_free = bh->b_reqnext; /* pop the head of the free list */
  1127. + if (from_thread)
  1128. + lo->lo_bh_need = 0; /* thread got its buffer: no wake-up needed from loop_put_buffer() */
  1129. + } else {
  1130. + if (from_thread)
  1131. + lo->lo_bh_need = 1; /* thread is short of a buffer: loop_put_buffer() will wake it */
  1132. }
  1133. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  1134. + if (!bh)
  1135. + return (struct buffer_head *)0;
  1136. - do {
  1137. - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
  1138. - if (bh)
  1139. - break;
  1140. -
  1141. - run_task_queue(&tq_disk);
  1142. - set_current_state(TASK_INTERRUPTIBLE);
  1143. - schedule_timeout(HZ);
  1144. - } while (1);
  1145. - memset(bh, 0, sizeof(*bh));
  1146. + p = bh->b_page; /* keep the pre-allocated page across the memset below */
  1147. + memset(bh, 0, sizeof(struct buffer_head));
  1148. + bh->b_page = p;
  1149. + bh->b_private = rbh; /* the end_io handlers find the original request here */
  1150. bh->b_size = rbh->b_size;
  1151. bh->b_dev = rbh->b_rdev;
  1152. + bh->b_rdev = lo->lo_device; /* the I/O itself goes to the backing device */
  1153. bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
  1154. + bh->b_data = page_address(bh->b_page);
  1155. + bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd; /* completion handler chosen by direction */
  1156. + bh->b_rsector = rbh->b_rsector + lo->lo_offs_sec; /* shift by the loop offset, expressed in sectors */
  1157. + init_waitqueue_head(&bh->b_wait);
  1158. +
  1159. + return bh;
  1160. +}
  1161. +
  1162. +static int figure_loop_size(struct loop_device *lo) /* derives the loop size from the backing store, lo_offset and lo_sizelimit and stores it in loop_sizes[]; returns 0, -EINVAL (offset or limit rejected and reset to 0) or -EFBIG */
  1163. +{
  1164. + loff_t size, offs;
  1165. + unsigned int x;
  1166. + int err = 0;
  1167. + kdev_t lodev = lo->lo_device;
  1168. +
  1169. + offs = lo->lo_offset;
  1170. + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) {
  1171. + size = lo->lo_backing_file->f_dentry->d_inode->i_size; /* regular file: size in bytes from the inode */
  1172. + } else {
  1173. + offs &= ~((loff_t)511); /* not a regular file: round the offset down to a multiple of 512 */
  1174. + if (blk_size[MAJOR(lodev)])
  1175. + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS; /* table entry scaled to bytes */
  1176. + else
  1177. + size = 1024*1024*1024; /* unknown size */
  1178. + }
  1179. + if ((offs > 0) && (offs < size)) { /* an accepted offset lies strictly inside the backing store */
  1180. + size -= offs;
  1181. + } else {
  1182. + if (offs)
  1183. + err = -EINVAL; /* a non-zero offset that was rejected is an error; zero is simply "no offset" */
  1184. + lo->lo_offset = 0;
  1185. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1186. + }
  1187. + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) { /* a limit may only shrink the device */
  1188. + size = lo->lo_sizelimit;
  1189. + } else {
  1190. + if (lo->lo_sizelimit)
  1191. + err = -EINVAL;
  1192. + lo->lo_sizelimit = 0;
  1193. + }
  1194. + size >>= BLOCK_SIZE_BITS; /* bytes to blocks */
  1195. /*
  1196. - * easy way out, although it does waste some memory for < PAGE_SIZE
  1197. - * blocks... if highmem bounce buffering can get away with it,
  1198. - * so can we :-)
  1199. + * Unfortunately, if we want to do I/O on the device,
  1200. + * the number of 1024-byte blocks has to fit into unsigned int
  1201. */
  1202. - do {
  1203. - bh->b_page = alloc_page(GFP_NOIO);
  1204. - if (bh->b_page)
  1205. - break;
  1206. + x = (unsigned int)size;
  1207. + if ((loff_t)x != size) { /* the value changed when narrowed, so it does not fit */
  1208. + err = -EFBIG;
  1209. + size = 0;
  1210. + }
  1211. - run_task_queue(&tq_disk);
  1212. - set_current_state(TASK_INTERRUPTIBLE);
  1213. - schedule_timeout(HZ);
  1214. - } while (1);
  1215. + loop_sizes[lo->lo_number] = size; /* stored even on error (0 after -EFBIG) */
  1216. + return err;
  1217. +}
  1218. - bh->b_data = page_address(bh->b_page);
  1219. - bh->b_end_io = loop_end_io_transfer;
  1220. - bh->b_private = rbh;
  1221. - init_waitqueue_head(&bh->b_wait);
  1222. +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w) /* writes (w != 0) or reads (w == 0) size bytes at *ppos through file->f_op, continuing after short transfers; returns 0 when all bytes were moved, 1 on failure */
  1223. +{
  1224. + mm_segment_t fs;
  1225. + int x, y, z;
  1226. -out_bh:
  1227. - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
  1228. - spin_lock_irq(&lo->lo_lock);
  1229. - bh->b_rdev = lo->lo_device;
  1230. - spin_unlock_irq(&lo->lo_lock);
  1231. + y = 0; /* bytes transferred so far */
  1232. + do {
  1233. + z = size - y; /* bytes still outstanding */
  1234. + fs = get_fs();
  1235. + set_fs(get_ds()); /* address limit is switched around the f_op call and restored right after; the visible caller passes kernel-mapped buffers */
  1236. + if (w) {
  1237. + x = file->f_op->write(file, buf + y, z, ppos);
  1238. + set_fs(fs);
  1239. + } else {
  1240. + x = file->f_op->read(file, buf + y, z, ppos);
  1241. + set_fs(fs);
  1242. + if (!x)
  1243. + return 1; /* a read that returns 0 bytes before size is reached is treated as failure */
  1244. + }
  1245. + if (x < 0) {
  1246. + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) { /* these errors are retried without limit */
  1247. + run_task_queue(&tq_disk);
  1248. + set_current_state(TASK_INTERRUPTIBLE);
  1249. + schedule_timeout(HZ / 2); /* back off before the retry */
  1250. + continue;
  1251. + }
  1252. + return 1; /* any other error is final */
  1253. + }
  1254. + y += x;
  1255. + } while (y < size);
  1256. + return 0;
  1257. +}
  1258. - return bh;
  1259. +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) /* carries out one request on a file backed loop: transfer plus file I/O in pieces of at most PAGE_SIZE; returns 0 on success, 1 on any transfer or I/O error */
  1260. +{
  1261. + loff_t pos;
  1262. + struct file *file = lo->lo_backing_file;
  1263. + char *data, *buf;
  1264. + unsigned int size, len;
  1265. + unsigned long IV;
  1266. +
  1267. + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; /* byte position in the backing file */
  1268. + buf = page_address(lo->lo_bh_free->b_page); /* bounce page: head of the free list (loop_set_fd() pre-allocates one buffer for file backed loops) */
  1269. + len = bh->b_size;
  1270. + data = bh_kmap(bh);
  1271. + IV = bh->b_rsector;
  1272. + if (!lo->lo_iv_remove)
  1273. + IV += lo->lo_offs_sec; /* the offset is part of the IV unless lo_iv_remove is set */
  1274. + while (len > 0) {
  1275. + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
  1276. + /* this code relies on the NONE transfer being a no-op */
  1277. + buf = data; /* no bounce page: file I/O goes straight to/from the request data */
  1278. + }
  1279. + size = PAGE_SIZE;
  1280. + if (size > len)
  1281. + size = len;
  1282. + if (rw == WRITE) { /* write: transform first, then write the result to the file */
  1283. + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
  1284. + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV);
  1285. + goto kunmap_and_out;
  1286. + }
  1287. + if (loop_file_io(file, buf, size, &pos, 1)) {
  1288. + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV);
  1289. + goto kunmap_and_out;
  1290. + }
  1291. + } else { /* read: fetch from the file first, then transform into the request data */
  1292. + if (loop_file_io(file, buf, size, &pos, 0)) {
  1293. + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV);
  1294. + goto kunmap_and_out;
  1295. + }
  1296. + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
  1297. + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV);
  1298. + goto kunmap_and_out;
  1299. + }
  1300. + flush_dcache_page(bh->b_page);
  1301. + }
  1302. + data += size;
  1303. + len -= size;
  1304. + IV += size >> 9; /* advance by the number of 512-byte units just handled */
  1305. + }
  1306. + bh_kunmap(bh);
  1307. + return 0;
  1308. +
  1309. +kunmap_and_out:
  1310. + bh_kunmap(bh); /* error exit: still undo the bh_kmap() above */
  1311. + return 1;
  1312. }
  1313. static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
  1314. {
  1315. - struct buffer_head *bh = NULL;
  1316. + struct buffer_head *bh;
  1317. struct loop_device *lo;
  1318. - unsigned long IV;
  1319. + char *md;
  1320. + set_current_state(TASK_RUNNING);
  1321. if (!buffer_locked(rbh))
  1322. BUG();
  1323. @@ -483,45 +1121,55 @@ static int loop_make_request(request_que
  1324. } else if (rw == READA) {
  1325. rw = READ;
  1326. } else if (rw != READ) {
  1327. - printk(KERN_ERR "loop: unknown command (%d)\n", rw);
  1328. + printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw);
  1329. goto err;
  1330. }
  1331. - rbh = blk_queue_bounce(q, rw, rbh);
  1332. -
  1333. /*
  1334. * file backed, queue for loop_thread to handle
  1335. */
  1336. if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1337. - /*
  1338. - * rbh locked at this point, noone else should clear
  1339. - * the dirty flag
  1340. - */
  1341. - if (rw == WRITE)
  1342. - set_bit(BH_Dirty, &rbh->b_state);
  1343. - loop_add_bh(lo, rbh);
  1344. + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que0);
  1345. return 0;
  1346. }
  1347. /*
  1348. - * piggy old buffer on original, and submit for I/O
  1349. + * device backed, just remap rdev & rsector for NONE transfer
  1350. */
  1351. - bh = loop_get_buffer(lo, rbh);
  1352. - IV = loop_get_iv(lo, rbh->b_rsector);
  1353. + if (lo->lo_encrypt_type == LO_CRYPT_NONE) {
  1354. + rbh->b_rsector += lo->lo_offs_sec;
  1355. + rbh->b_rdev = lo->lo_device;
  1356. + generic_make_request(rw, rbh);
  1357. + if (atomic_dec_and_test(&lo->lo_pending))
  1358. + wake_up_interruptible(&lo->lo_bh_wait);
  1359. + return 0;
  1360. + }
  1361. +
  1362. + /*
  1363. + * device backed, start reads and writes now if buffer available
  1364. + */
  1365. + bh = loop_get_buffer(lo, rbh, 0, rw);
  1366. + if (!bh) {
  1367. + /* just queue request and let thread handle alloc later */
  1368. + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
  1369. + return 0;
  1370. + }
  1371. if (rw == WRITE) {
  1372. - set_bit(BH_Dirty, &bh->b_state);
  1373. - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data,
  1374. - bh->b_size, IV))
  1375. + int trv;
  1376. + md = bh_kmap(rbh);
  1377. + trv = lo_do_transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - lo->lo_iv_remove);
  1378. + bh_kunmap(rbh);
  1379. + if (trv) {
  1380. + loop_put_buffer(lo, bh);
  1381. goto err;
  1382. + }
  1383. }
  1384. -
  1385. generic_make_request(rw, bh);
  1386. return 0;
  1387. err:
  1388. if (atomic_dec_and_test(&lo->lo_pending))
  1389. - up(&lo->lo_bh_mutex);
  1390. - loop_put_buffer(bh);
  1391. + wake_up_interruptible(&lo->lo_bh_wait);
  1392. out:
  1393. buffer_IO_error(rbh);
  1394. return 0;
  1395. @@ -530,30 +1178,6 @@ inactive:
  1396. goto out;
  1397. }
  1398. -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
  1399. -{
  1400. - int ret;
  1401. -
  1402. - /*
  1403. - * For block backed loop, we know this is a READ
  1404. - */
  1405. - if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1406. - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
  1407. -
  1408. - ret = do_bh_filebacked(lo, bh, rw);
  1409. - bh->b_end_io(bh, !ret);
  1410. - } else {
  1411. - struct buffer_head *rbh = bh->b_private;
  1412. - unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
  1413. -
  1414. - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
  1415. - bh->b_size, IV);
  1416. -
  1417. - rbh->b_end_io(rbh, !ret);
  1418. - loop_put_buffer(bh);
  1419. - }
  1420. -}
  1421. -
  1422. /*
  1423. * worker thread that handles reads/writes to file backed loop devices,
  1424. * to avoid blocking in our make_request_fn. it also does loop decrypting
  1425. @@ -563,8 +1187,20 @@ static inline void loop_handle_bh(struct
  1426. static int loop_thread(void *data)
  1427. {
  1428. struct loop_device *lo = data;
  1429. - struct buffer_head *bh;
  1430. + struct buffer_head *bh, *xbh;
  1431. + int x, rw, qi = 0, flushcnt = 0;
  1432. + wait_queue_t waitq;
  1433. + que_look_up_table qt[4] = {
  1434. + { &lo->lo_bh_que0, &lo->lo_bh_que1, &lo->lo_bh_que2, 0, 1, 2 },
  1435. + { &lo->lo_bh_que2, &lo->lo_bh_que0, &lo->lo_bh_que1, 2, 0, 1 },
  1436. + { &lo->lo_bh_que0, &lo->lo_bh_que2, &lo->lo_bh_que1, 0, 2, 1 },
  1437. + { &lo->lo_bh_que1, &lo->lo_bh_que0, &lo->lo_bh_que2, 1, 0, 2 }
  1438. + };
  1439. + char *md;
  1440. + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
  1441. + init_waitqueue_entry(&waitq, current);
  1442. + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
  1443. daemonize();
  1444. exit_files(current);
  1445. reparent_to_init();
  1446. @@ -576,12 +1212,30 @@ static int loop_thread(void *data)
  1447. flush_signals(current);
  1448. spin_unlock_irq(&current->sigmask_lock);
  1449. + if (lo_nice > 0)
  1450. + lo_nice = 0;
  1451. + if (lo_nice < -20)
  1452. + lo_nice = -20;
  1453. +#if defined(DEF_NICE) && defined(DEF_COUNTER)
  1454. + /* old scheduler syntax */
  1455. + current->policy = SCHED_OTHER;
  1456. + current->nice = lo_nice;
  1457. +#else
  1458. + /* O(1) scheduler syntax */
  1459. + set_user_nice(current, lo_nice);
  1460. +#endif
  1461. +
  1462. spin_lock_irq(&lo->lo_lock);
  1463. lo->lo_state = Lo_bound;
  1464. atomic_inc(&lo->lo_pending);
  1465. spin_unlock_irq(&lo->lo_lock);
  1466. current->flags |= PF_NOIO;
  1467. +#if defined(PF_NOFREEZE)
  1468. + current->flags |= PF_NOFREEZE;
  1469. +#elif defined(PF_IOTHREAD)
  1470. + current->flags |= PF_IOTHREAD;
  1471. +#endif
  1472. /*
  1473. * up sem, we are running
  1474. @@ -589,23 +1243,120 @@ static int loop_thread(void *data)
  1475. up(&lo->lo_sem);
  1476. for (;;) {
  1477. - down_interruptible(&lo->lo_bh_mutex);
  1478. + add_wait_queue(&lo->lo_bh_wait, &waitq);
  1479. + for (;;) {
  1480. + set_current_state(TASK_INTERRUPTIBLE);
  1481. + if (!atomic_read(&lo->lo_pending))
  1482. + break;
  1483. +
  1484. + x = 0;
  1485. + spin_lock_irq(&lo->lo_lock);
  1486. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1487. + if(lo->lo_keyscrub_fn) x = 1;
  1488. +#endif
  1489. + if (lo->lo_bh_que0) {
  1490. + x = 1;
  1491. + } else if (lo->lo_bh_que1 || lo->lo_bh_que2) {
  1492. + /* file backed works too because lo->lo_bh_need == 0 */
  1493. + if (lo->lo_bh_free || !lo->lo_bh_need)
  1494. + x = 1;
  1495. + }
  1496. + spin_unlock_irq(&lo->lo_lock);
  1497. + if (x)
  1498. + break;
  1499. +
  1500. + schedule();
  1501. + }
  1502. + set_current_state(TASK_RUNNING);
  1503. + remove_wait_queue(&lo->lo_bh_wait, &waitq);
  1504. +
  1505. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1506. + if(lo->lo_keyscrub_fn) {
  1507. + (*lo->lo_keyscrub_fn)(lo->lo_keyscrub_ptr);
  1508. + lo->lo_keyscrub_fn = 0;
  1509. + }
  1510. +#endif
  1511. /*
  1512. - * could be upped because of tear-down, not because of
  1513. + * could be woken because of tear-down, not because of
  1514. * pending work
  1515. */
  1516. if (!atomic_read(&lo->lo_pending))
  1517. break;
  1518. - bh = loop_get_bh(lo);
  1519. - if (!bh) {
  1520. - printk("loop: missing bh\n");
  1521. + /*
  1522. + * read queues using alternating order to prevent starvation
  1523. + */
  1524. + bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
  1525. + if (!bh)
  1526. continue;
  1527. +
  1528. + /*
  1529. + * x list tag usage(buffer-allocated)
  1530. + * --- -------------- -----------------------
  1531. + * 0 lo->lo_bh_que0 dev-read(y) / file-read
  1532. + * 1 lo->lo_bh_que1 dev-write(n) / file-write
  1533. + * 2 lo->lo_bh_que2 dev-read(n)
  1534. + */
  1535. + rw = (x == 1) ? WRITE : READ;
  1536. + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
  1537. + /* loop_make_request didn't allocate a buffer, do that now */
  1538. + xbh = loop_get_buffer(lo, bh, 1, rw);
  1539. + if (!xbh) {
  1540. + run_task_queue(&tq_disk);
  1541. + flushcnt = 0;
  1542. + loop_add_queue_first(lo, bh, (rw == WRITE) ? &lo->lo_bh_que1 : &lo->lo_bh_que2);
  1543. + /* lo->lo_bh_need should be 1 now, go back to sleep */
  1544. + continue;
  1545. + }
  1546. + if (rw == WRITE) {
  1547. + int trv;
  1548. + md = bh_kmap(bh);
  1549. + trv = lo_do_transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - lo->lo_iv_remove);
  1550. + bh_kunmap(bh);
  1551. + if (trv) {
  1552. + loop_put_buffer(lo, xbh);
  1553. + buffer_IO_error(bh);
  1554. + atomic_dec(&lo->lo_pending);
  1555. + continue;
  1556. + }
  1557. + }
  1558. + generic_make_request(rw, xbh);
  1559. +
  1560. + /* start I/O if there are no more requests lacking buffers */
  1561. + x = 0;
  1562. + spin_lock_irq(&lo->lo_lock);
  1563. + if (!lo->lo_bh_que1 && !lo->lo_bh_que2)
  1564. + x = 1;
  1565. + spin_unlock_irq(&lo->lo_lock);
  1566. + if (x || (++flushcnt >= lo->lo_bh_flsh)) {
  1567. + run_task_queue(&tq_disk);
  1568. + flushcnt = 0;
  1569. + }
  1570. +
  1571. + /* request not completely processed yet */
  1572. + continue;
  1573. + }
  1574. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1575. + /* request is for file backed device */
  1576. + x = do_bh_filebacked(lo, bh, rw);
  1577. + bh->b_reqnext = NULL;
  1578. + bh->b_end_io(bh, !x);
  1579. + } else {
  1580. + /* device backed read has completed, do decrypt now */
  1581. + xbh = bh->b_private;
  1582. + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
  1583. + /* instead, recompute IV from original request */
  1584. + md = bh_kmap(xbh);
  1585. + x = lo_do_transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + lo->lo_offs_sec - lo->lo_iv_remove);
  1586. + flush_dcache_page(xbh->b_page);
  1587. + bh_kunmap(xbh);
  1588. + xbh->b_reqnext = NULL;
  1589. + xbh->b_end_io(xbh, !x);
  1590. + loop_put_buffer(lo, bh);
  1591. }
  1592. - loop_handle_bh(lo, bh);
  1593. /*
  1594. - * upped both for pending work and tear-down, lo_pending
  1595. + * woken both for pending work and tear-down, lo_pending
  1596. * will hit zero then
  1597. */
  1598. if (atomic_dec_and_test(&lo->lo_pending))
  1599. @@ -616,15 +1367,34 @@ static int loop_thread(void *data)
  1600. return 0;
  1601. }
  1602. +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev) /* chooses a block size for loop device dev from the backing device's blksize_size entry and applies it with set_blocksize() */
  1603. +{
  1604. + int bs = 0, x;
  1605. +
  1606. + if (blksize_size[MAJOR(lo->lo_device)])
  1607. + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
  1608. + if (!bs)
  1609. + bs = BLOCK_SIZE; /* fall back when the table has no entry */
  1610. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { /* file backed only: step the block size down while the loop size is not a multiple of it */
  1611. + x = loop_sizes[lo->lo_number]; /* value stored by figure_loop_size(), which describes it as a count of 1024-byte blocks */
  1612. + if ((bs == 8192) && (x & 7))
  1613. + bs = 4096;
  1614. + if ((bs == 4096) && (x & 3))
  1615. + bs = 2048;
  1616. + if ((bs == 2048) && (x & 1))
  1617. + bs = 1024;
  1618. + }
  1619. + set_blocksize(dev, bs);
  1620. +}
  1621. +
  1622. static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
  1623. unsigned int arg)
  1624. {
  1625. struct file *file;
  1626. struct inode *inode;
  1627. kdev_t lo_device;
  1628. - int lo_flags = 0;
  1629. + int lo_flags = 0, hardsz = 512;
  1630. int error;
  1631. - int bs;
  1632. MOD_INC_USE_COUNT;
  1633. @@ -643,33 +1413,49 @@ static int loop_set_fd(struct loop_devic
  1634. if (!(file->f_mode & FMODE_WRITE))
  1635. lo_flags |= LO_FLAGS_READ_ONLY;
  1636. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1637. + lo->lo_keyscrub_fn = 0;
  1638. +#endif
  1639. + lo->lo_offset = lo->lo_sizelimit = 0;
  1640. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1641. + lo->lo_bh_free = lo->lo_bh_que2 = lo->lo_bh_que1 = lo->lo_bh_que0 = NULL;
  1642. + lo->lo_bh_need = lo->lo_bh_flsh = 0;
  1643. + init_waitqueue_head(&lo->lo_bh_wait);
  1644. if (S_ISBLK(inode->i_mode)) {
  1645. lo_device = inode->i_rdev;
  1646. if (lo_device == dev) {
  1647. error = -EBUSY;
  1648. goto out_putf;
  1649. }
  1650. + if (loop_prealloc_init(lo, 0)) {
  1651. + error = -ENOMEM;
  1652. + goto out_putf;
  1653. + }
  1654. + hardsz = get_hardsect_size(lo_device);
  1655. } else if (S_ISREG(inode->i_mode)) {
  1656. - struct address_space_operations *aops = inode->i_mapping->a_ops;
  1657. /*
  1658. * If we can't read - sorry. If we only can't write - well,
  1659. * it's going to be read-only.
  1660. */
  1661. - if (!aops->readpage)
  1662. + if (!file->f_op || !file->f_op->read)
  1663. goto out_putf;
  1664. - if (!aops->prepare_write || !aops->commit_write)
  1665. + if (!file->f_op->write)
  1666. lo_flags |= LO_FLAGS_READ_ONLY;
  1667. lo_device = inode->i_dev;
  1668. lo_flags |= LO_FLAGS_DO_BMAP;
  1669. + if (loop_prealloc_init(lo, 1)) {
  1670. + error = -ENOMEM;
  1671. + goto out_putf;
  1672. + }
  1673. error = 0;
  1674. } else
  1675. goto out_putf;
  1676. get_file(file);
  1677. - if (IS_RDONLY (inode) || is_read_only(lo_device)
  1678. + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device)
  1679. || !(lo_file->f_mode & FMODE_WRITE))
  1680. lo_flags |= LO_FLAGS_READ_ONLY;
  1681. @@ -677,28 +1463,40 @@ static int loop_set_fd(struct loop_devic
  1682. lo->lo_device = lo_device;
  1683. lo->lo_flags = lo_flags;
  1684. + if(lo_flags & LO_FLAGS_READ_ONLY)
  1685. + lo->lo_flags |= 0x200000; /* export to user space */
  1686. lo->lo_backing_file = file;
  1687. lo->transfer = NULL;
  1688. lo->ioctl = NULL;
  1689. - figure_loop_size(lo);
  1690. - lo->old_gfp_mask = inode->i_mapping->gfp_mask;
  1691. - inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
  1692. -
  1693. - bs = 0;
  1694. - if (blksize_size[MAJOR(lo_device)])
  1695. - bs = blksize_size[MAJOR(lo_device)][MINOR(lo_device)];
  1696. - if (!bs)
  1697. - bs = BLOCK_SIZE;
  1698. + if (figure_loop_size(lo)) {
  1699. + error = -EFBIG;
  1700. + goto out_cleanup;
  1701. + }
  1702. - set_blocksize(dev, bs);
  1703. + if (lo_flags & LO_FLAGS_DO_BMAP) {
  1704. + lo->old_gfp_mask = inode->i_mapping->gfp_mask;
  1705. + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);
  1706. + inode->i_mapping->gfp_mask |= __GFP_HIGH;
  1707. + } else {
  1708. + lo->old_gfp_mask = -1;
  1709. + }
  1710. - lo->lo_bh = lo->lo_bhtail = NULL;
  1711. - kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
  1712. - down(&lo->lo_sem);
  1713. + loop_hardsizes[MINOR(dev)] = hardsz;
  1714. + loop_set_softblksz(lo, dev);
  1715. + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
  1716. + if(error < 0)
  1717. + goto out_mapping;
  1718. + down(&lo->lo_sem);
  1719. fput(file);
  1720. return 0;
  1721. + out_mapping:
  1722. + if(lo->old_gfp_mask != -1)
  1723. + inode->i_mapping->gfp_mask = lo->old_gfp_mask;
  1724. + out_cleanup:
  1725. + loop_prealloc_cleanup(lo);
  1726. + fput(file);
  1727. out_putf:
  1728. fput(file);
  1729. out:
  1730. @@ -708,13 +1506,14 @@ static int loop_set_fd(struct loop_devic
  1731. static int loop_release_xfer(struct loop_device *lo)
  1732. {
  1733. - int err = 0;
  1734. + int err = 0;
  1735. if (lo->lo_encrypt_type) {
  1736. - struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type];
  1737. + struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type];
  1738. + lo->transfer = NULL;
  1739. if (xfer && xfer->release)
  1740. - err = xfer->release(lo);
  1741. + err = xfer->release(lo);
  1742. if (xfer && xfer->unlock)
  1743. - xfer->unlock(lo);
  1744. + xfer->unlock(lo);
  1745. lo->lo_encrypt_type = 0;
  1746. }
  1747. return err;
  1748. @@ -722,19 +1521,19 @@ static int loop_release_xfer(struct loop
  1749. static int loop_init_xfer(struct loop_device *lo, int type,struct loop_info *i)
  1750. {
  1751. - int err = 0;
  1752. + int err = 0;
  1753. if (type) {
  1754. - struct loop_func_table *xfer = xfer_funcs[type];
  1755. + struct loop_func_table *xfer = xfer_funcs[type];
  1756. if (xfer->init)
  1757. err = xfer->init(lo, i);
  1758. - if (!err) {
  1759. + if (!err) {
  1760. lo->lo_encrypt_type = type;
  1761. if (xfer->lock)
  1762. xfer->lock(lo);
  1763. }
  1764. }
  1765. return err;
  1766. -}
  1767. +}
  1768. static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
  1769. {
  1770. @@ -751,11 +1550,12 @@ static int loop_clr_fd(struct loop_devic
  1771. spin_lock_irq(&lo->lo_lock);
  1772. lo->lo_state = Lo_rundown;
  1773. if (atomic_dec_and_test(&lo->lo_pending))
  1774. - up(&lo->lo_bh_mutex);
  1775. + wake_up_interruptible(&lo->lo_bh_wait);
  1776. spin_unlock_irq(&lo->lo_lock);
  1777. down(&lo->lo_sem);
  1778. + loop_prealloc_cleanup(lo);
  1779. lo->lo_backing_file = NULL;
  1780. loop_release_xfer(lo);
  1781. @@ -763,87 +1563,219 @@ static int loop_clr_fd(struct loop_devic
  1782. lo->ioctl = NULL;
  1783. lo->lo_device = 0;
  1784. lo->lo_encrypt_type = 0;
  1785. - lo->lo_offset = 0;
  1786. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  1787. + lo->lo_keyscrub_fn = 0;
  1788. +#endif
  1789. + lo->lo_offset = lo->lo_sizelimit = 0;
  1790. + lo->lo_offs_sec = lo->lo_iv_remove = 0;
  1791. lo->lo_encrypt_key_size = 0;
  1792. lo->lo_flags = 0;
  1793. memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
  1794. memset(lo->lo_name, 0, LO_NAME_SIZE);
  1795. + memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
  1796. loop_sizes[lo->lo_number] = 0;
  1797. invalidate_bdev(bdev, 0);
  1798. - filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
  1799. + if (gfp != -1)
  1800. + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
  1801. lo->lo_state = Lo_unbound;
  1802. fput(filp);
  1803. MOD_DEC_USE_COUNT;
  1804. return 0;
  1805. }
  1806. -static int loop_set_status(struct loop_device *lo, struct loop_info *arg)
  1807. +static void
  1808. +loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
  1809. +{
  1810. + memset(info64, 0, sizeof(*info64));
  1811. + info64->lo_number = info->lo_number;
  1812. + info64->lo_device = info->lo_device;
  1813. + info64->lo_inode = info->lo_inode;
  1814. + info64->lo_rdevice = info->lo_rdevice;
  1815. + info64->lo_offset = info->lo_offset;
  1816. + info64->lo_encrypt_type = info->lo_encrypt_type;
  1817. + info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
  1818. + info64->lo_flags = info->lo_flags;
  1819. + info64->lo_init[0] = info->lo_init[0];
  1820. + info64->lo_init[1] = info->lo_init[1];
  1821. + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
  1822. + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
  1823. + else
  1824. + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
  1825. + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
  1826. +}
  1827. +
  1828. +static int
  1829. +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
  1830. +{
  1831. + memset(info, 0, sizeof(*info));
  1832. + info->lo_number = info64->lo_number;
  1833. + info->lo_device = info64->lo_device;
  1834. + info->lo_inode = info64->lo_inode;
  1835. + info->lo_rdevice = info64->lo_rdevice;
  1836. + info->lo_offset = info64->lo_offset;
  1837. + info->lo_encrypt_type = info64->lo_encrypt_type;
  1838. + info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
  1839. + info->lo_flags = info64->lo_flags;
  1840. + info->lo_init[0] = info64->lo_init[0];
  1841. + info->lo_init[1] = info64->lo_init[1];
  1842. + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */
  1843. + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
  1844. + else
  1845. + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
  1846. + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
  1847. +
  1848. + /* error in case values were truncated */
  1849. + if (info->lo_device != info64->lo_device ||
  1850. + info->lo_rdevice != info64->lo_rdevice ||
  1851. + info->lo_inode != info64->lo_inode ||
  1852. + info->lo_offset != info64->lo_offset ||
  1853. + info64->lo_sizelimit)
  1854. + return -EOVERFLOW;
  1855. +
  1856. + return 0;
  1857. +}
  1858. +
  1859. +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loop_info64 *info, struct loop_info *oldinfo)
  1860. {
  1861. - struct loop_info info;
  1862. int err;
  1863. unsigned int type;
  1864. - if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
  1865. + if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
  1866. !capable(CAP_SYS_ADMIN))
  1867. return -EPERM;
  1868. if (lo->lo_state != Lo_bound)
  1869. return -ENXIO;
  1870. - if (copy_from_user(&info, arg, sizeof (struct loop_info)))
  1871. - return -EFAULT;
  1872. - if ((unsigned int) info.lo_encrypt_key_size > LO_KEY_SIZE)
  1873. + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
  1874. return -EINVAL;
  1875. - type = info.lo_encrypt_type;
  1876. + type = info->lo_encrypt_type;
  1877. if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL)
  1878. return -EINVAL;
  1879. - if (type == LO_CRYPT_XOR && info.lo_encrypt_key_size == 0)
  1880. + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0)
  1881. return -EINVAL;
  1882. err = loop_release_xfer(lo);
  1883. - if (!err)
  1884. - err = loop_init_xfer(lo, type, &info);
  1885. if (err)
  1886. return err;
  1887. - lo->lo_offset = info.lo_offset;
  1888. - strncpy(lo->lo_name, info.lo_name, LO_NAME_SIZE);
  1889. + if ((loff_t)info->lo_offset < 0) {
  1890. + /* negative offset == remove offset from IV computations */
  1891. + lo->lo_offset = -(info->lo_offset);
  1892. + lo->lo_iv_remove = lo->lo_offset >> 9;
  1893. + } else {
  1894. + /* positive offset == include offset in IV computations */
  1895. + lo->lo_offset = info->lo_offset;
  1896. + lo->lo_iv_remove = 0;
  1897. + }
  1898. + lo->lo_offs_sec = lo->lo_offset >> 9;
  1899. + lo->lo_sizelimit = info->lo_sizelimit;
  1900. + err = figure_loop_size(lo);
  1901. + if (err)
  1902. + return err;
  1903. + loop_set_softblksz(lo, dev);
  1904. +
  1905. + /* transfer init function for 2.4 kernels takes old style struct */
  1906. + err = loop_init_xfer(lo, type, oldinfo);
  1907. + /* copy key -- just in case transfer init func modified it */
  1908. + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key));
  1909. + if (err)
  1910. + return err;
  1911. + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE);
  1912. + strncpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  1913. lo->transfer = xfer_funcs[type]->transfer;
  1914. lo->ioctl = xfer_funcs[type]->ioctl;
  1915. - lo->lo_encrypt_key_size = info.lo_encrypt_key_size;
  1916. - lo->lo_init[0] = info.lo_init[0];
  1917. - lo->lo_init[1] = info.lo_init[1];
  1918. - if (info.lo_encrypt_key_size) {
  1919. - memcpy(lo->lo_encrypt_key, info.lo_encrypt_key,
  1920. - info.lo_encrypt_key_size);
  1921. - lo->lo_key_owner = current->uid;
  1922. - }
  1923. - figure_loop_size(lo);
  1924. + lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
  1925. + lo->lo_init[0] = info->lo_init[0];
  1926. + lo->lo_init[1] = info->lo_init[1];
  1927. + if (info->lo_encrypt_key_size) {
  1928. + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
  1929. + info->lo_encrypt_key_size);
  1930. + lo->lo_key_owner = current->uid;
  1931. + }
  1932. +
  1933. return 0;
  1934. }
  1935. -static int loop_get_status(struct loop_device *lo, struct loop_info *arg)
  1936. +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  1937. {
  1938. - struct loop_info info;
  1939. struct file *file = lo->lo_backing_file;
  1940. if (lo->lo_state != Lo_bound)
  1941. return -ENXIO;
  1942. - if (!arg)
  1943. - return -EINVAL;
  1944. - memset(&info, 0, sizeof(info));
  1945. - info.lo_number = lo->lo_number;
  1946. - info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
  1947. - info.lo_inode = file->f_dentry->d_inode->i_ino;
  1948. - info.lo_rdevice = kdev_t_to_nr(lo->lo_device);
  1949. - info.lo_offset = lo->lo_offset;
  1950. - info.lo_flags = lo->lo_flags;
  1951. - strncpy(info.lo_name, lo->lo_name, LO_NAME_SIZE);
  1952. - info.lo_encrypt_type = lo->lo_encrypt_type;
  1953. + memset(info, 0, sizeof(*info));
  1954. + info->lo_number = lo->lo_number;
  1955. + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
  1956. + info->lo_inode = file->f_dentry->d_inode->i_ino;
  1957. + info->lo_rdevice = kdev_t_to_nr(lo->lo_device);
  1958. + info->lo_offset = lo->lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
  1959. + info->lo_sizelimit = lo->lo_sizelimit;
  1960. + info->lo_flags = lo->lo_flags;
  1961. + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE);
  1962. + strncpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
  1963. + info->lo_encrypt_type = lo->lo_encrypt_type;
  1964. if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
  1965. - info.lo_encrypt_key_size = lo->lo_encrypt_key_size;
  1966. - memcpy(info.lo_encrypt_key, lo->lo_encrypt_key,
  1967. + info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
  1968. + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
  1969. lo->lo_encrypt_key_size);
  1970. + info->lo_init[0] = lo->lo_init[0];
  1971. + info->lo_init[1] = lo->lo_init[1];
  1972. }
  1973. - return copy_to_user(arg, &info, sizeof(info)) ? -EFAULT : 0;
  1974. + return 0;
  1975. +}
  1976. +
  1977. +static int
  1978. +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n)
  1979. +{
  1980. + struct loop_info info;
  1981. + struct loop_info64 info64;
  1982. + int err;
  1983. +
  1984. + if (n) {
  1985. + if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
  1986. + return -EFAULT;
  1987. + /* truncation errors can be ignored here as transfer init func only wants key bits */
  1988. + loop_info64_to_old(&info64, &info);
  1989. + } else {
  1990. + if (copy_from_user(&info, arg, sizeof (struct loop_info)))
  1991. + return -EFAULT;
  1992. + loop_info64_from_old(&info, &info64);
  1993. + }
  1994. + err = loop_set_status(lo, dev, &info64, &info);
  1995. + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
  1996. + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key));
  1997. + return err;
  1998. +}
  1999. +
  2000. +static int
  2001. +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) {
  2002. + struct loop_info info;
  2003. + struct loop_info64 info64;
  2004. + int err = 0;
  2005. +
  2006. + if (!arg)
  2007. + err = -EINVAL;
  2008. + if (!err)
  2009. + err = loop_get_status(lo, &info64);
  2010. + if (!err)
  2011. + err = loop_info64_to_old(&info64, &info);
  2012. + if (!err && copy_to_user(arg, &info, sizeof(info)))
  2013. + err = -EFAULT;
  2014. +
  2015. + return err;
  2016. +}
  2017. +
  2018. +static int
  2019. +loop_get_status64(struct loop_device *lo, struct loop_info64 *arg) {
  2020. + struct loop_info64 info64;
  2021. + int err = 0;
  2022. +
  2023. + if (!arg)
  2024. + err = -EINVAL;
  2025. + if (!err)
  2026. + err = loop_get_status(lo, &info64);
  2027. + if (!err && copy_to_user(arg, &info64, sizeof(info64)))
  2028. + err = -EFAULT;
  2029. +
  2030. + return err;
  2031. }
  2032. static int lo_ioctl(struct inode * inode, struct file * file,
  2033. @@ -872,10 +1804,16 @@ static int lo_ioctl(struct inode * inode
  2034. err = loop_clr_fd(lo, inode->i_bdev);
  2035. break;
  2036. case LOOP_SET_STATUS:
  2037. - err = loop_set_status(lo, (struct loop_info *) arg);
  2038. + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0);
  2039. break;
  2040. case LOOP_GET_STATUS:
  2041. - err = loop_get_status(lo, (struct loop_info *) arg);
  2042. + err = loop_get_status_old(lo, (struct loop_info *) arg);
  2043. + break;
  2044. + case LOOP_SET_STATUS64:
  2045. + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1);
  2046. + break;
  2047. + case LOOP_GET_STATUS64:
  2048. + err = loop_get_status64(lo, (struct loop_info64 *) arg);
  2049. break;
  2050. case BLKGETSIZE:
  2051. if (lo->lo_state != Lo_bound) {
  2052. @@ -894,6 +1832,8 @@ static int lo_ioctl(struct inode * inode
  2053. case BLKBSZGET:
  2054. case BLKBSZSET:
  2055. case BLKSSZGET:
  2056. + case BLKROGET:
  2057. + case BLKROSET:
  2058. err = blk_ioctl(inode->i_rdev, cmd, arg);
  2059. break;
  2060. default:
  2061. @@ -906,7 +1846,7 @@ static int lo_ioctl(struct inode * inode
  2062. static int lo_open(struct inode *inode, struct file *file)
  2063. {
  2064. struct loop_device *lo;
  2065. - int dev, type;
  2066. + int dev;
  2067. if (!inode)
  2068. return -EINVAL;
  2069. @@ -921,10 +1861,6 @@ static int lo_open(struct inode *inode,
  2070. lo = &loop_dev[dev];
  2071. MOD_INC_USE_COUNT;
  2072. down(&lo->lo_ctl_mutex);
  2073. -
  2074. - type = lo->lo_encrypt_type;
  2075. - if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
  2076. - xfer_funcs[type]->lock(lo);
  2077. lo->lo_refcnt++;
  2078. up(&lo->lo_ctl_mutex);
  2079. return 0;
  2080. @@ -933,7 +1869,7 @@ static int lo_open(struct inode *inode,
  2081. static int lo_release(struct inode *inode, struct file *file)
  2082. {
  2083. struct loop_device *lo;
  2084. - int dev, type;
  2085. + int dev;
  2086. if (!inode)
  2087. return 0;
  2088. @@ -948,11 +1884,7 @@ static int lo_release(struct inode *inod
  2089. lo = &loop_dev[dev];
  2090. down(&lo->lo_ctl_mutex);
  2091. - type = lo->lo_encrypt_type;
  2092. --lo->lo_refcnt;
  2093. - if (xfer_funcs[type] && xfer_funcs[type]->unlock)
  2094. - xfer_funcs[type]->unlock(lo);
  2095. -
  2096. up(&lo->lo_ctl_mutex);
  2097. MOD_DEC_USE_COUNT;
  2098. return 0;
  2099. @@ -974,34 +1906,32 @@ MODULE_LICENSE("GPL");
  2100. int loop_register_transfer(struct loop_func_table *funcs)
  2101. {
  2102. - if ((unsigned)funcs->number > MAX_LO_CRYPT || xfer_funcs[funcs->number])
  2103. + if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number])
  2104. return -EINVAL;
  2105. xfer_funcs[funcs->number] = funcs;
  2106. - return 0;
  2107. + return 0;
  2108. }
  2109. int loop_unregister_transfer(int number)
  2110. {
  2111. - struct loop_device *lo;
  2112. + struct loop_device *lo;
  2113. if ((unsigned)number >= MAX_LO_CRYPT)
  2114. - return -EINVAL;
  2115. - for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
  2116. + return -EINVAL;
  2117. + for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
  2118. int type = lo->lo_encrypt_type;
  2119. - if (type == number) {
  2120. - xfer_funcs[type]->release(lo);
  2121. - lo->transfer = NULL;
  2122. - lo->lo_encrypt_type = 0;
  2123. + if (type == number) {
  2124. + loop_release_xfer(lo);
  2125. }
  2126. }
  2127. - xfer_funcs[number] = NULL;
  2128. - return 0;
  2129. + xfer_funcs[number] = NULL;
  2130. + return 0;
  2131. }
  2132. EXPORT_SYMBOL(loop_register_transfer);
  2133. EXPORT_SYMBOL(loop_unregister_transfer);
  2134. -int __init loop_init(void)
  2135. +int __init loop_init(void)
  2136. {
  2137. int i;
  2138. @@ -1017,10 +1947,9 @@ int __init loop_init(void)
  2139. return -EIO;
  2140. }
  2141. -
  2142. loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
  2143. if (!loop_dev)
  2144. - return -ENOMEM;
  2145. + goto out_dev;
  2146. loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
  2147. if (!loop_sizes)
  2148. @@ -1030,6 +1959,10 @@ int __init loop_init(void)
  2149. if (!loop_blksizes)
  2150. goto out_blksizes;
  2151. + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
  2152. + if (!loop_hardsizes)
  2153. + goto out_hardsizes;
  2154. +
  2155. blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
  2156. for (i = 0; i < max_loop; i++) {
  2157. @@ -1037,45 +1970,86 @@ int __init loop_init(void)
  2158. memset(lo, 0, sizeof(struct loop_device));
  2159. init_MUTEX(&lo->lo_ctl_mutex);
  2160. init_MUTEX_LOCKED(&lo->lo_sem);
  2161. - init_MUTEX_LOCKED(&lo->lo_bh_mutex);
  2162. lo->lo_number = i;
  2163. spin_lock_init(&lo->lo_lock);
  2164. }
  2165. memset(loop_sizes, 0, max_loop * sizeof(int));
  2166. memset(loop_blksizes, 0, max_loop * sizeof(int));
  2167. + memset(loop_hardsizes, 0, max_loop * sizeof(int));
  2168. blk_size[MAJOR_NR] = loop_sizes;
  2169. blksize_size[MAJOR_NR] = loop_blksizes;
  2170. + hardsect_size[MAJOR_NR] = loop_hardsizes;
  2171. for (i = 0; i < max_loop; i++)
  2172. register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
  2173. + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
  2174. + if (!lo_prealloc[i])
  2175. + continue;
  2176. + if (lo_prealloc[i] < LO_PREALLOC_MIN)
  2177. + lo_prealloc[i] = LO_PREALLOC_MIN;
  2178. + if (lo_prealloc[i] > LO_PREALLOC_MAX)
  2179. + lo_prealloc[i] = LO_PREALLOC_MAX;
  2180. + }
  2181. +
  2182. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2183. + lock_kernel();
  2184. + register_ioctl32_conversion(LOOP_SET_STATUS64, IOCTL32_COMPATIBLE_PTR);
  2185. + register_ioctl32_conversion(LOOP_GET_STATUS64, IOCTL32_COMPATIBLE_PTR);
  2186. + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP, IOCTL32_COMPATIBLE_PTR);
  2187. + register_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3, IOCTL32_COMPATIBLE_PTR);
  2188. + unlock_kernel();
  2189. +#endif
  2190. +
  2191. devfs_handle = devfs_mk_dir(NULL, "loop", NULL);
  2192. devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
  2193. MAJOR_NR, 0,
  2194. S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
  2195. &lo_fops, NULL);
  2196. +#if CONFIG_BLK_DEV_LOOP_AES
  2197. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  2198. + printk(KERN_INFO "loop: AES key scrubbing enabled\n");
  2199. +#endif
  2200. +#endif
  2201. printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
  2202. return 0;
  2203. +out_hardsizes:
  2204. + kfree(loop_blksizes);
  2205. out_blksizes:
  2206. kfree(loop_sizes);
  2207. out_sizes:
  2208. kfree(loop_dev);
  2209. +out_dev:
  2210. if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
  2211. printk(KERN_WARNING "loop: cannot unregister blkdev\n");
  2212. printk(KERN_ERR "loop: ran out of memory\n");
  2213. return -ENOMEM;
  2214. }
  2215. -void loop_exit(void)
  2216. +void loop_exit(void)
  2217. {
  2218. devfs_unregister(devfs_handle);
  2219. if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
  2220. printk(KERN_WARNING "loop: cannot unregister blkdev\n");
  2221. +
  2222. + blk_size[MAJOR_NR] = 0;
  2223. + blksize_size[MAJOR_NR] = 0;
  2224. + hardsect_size[MAJOR_NR] = 0;
  2225. kfree(loop_dev);
  2226. kfree(loop_sizes);
  2227. kfree(loop_blksizes);
  2228. + kfree(loop_hardsizes);
  2229. +
  2230. +#if defined(IOCTL32_COMPATIBLE_PTR)
  2231. + lock_kernel();
  2232. + unregister_ioctl32_conversion(LOOP_SET_STATUS64);
  2233. + unregister_ioctl32_conversion(LOOP_GET_STATUS64);
  2234. + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP);
  2235. + unregister_ioctl32_conversion(LOOP_MULTI_KEY_SETUP_V3);
  2236. + unlock_kernel();
  2237. +#endif
  2238. }
  2239. module_init(loop_init);
  2240. @@ -1090,3 +2064,14 @@ static int __init max_loop_setup(char *s
  2241. __setup("max_loop=", max_loop_setup);
  2242. #endif
  2243. +
  2244. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  2245. +void loop_add_keyscrub_fn(struct loop_device *lo, void (*fn)(void *), void *ptr)
  2246. +{
  2247. + lo->lo_keyscrub_ptr = ptr;
  2248. + wmb();
  2249. + lo->lo_keyscrub_fn = fn;
  2250. + wake_up_interruptible(&lo->lo_bh_wait);
  2251. +}
  2252. +EXPORT_SYMBOL(loop_add_keyscrub_fn);
  2253. +#endif
  2254. diff -pruN linux-2.4.28_orig/drivers/misc/Makefile linux-2.4.28/drivers/misc/Makefile
  2255. --- linux-2.4.28_orig/drivers/misc/Makefile 2000-12-29 23:07:22.000000000 +0100
  2256. +++ linux-2.4.28/drivers/misc/Makefile 2005-01-11 09:33:37.476526384 +0100
  2257. @@ -9,8 +9,35 @@
  2258. # parent makes..
  2259. #
  2260. +.S.o:
  2261. + $(CC) $(AFLAGS) $(AFLAGS_$@) -c $< -o $*.o
  2262. +
  2263. O_TARGET := misc.o
  2264. +ifeq ($(CONFIG_BLK_DEV_LOOP_AES),y)
  2265. +AES_X86_ASM=n
  2266. +ifeq ($(CONFIG_X86),y)
  2267. +ifneq ($(CONFIG_X86_64),y)
  2268. + AES_X86_ASM=y
  2269. +endif
  2270. +endif
  2271. +ifeq ($(AES_X86_ASM),y)
  2272. + export-objs += crypto-ksym.o
  2273. + obj-y += aes-x86.o md5-x86.o crypto-ksym.o
  2274. + AFLAGS_aes-x86.o := -DUSE_UNDERLINE=1
  2275. +else
  2276. +ifeq ($(CONFIG_X86_64),y)
  2277. + export-objs += crypto-ksym.o
  2278. + obj-y += aes-amd64.o md5-amd64.o crypto-ksym.o
  2279. + AFLAGS_aes-amd64.o := -DUSE_UNDERLINE=1
  2280. +else
  2281. + export-objs += crypto-ksym.o
  2282. + obj-y += aes.o md5.o crypto-ksym.o
  2283. + CFLAGS_aes.o := -DDATA_ALWAYS_ALIGNED=1
  2284. +endif
  2285. +endif
  2286. +endif
  2287. +
  2288. include $(TOPDIR)/Rules.make
  2289. fastdep:
  2290. diff -pruN linux-2.4.28_orig/drivers/misc/aes-amd64.S linux-2.4.28/drivers/misc/aes-amd64.S
  2291. --- linux-2.4.28_orig/drivers/misc/aes-amd64.S 1970-01-01 01:00:00.000000000 +0100
  2292. +++ linux-2.4.28/drivers/misc/aes-amd64.S 2005-01-11 09:33:37.478526080 +0100
  2293. @@ -0,0 +1,893 @@
  2294. +//
  2295. +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
  2296. +// All rights reserved.
  2297. +//
  2298. +// TERMS
  2299. +//
  2300. +// Redistribution and use in source and binary forms, with or without
  2301. +// modification, are permitted subject to the following conditions:
  2302. +//
  2303. +// 1. Redistributions of source code must retain the above copyright
  2304. +// notice, this list of conditions and the following disclaimer.
  2305. +//
  2306. +// 2. Redistributions in binary form must reproduce the above copyright
  2307. +// notice, this list of conditions and the following disclaimer in the
  2308. +// documentation and/or other materials provided with the distribution.
  2309. +//
  2310. +// 3. The copyright holder's name must not be used to endorse or promote
  2311. +// any products derived from this software without his specific prior
  2312. +// written permission.
  2313. +//
  2314. +// This software is provided 'as is' with no express or implied warranties
  2315. +// of correctness or fitness for purpose.
  2316. +
  2317. +// Modified by Jari Ruusu, December 24 2001
  2318. +// - Converted syntax to GNU CPP/assembler syntax
  2319. +// - C programming interface converted back to "old" API
  2320. +// - Minor portability cleanups and speed optimizations
  2321. +
  2322. +// Modified by Jari Ruusu, April 11 2002
  2323. +// - Added above copyright and terms to resulting object code so that
  2324. +// binary distributions can avoid legal trouble
  2325. +
  2326. +// Modified by Jari Ruusu, June 12 2004
  2327. +// - Converted 32 bit x86 code to 64 bit AMD64 code
  2328. +// - Re-wrote encrypt and decrypt code from scratch
  2329. +
  2330. +// An AES (Rijndael) implementation for the AMD64. This version only
  2331. +// implements the standard AES block length (128 bits, 16 bytes). This code
  2332. +// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
  2333. +// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are
  2334. +// preserved across calls.
  2335. +
  2336. +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
  2337. +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2338. +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  2339. +
  2340. +#if defined(USE_UNDERLINE)
  2341. +# define aes_set_key _aes_set_key
  2342. +# define aes_encrypt _aes_encrypt
  2343. +# define aes_decrypt _aes_decrypt
  2344. +#endif
  2345. +#if !defined(ALIGN64BYTES)
  2346. +# define ALIGN64BYTES 64
  2347. +#endif
  2348. +
  2349. + .file "aes-amd64.S"
  2350. + .globl aes_set_key
  2351. + .globl aes_encrypt
  2352. + .globl aes_decrypt
  2353. +
  2354. + .section .rodata
  2355. +copyright:
  2356. + .ascii " \000"
  2357. + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
  2358. + .ascii "All rights reserved.\000"
  2359. + .ascii " \000"
  2360. + .ascii "TERMS\000"
  2361. + .ascii " \000"
  2362. + .ascii " Redistribution and use in source and binary forms, with or without\000"
  2363. + .ascii " modification, are permitted subject to the following conditions:\000"
  2364. + .ascii " \000"
  2365. + .ascii " 1. Redistributions of source code must retain the above copyright\000"
  2366. + .ascii " notice, this list of conditions and the following disclaimer.\000"
  2367. + .ascii " \000"
  2368. + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
  2369. + .ascii " notice, this list of conditions and the following disclaimer in the\000"
  2370. + .ascii " documentation and/or other materials provided with the distribution.\000"
  2371. + .ascii " \000"
  2372. + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
  2373. + .ascii " any products derived from this software without his specific prior\000"
  2374. + .ascii " written permission.\000"
  2375. + .ascii " \000"
  2376. + .ascii " This software is provided 'as is' with no express or implied warranties\000"
  2377. + .ascii " of correctness or fitness for purpose.\000"
  2378. + .ascii " \000"
  2379. +
  2380. +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  2381. +
  2382. +// offsets in context structure
  2383. +
  2384. +#define nkey 0 // key length, size 4
  2385. +#define nrnd 4 // number of rounds, size 4
  2386. +#define ekey 8 // encryption key schedule base address, size 256
  2387. +#define dkey 264 // decryption key schedule base address, size 256
  2388. +
  2389. +// This macro performs a forward encryption cycle. It is entered with
  2390. +// the first previous round column values in I1E, I2E, I3E and I4E and
  2391. +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
  2392. +
  2393. +#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
  2394. + movl p2(%rbp),OU1 ;\
  2395. + movl p2+4(%rbp),OU2 ;\
  2396. + movl p2+8(%rbp),OU3 ;\
  2397. + movl p2+12(%rbp),OU4 ;\
  2398. + movzbl I1B,%edi ;\
  2399. + movzbl I2B,%esi ;\
  2400. + movzbl I3B,%r8d ;\
  2401. + movzbl I4B,%r13d ;\
  2402. + shrl $8,I3E ;\
  2403. + shrl $8,I4E ;\
  2404. + xorl p1(,%rdi,4),OU1 ;\
  2405. + xorl p1(,%rsi,4),OU2 ;\
  2406. + xorl p1(,%r8,4),OU3 ;\
  2407. + xorl p1(,%r13,4),OU4 ;\
  2408. + movzbl I2H,%esi ;\
  2409. + movzbl I3B,%r8d ;\
  2410. + movzbl I4B,%r13d ;\
  2411. + movzbl I1H,%edi ;\
  2412. + shrl $8,I3E ;\
  2413. + shrl $8,I4E ;\
  2414. + xorl p1+tlen(,%rsi,4),OU1 ;\
  2415. + xorl p1+tlen(,%r8,4),OU2 ;\
  2416. + xorl p1+tlen(,%r13,4),OU3 ;\
  2417. + xorl p1+tlen(,%rdi,4),OU4 ;\
  2418. + shrl $16,I1E ;\
  2419. + shrl $16,I2E ;\
  2420. + movzbl I3B,%r8d ;\
  2421. + movzbl I4B,%r13d ;\
  2422. + movzbl I1B,%edi ;\
  2423. + movzbl I2B,%esi ;\
  2424. + xorl p1+2*tlen(,%r8,4),OU1 ;\
  2425. + xorl p1+2*tlen(,%r13,4),OU2 ;\
  2426. + xorl p1+2*tlen(,%rdi,4),OU3 ;\
  2427. + xorl p1+2*tlen(,%rsi,4),OU4 ;\
  2428. + shrl $8,I4E ;\
  2429. + movzbl I1H,%edi ;\
  2430. + movzbl I2H,%esi ;\
  2431. + shrl $8,I3E ;\
  2432. + xorl p1+3*tlen(,I4R,4),OU1 ;\
  2433. + xorl p1+3*tlen(,%rdi,4),OU2 ;\
  2434. + xorl p1+3*tlen(,%rsi,4),OU3 ;\
  2435. + xorl p1+3*tlen(,I3R,4),OU4
  2436. +
  2437. +// This macro performs an inverse encryption cycle. It is entered with
  2438. +// the first previous round column values in I1E, I2E, I3E and I4E and
  2439. +// exits with the final values in the OU1, OU2, OU3 and OU4 registers.
  2440. +
  2441. +#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
  2442. + movl p2+12(%rbp),OU4 ;\
  2443. + movl p2+8(%rbp),OU3 ;\
  2444. + movl p2+4(%rbp),OU2 ;\
  2445. + movl p2(%rbp),OU1 ;\
  2446. + movzbl I4B,%edi ;\
  2447. + movzbl I3B,%esi ;\
  2448. + movzbl I2B,%r8d ;\
  2449. + movzbl I1B,%r13d ;\
  2450. + shrl $8,I2E ;\
  2451. + shrl $8,I1E ;\
  2452. + xorl p1(,%rdi,4),OU4 ;\
  2453. + xorl p1(,%rsi,4),OU3 ;\
  2454. + xorl p1(,%r8,4),OU2 ;\
  2455. + xorl p1(,%r13,4),OU1 ;\
  2456. + movzbl I3H,%esi ;\
  2457. + movzbl I2B,%r8d ;\
  2458. + movzbl I1B,%r13d ;\
  2459. + movzbl I4H,%edi ;\
  2460. + shrl $8,I2E ;\
  2461. + shrl $8,I1E ;\
  2462. + xorl p1+tlen(,%rsi,4),OU4 ;\
  2463. + xorl p1+tlen(,%r8,4),OU3 ;\
  2464. + xorl p1+tlen(,%r13,4),OU2 ;\
  2465. + xorl p1+tlen(,%rdi,4),OU1 ;\
  2466. + shrl $16,I4E ;\
  2467. + shrl $16,I3E ;\
  2468. + movzbl I2B,%r8d ;\
  2469. + movzbl I1B,%r13d ;\
  2470. + movzbl I4B,%edi ;\
  2471. + movzbl I3B,%esi ;\
  2472. + xorl p1+2*tlen(,%r8,4),OU4 ;\
  2473. + xorl p1+2*tlen(,%r13,4),OU3 ;\
  2474. + xorl p1+2*tlen(,%rdi,4),OU2 ;\
  2475. + xorl p1+2*tlen(,%rsi,4),OU1 ;\
  2476. + shrl $8,I1E ;\
  2477. + movzbl I4H,%edi ;\
  2478. + movzbl I3H,%esi ;\
  2479. + shrl $8,I2E ;\
  2480. + xorl p1+3*tlen(,I1R,4),OU4 ;\
  2481. + xorl p1+3*tlen(,%rdi,4),OU3 ;\
  2482. + xorl p1+3*tlen(,%rsi,4),OU2 ;\
  2483. + xorl p1+3*tlen(,I2R,4),OU1
  2484. +
  2485. +// AES (Rijndael) Encryption Subroutine
  2486. +
  2487. +// rdi = pointer to AES context
  2488. +// rsi = pointer to input plaintext bytes
  2489. +// rdx = pointer to output ciphertext bytes
  2490. +
  2491. + .text
  2492. + .align ALIGN64BYTES
  2493. +aes_encrypt:
  2494. + movl (%rsi),%eax // read in plaintext
  2495. + movl 4(%rsi),%ecx
  2496. + movl 8(%rsi),%r10d
  2497. + movl 12(%rsi),%r11d
  2498. +
  2499. + pushq %rbp
  2500. + leaq ekey+16(%rdi),%rbp // encryption key pointer
  2501. + movq %rdx,%r9 // pointer to out block
  2502. + movl nrnd(%rdi),%edx // number of rounds
  2503. + pushq %rbx
  2504. + pushq %r13
  2505. + pushq %r14
  2506. + pushq %r15
  2507. +
  2508. + xorl -16(%rbp),%eax // xor in first round key
  2509. + xorl -12(%rbp),%ecx
  2510. + xorl -8(%rbp),%r10d
  2511. + xorl -4(%rbp),%r11d
  2512. +
  2513. + subl $10,%edx
  2514. + je aes_15
  2515. + addq $32,%rbp
  2516. + subl $2,%edx
  2517. + je aes_13
  2518. + addq $32,%rbp
  2519. +
  2520. + fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2521. + fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2522. + jmp aes_13
  2523. + .align ALIGN64BYTES
  2524. +aes_13: fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2525. + fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2526. + jmp aes_15
  2527. + .align ALIGN64BYTES
  2528. +aes_15: fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2529. + fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2530. + fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2531. + fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2532. + fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2533. + fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2534. + fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2535. + fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2536. + fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
  2537. + fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
  2538. +
  2539. + popq %r15
  2540. + popq %r14
  2541. + popq %r13
  2542. + popq %rbx
  2543. + popq %rbp
  2544. +
  2545. + movl %eax,(%r9) // move final values to the output array.
  2546. + movl %ecx,4(%r9)
  2547. + movl %r10d,8(%r9)
  2548. + movl %r11d,12(%r9)
  2549. + ret
  2550. +
  2551. +// AES (Rijndael) Decryption Subroutine - decrypts one 16-byte block
  2552. +
  2553. +// rdi = pointer to AES context
  2554. +// rsi = pointer to input ciphertext bytes
  2555. +// rdx = pointer to output plaintext bytes
  2556. +// rbp, rbx, r13-r15 are saved and restored; rax, rcx, rdx, r9-r11 are overwritten
  2557. + .align ALIGN64BYTES
  2558. +aes_decrypt:
  2559. + movl 12(%rsi),%eax // read in ciphertext: bytes 12-15 -> eax
  2560. + movl 8(%rsi),%ecx // bytes 8-11 -> ecx
  2561. + movl 4(%rsi),%r10d // bytes 4-7 -> r10d
  2562. + movl (%rsi),%r11d // bytes 0-3 -> r11d
  2563. +
  2564. + pushq %rbp // saved here, restored before ret
  2565. + leaq dkey+16(%rdi),%rbp // decryption key pointer (16 bytes past dkey)
  2566. + movq %rdx,%r9 // pointer to out block
  2567. + movl nrnd(%rdi),%edx // number of rounds
  2568. + pushq %rbx
  2569. + pushq %r13
  2570. + pushq %r14
  2571. + pushq %r15
  2572. +
  2573. + xorl -4(%rbp),%eax // xor in first round key (the 16 bytes at dkey)
  2574. + xorl -8(%rbp),%ecx
  2575. + xorl -12(%rbp),%r10d
  2576. + xorl -16(%rbp),%r11d
  2577. +
  2578. + subl $10,%edx // nrnd == 10 ?
  2579. + je aes_25 // yes: run the 10 rounds starting at aes_25
  2580. + addq $32,%rbp // no: move the key pointer on by 32 bytes
  2581. + subl $2,%edx // nrnd == 12 ?
  2582. + je aes_23 // yes: run the 12 rounds starting at aes_23
  2583. + addq $32,%rbp // no: move the key pointer on by another 32 bytes
  2584. +
  2585. + inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx) // reached when nrnd is neither 10 nor 12: 14 rounds from here
  2586. + inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2587. + jmp aes_23 // continue at the aligned label below
  2588. + .align ALIGN64BYTES
  2589. +aes_23: inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2590. + inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2591. + jmp aes_25 // continue at the aligned label below
  2592. + .align ALIGN64BYTES
  2593. +aes_25: inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2594. + inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2595. + inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2596. + inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2597. + inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2598. + inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2599. + inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2600. + inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
  2601. + inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
  2602. + inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax) // last round uses aes_il_tab instead of aes_it_tab
  2603. +
  2604. + popq %r15 // restore the registers pushed on entry
  2605. + popq %r14
  2606. + popq %r13
  2607. + popq %rbx
  2608. + popq %rbp
  2609. +
  2610. + movl %eax,12(%r9) // move final values to the output array.
  2611. + movl %ecx,8(%r9)
  2612. + movl %r10d,4(%r9)
  2613. + movl %r11d,(%r9)
  2614. + ret
  2615. +
  2616. +// AES (Rijndael) Key Schedule Subroutine
  2617. +
  2618. +// This macro performs a column mixing operation on the 32-bit input
  2619. +// word in %ebx to give a 32-bit result in %eax. It uses each of the 4
  2620. +// bytes in the input column to index 4 different tables of 256 32-bit
  2621. +// words (at p1, p1+tlen, p1+2*tlen, p1+3*tlen) that are xored together.
  2622. +// %ecx is used for the table index and %ebx is left rotated by 16 bits.
  2623. +#define mix_col(p1) \
  2624. + movzbl %bl,%ecx ;\
  2625. + movl p1(,%rcx,4),%eax ;\
  2626. + movzbl %bh,%ecx ;\
  2627. + ror $16,%ebx ;\
  2628. + xorl p1+tlen(,%rcx,4),%eax ;\
  2629. + movzbl %bl,%ecx ;\
  2630. + xorl p1+2*tlen(,%rcx,4),%eax ;\
  2631. + movzbl %bh,%ecx ;\
  2632. + xorl p1+3*tlen(,%rcx,4),%eax
  2633. +
  2634. +// Key Schedule Macros
  2635. +// ksc4(p1): from the previous four schedule words held in %esi, %ebp, %edx, %ebx compute the next four, store them at 16*p1(%rdi) (%esi at the lowest address) and keep them in the same registers. Uses entry p1 of aes_rcon_tab; %eax and %ecx are overwritten.
  2636. +#define ksc4(p1) \
  2637. + rol $24,%ebx ;\
  2638. + mix_col(aes_fl_tab) ;\
  2639. + ror $8,%ebx ;\
  2640. + xorl 4*p1+aes_rcon_tab,%eax ;\
  2641. + xorl %eax,%esi ;\
  2642. + xorl %esi,%ebp ;\
  2643. + movl %esi,16*p1(%rdi) ;\
  2644. + movl %ebp,16*p1+4(%rdi) ;\
  2645. + xorl %ebp,%edx ;\
  2646. + xorl %edx,%ebx ;\
  2647. + movl %edx,16*p1+8(%rdi) ;\
  2648. + movl %ebx,16*p1+12(%rdi)
  2649. +// ksc6(p1): compute six schedule words and store them at 24*p1(%rdi). The first two are built in %eax from the words 24 and 20 bytes below the store position; the last four are chained through %esi, %ebp, %edx, %ebx, which keep the new values. Uses entry p1 of aes_rcon_tab; %eax and %ecx are overwritten.
  2650. +#define ksc6(p1) \
  2651. + rol $24,%ebx ;\
  2652. + mix_col(aes_fl_tab) ;\
  2653. + ror $8,%ebx ;\
  2654. + xorl 4*p1+aes_rcon_tab,%eax ;\
  2655. + xorl 24*p1-24(%rdi),%eax ;\
  2656. + movl %eax,24*p1(%rdi) ;\
  2657. + xorl 24*p1-20(%rdi),%eax ;\
  2658. + movl %eax,24*p1+4(%rdi) ;\
  2659. + xorl %eax,%esi ;\
  2660. + xorl %esi,%ebp ;\
  2661. + movl %esi,24*p1+8(%rdi) ;\
  2662. + movl %ebp,24*p1+12(%rdi) ;\
  2663. + xorl %ebp,%edx ;\
  2664. + xorl %edx,%ebx ;\
  2665. + movl %edx,24*p1+16(%rdi) ;\
  2666. + movl %ebx,24*p1+20(%rdi)
  2667. +// ksc8(p1): compute eight schedule words and store them at 32*p1(%rdi). The first four are built in %eax from the words 32..20 bytes below the store position; a second aes_fl_tab lookup on the fourth word (no rotation, no round constant, %rbx preserved via push/pop) then feeds the last four, chained through %esi, %ebp, %edx, %ebx. Uses entry p1 of aes_rcon_tab; %eax and %ecx are overwritten.
  2668. +#define ksc8(p1) \
  2669. + rol $24,%ebx ;\
  2670. + mix_col(aes_fl_tab) ;\
  2671. + ror $8,%ebx ;\
  2672. + xorl 4*p1+aes_rcon_tab,%eax ;\
  2673. + xorl 32*p1-32(%rdi),%eax ;\
  2674. + movl %eax,32*p1(%rdi) ;\
  2675. + xorl 32*p1-28(%rdi),%eax ;\
  2676. + movl %eax,32*p1+4(%rdi) ;\
  2677. + xorl 32*p1-24(%rdi),%eax ;\
  2678. + movl %eax,32*p1+8(%rdi) ;\
  2679. + xorl 32*p1-20(%rdi),%eax ;\
  2680. + movl %eax,32*p1+12(%rdi) ;\
  2681. + pushq %rbx ;\
  2682. + movl %eax,%ebx ;\
  2683. + mix_col(aes_fl_tab) ;\
  2684. + popq %rbx ;\
  2685. + xorl %eax,%esi ;\
  2686. + xorl %esi,%ebp ;\
  2687. + movl %esi,32*p1+16(%rdi) ;\
  2688. + movl %ebp,32*p1+20(%rdi) ;\
  2689. + xorl %ebp,%edx ;\
  2690. + xorl %edx,%ebx ;\
  2691. + movl %edx,32*p1+24(%rdi) ;\
  2692. + movl %ebx,32*p1+28(%rdi)
  2693. +
  2694. +// rdi = pointer to AES context
  2695. +// rsi = pointer to key bytes
  2696. +// rdx = key length, bytes or bits (128 or more is taken as bits; any length other than 24 or 32 bytes is treated as 16 bytes)
  2697. +// rcx = ed_flag, non-zero (1)=encrypt only, 0=both encrypt and decrypt
  2698. +// rbx, rbp and the flags are saved and restored; rax, rcx, rdx, rsi, rdi, r10, r11 (and r9 when ed_flag is 0) are overwritten
  2699. + .align ALIGN64BYTES
  2700. +aes_set_key:
  2701. + pushfq // save flags; cld below clears the direction flag
  2702. + pushq %rbp
  2703. + pushq %rbx
  2704. +
  2705. + movq %rcx,%r11 // ed_flg
  2706. + movq %rdx,%rcx // key length
  2707. + movq %rdi,%r10 // AES context
  2708. +
  2709. + cmpl $128,%ecx // below 128: length is already in bytes
  2710. + jb aes_30
  2711. + shrl $3,%ecx // bits -> bytes
  2712. +aes_30: cmpl $32,%ecx // 32 and 24 bytes are used as given
  2713. + je aes_32
  2714. + cmpl $24,%ecx
  2715. + je aes_32
  2716. + movl $16,%ecx // every other length becomes 16 bytes
  2717. +aes_32: shrl $2,%ecx // bytes -> 32-bit words (4, 6 or 8)
  2718. + movl %ecx,nkey(%r10) // store key length in words
  2719. + leaq 6(%rcx),%rax // 10/12/14 for 4/6/8 32-bit key length
  2720. + movl %eax,nrnd(%r10) // store number of rounds
  2721. + leaq ekey(%r10),%rdi // key position in AES context
  2722. + cld // string instructions below step upwards
  2723. + movl %ecx,%eax // save key length in eax
  2724. + rep ; movsl // copy the key words to the start of the key schedule
  2725. + movl -4(%rsi),%ebx // put the last four key words in registers
  2726. + movl -8(%rsi),%edx // to allow faster code
  2727. + movl -12(%rsi),%ebp
  2728. + movl -16(%rsi),%esi
  2729. +
  2730. + cmpl $4,%eax // jump on key size
  2731. + je aes_36
  2732. + cmpl $6,%eax
  2733. + je aes_35
  2734. +
  2735. + ksc8(0) // 8-word key: seven ksc8 steps
  2736. + ksc8(1)
  2737. + ksc8(2)
  2738. + ksc8(3)
  2739. + ksc8(4)
  2740. + ksc8(5)
  2741. + ksc8(6)
  2742. + jmp aes_37
  2743. +aes_35: ksc6(0) // 6-word key: eight ksc6 steps
  2744. + ksc6(1)
  2745. + ksc6(2)
  2746. + ksc6(3)
  2747. + ksc6(4)
  2748. + ksc6(5)
  2749. + ksc6(6)
  2750. + ksc6(7)
  2751. + jmp aes_37
  2752. +aes_36: ksc4(0) // 4-word key: ten ksc4 steps
  2753. + ksc4(1)
  2754. + ksc4(2)
  2755. + ksc4(3)
  2756. + ksc4(4)
  2757. + ksc4(5)
  2758. + ksc4(6)
  2759. + ksc4(7)
  2760. + ksc4(8)
  2761. + ksc4(9)
  2762. +aes_37: cmpl $0,%r11d // ed_flg
  2763. + jne aes_39 // non-zero: skip the decryption key schedule
  2764. +
  2765. +// compile decryption key schedule from encryption schedule - reverse
  2766. +// order and do mix_column operation on round keys except first and last
  2767. +
  2768. + movl nrnd(%r10),%eax // kt = cx->d_key + nc * cx->Nrnd
  2769. + shl $2,%rax // nrnd*4; the scale of 4 in the leaq makes it nrnd*16 bytes
  2770. + leaq dkey(%r10,%rax,4),%rdi
  2771. + leaq ekey(%r10),%rsi // kf = cx->e_key
  2772. +
  2773. + movsq // copy first round key (unmodified)
  2774. + movsq
  2775. + subq $32,%rdi // back to the 16-byte slot below the one just written
  2776. + movl $1,%r9d // round key counter, starts at 1
  2777. +aes_38: // do mix column on each column of
  2778. + lodsl // each round key
  2779. + movl %eax,%ebx
  2780. + mix_col(aes_im_tab)
  2781. + stosl
  2782. + lodsl
  2783. + movl %eax,%ebx
  2784. + mix_col(aes_im_tab)
  2785. + stosl
  2786. + lodsl
  2787. + movl %eax,%ebx
  2788. + mix_col(aes_im_tab)
  2789. + stosl
  2790. + lodsl
  2791. + movl %eax,%ebx
  2792. + mix_col(aes_im_tab)
  2793. + stosl
  2794. + subq $32,%rdi // back to the 16-byte slot below the one just written
  2795. +
  2796. + incl %r9d
  2797. + cmpl nrnd(%r10),%r9d
  2798. + jb aes_38 // repeat while counter < nrnd
  2799. +
  2800. + movsq // copy last round key (unmodified)
  2801. + movsq
  2802. +aes_39: popq %rbx
  2803. + popq %rbp
  2804. + popfq // restore the flags saved on entry
  2805. + ret
  2806. +
  2807. +
  2808. +// finite field multiplies by {02}, {04} and {08}
  2809. +// (for a byte value x, bits shifted past bit 7 are cancelled by xoring in multiples of 0x11b; x is not parenthesized in the expansions, so pass plain constants only)
  2810. +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
  2811. +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
  2812. +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
  2813. +
  2814. +// finite field multiplies required in table generation
  2815. +// ({03}, {09}, {0b}, {0d} and {0e}, built by xoring the multiples above)
  2816. +#define f3(x) (f2(x) ^ x)
  2817. +#define f9(x) (f8(x) ^ x)
  2818. +#define fb(x) (f8(x) ^ f2(x) ^ x)
  2819. +#define fd(x) (f8(x) ^ f4(x) ^ x)
  2820. +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
  2821. +
  2822. +// These defines generate the forward table entries
  2823. +// (u1, u2 and u3 are u0 rotated left by 8, 16 and 24 bits)
  2824. +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
  2825. +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
  2826. +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
  2827. +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
  2828. +
  2829. +// These defines generate the inverse table entries
  2830. +// (v1, v2 and v3 are v0 rotated left by 8, 16 and 24 bits)
  2831. +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
  2832. +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
  2833. +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
  2834. +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
  2835. +
  2836. +// These defines generate entries for the last round tables
  2837. +// (w0..w3 place x in bits 0-7, 8-15, 16-23 and 24-31)
  2838. +#define w0(x) (x)
  2839. +#define w1(x) (x << 8)
  2840. +#define w2(x) (x << 16)
  2841. +#define w3(x) (x << 24)
  2842. +
  2843. +// macro to generate inverse mix column tables (needed for the key schedule)
  2844. +
  2845. +#define im_data0(p1) \
  2846. + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
  2847. + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
  2848. + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
  2849. + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
  2850. +#define im_data1(p1) \
  2851. + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
  2852. + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
  2853. + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
  2854. + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
  2855. +#define im_data2(p1) \
  2856. + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
  2857. + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
  2858. + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
  2859. + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
  2860. +#define im_data3(p1) \
  2861. + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
  2862. + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
  2863. + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
  2864. + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
  2865. +#define im_data4(p1) \
  2866. + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
  2867. + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
  2868. + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
  2869. + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
  2870. +#define im_data5(p1) \
  2871. + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
  2872. + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
  2873. + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
  2874. + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
  2875. +#define im_data6(p1) \
  2876. + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
  2877. + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
  2878. + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
  2879. + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
  2880. +#define im_data7(p1) \
  2881. + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
  2882. + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
  2883. + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
  2884. + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
  2885. +
  2886. +// S-box data - 256 entries
  2887. +
  2888. +#define sb_data0(p1) \
  2889. + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
  2890. + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
  2891. + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
  2892. + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
  2893. +#define sb_data1(p1) \
  2894. + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
  2895. + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
  2896. + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
  2897. + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
  2898. +#define sb_data2(p1) \
  2899. + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
  2900. + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
  2901. + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
  2902. + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
  2903. +#define sb_data3(p1) \
  2904. + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
  2905. + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
  2906. + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
  2907. + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
  2908. +#define sb_data4(p1) \
  2909. + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
  2910. + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
  2911. + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
  2912. + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
  2913. +#define sb_data5(p1) \
  2914. + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
  2915. + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
  2916. + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
  2917. + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
  2918. +#define sb_data6(p1) \
  2919. + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
  2920. + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
  2921. + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
  2922. + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
  2923. +#define sb_data7(p1) \
  2924. + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
  2925. + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
  2926. + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
  2927. + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
  2928. +
  2929. +// Inverse S-box data - 256 entries
  2930. +
  2931. +#define ib_data0(p1) \
  2932. + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
  2933. + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
  2934. + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
  2935. + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
  2936. +#define ib_data1(p1) \
  2937. + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
  2938. + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
  2939. + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
  2940. + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
  2941. +#define ib_data2(p1) \
  2942. + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
  2943. + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
  2944. + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
  2945. + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
  2946. +#define ib_data3(p1) \
  2947. + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
  2948. + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
  2949. + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
  2950. + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
  2951. +#define ib_data4(p1) \
  2952. + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
  2953. + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
  2954. + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
  2955. + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
  2956. +#define ib_data5(p1) \
  2957. + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
  2958. + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
  2959. + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
  2960. + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
  2961. +#define ib_data6(p1) \
  2962. + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
  2963. + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
  2964. + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
  2965. + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
  2966. +#define ib_data7(p1) \
  2967. + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
  2968. + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
  2969. + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
  2970. + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
  2971. +
  2972. +// The rcon_table (needed for the key schedule)
  2973. +//
  2974. +// Here is Dr Brian Gladman's original source code:
  2975. +// _rcon_tab:
  2976. +// %assign x 1
  2977. +// %rep 29
  2978. +// dd x
  2979. +// %assign x f2(x)
  2980. +// %endrep
  2981. +//
  2982. +// Here is precomputed output (it's more portable this way):
  2983. +
  2984. + .section .rodata
  2985. + .align ALIGN64BYTES
  2986. +aes_rcon_tab:
  2987. + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
  2988. + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
  2989. + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
  2990. + .long 0xb3,0x7d,0xfa,0xef,0xc5
  2991. +
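  2992. +// The forward xor tables (aes_ft_tab for all rounds but the last, aes_fl_tab for the last round; four 256-entry tables each)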
  2993. +
  2994. + .align ALIGN64BYTES
  2995. +aes_ft_tab:
  2996. + sb_data0(u0)
  2997. + sb_data1(u0)
  2998. + sb_data2(u0)
  2999. + sb_data3(u0)
  3000. + sb_data4(u0)
  3001. + sb_data5(u0)
  3002. + sb_data6(u0)
  3003. + sb_data7(u0)
  3004. +
  3005. + sb_data0(u1)
  3006. + sb_data1(u1)
  3007. + sb_data2(u1)
  3008. + sb_data3(u1)
  3009. + sb_data4(u1)
  3010. + sb_data5(u1)
  3011. + sb_data6(u1)
  3012. + sb_data7(u1)
  3013. +
  3014. + sb_data0(u2)
  3015. + sb_data1(u2)
  3016. + sb_data2(u2)
  3017. + sb_data3(u2)
  3018. + sb_data4(u2)
  3019. + sb_data5(u2)
  3020. + sb_data6(u2)
  3021. + sb_data7(u2)
  3022. +
  3023. + sb_data0(u3)
  3024. + sb_data1(u3)
  3025. + sb_data2(u3)
  3026. + sb_data3(u3)
  3027. + sb_data4(u3)
  3028. + sb_data5(u3)
  3029. + sb_data6(u3)
  3030. + sb_data7(u3)
  3031. +
  3032. + .align ALIGN64BYTES
  3033. +aes_fl_tab:
  3034. + sb_data0(w0)
  3035. + sb_data1(w0)
  3036. + sb_data2(w0)
  3037. + sb_data3(w0)
  3038. + sb_data4(w0)
  3039. + sb_data5(w0)
  3040. + sb_data6(w0)
  3041. + sb_data7(w0)
  3042. +
  3043. + sb_data0(w1)
  3044. + sb_data1(w1)
  3045. + sb_data2(w1)
  3046. + sb_data3(w1)
  3047. + sb_data4(w1)
  3048. + sb_data5(w1)
  3049. + sb_data6(w1)
  3050. + sb_data7(w1)
  3051. +
  3052. + sb_data0(w2)
  3053. + sb_data1(w2)
  3054. + sb_data2(w2)
  3055. + sb_data3(w2)
  3056. + sb_data4(w2)
  3057. + sb_data5(w2)
  3058. + sb_data6(w2)
  3059. + sb_data7(w2)
  3060. +
  3061. + sb_data0(w3)
  3062. + sb_data1(w3)
  3063. + sb_data2(w3)
  3064. + sb_data3(w3)
  3065. + sb_data4(w3)
  3066. + sb_data5(w3)
  3067. + sb_data6(w3)
  3068. + sb_data7(w3)
  3069. +
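  3070. +// The inverse xor tables (aes_it_tab for all rounds but the last, aes_il_tab for the last round; four 256-entry tables each)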
  3071. +
  3072. + .align ALIGN64BYTES
  3073. +aes_it_tab:
  3074. + ib_data0(v0)
  3075. + ib_data1(v0)
  3076. + ib_data2(v0)
  3077. + ib_data3(v0)
  3078. + ib_data4(v0)
  3079. + ib_data5(v0)
  3080. + ib_data6(v0)
  3081. + ib_data7(v0)
  3082. +
  3083. + ib_data0(v1)
  3084. + ib_data1(v1)
  3085. + ib_data2(v1)
  3086. + ib_data3(v1)
  3087. + ib_data4(v1)
  3088. + ib_data5(v1)
  3089. + ib_data6(v1)
  3090. + ib_data7(v1)
  3091. +
  3092. + ib_data0(v2)
  3093. + ib_data1(v2)
  3094. + ib_data2(v2)
  3095. + ib_data3(v2)
  3096. + ib_data4(v2)
  3097. + ib_data5(v2)
  3098. + ib_data6(v2)
  3099. + ib_data7(v2)
  3100. +
  3101. + ib_data0(v3)
  3102. + ib_data1(v3)
  3103. + ib_data2(v3)
  3104. + ib_data3(v3)
  3105. + ib_data4(v3)
  3106. + ib_data5(v3)
  3107. + ib_data6(v3)
  3108. + ib_data7(v3)
  3109. +
  3110. + .align ALIGN64BYTES
  3111. +aes_il_tab:
  3112. + ib_data0(w0)
  3113. + ib_data1(w0)
  3114. + ib_data2(w0)
  3115. + ib_data3(w0)
  3116. + ib_data4(w0)
  3117. + ib_data5(w0)
  3118. + ib_data6(w0)
  3119. + ib_data7(w0)
  3120. +
  3121. + ib_data0(w1)
  3122. + ib_data1(w1)
  3123. + ib_data2(w1)
  3124. + ib_data3(w1)
  3125. + ib_data4(w1)
  3126. + ib_data5(w1)
  3127. + ib_data6(w1)
  3128. + ib_data7(w1)
  3129. +
  3130. + ib_data0(w2)
  3131. + ib_data1(w2)
  3132. + ib_data2(w2)
  3133. + ib_data3(w2)
  3134. + ib_data4(w2)
  3135. + ib_data5(w2)
  3136. + ib_data6(w2)
  3137. + ib_data7(w2)
  3138. +
  3139. + ib_data0(w3)
  3140. + ib_data1(w3)
  3141. + ib_data2(w3)
  3142. + ib_data3(w3)
  3143. + ib_data4(w3)
  3144. + ib_data5(w3)
  3145. + ib_data6(w3)
  3146. + ib_data7(w3)
  3147. +
  3148. +// The inverse mix column tables (aes_im_tab, four 256-entry tables, used by aes_set_key for the decryption key schedule)
  3149. +
  3150. + .align ALIGN64BYTES
  3151. +aes_im_tab:
  3152. + im_data0(v0)
  3153. + im_data1(v0)
  3154. + im_data2(v0)
  3155. + im_data3(v0)
  3156. + im_data4(v0)
  3157. + im_data5(v0)
  3158. + im_data6(v0)
  3159. + im_data7(v0)
  3160. +
  3161. + im_data0(v1)
  3162. + im_data1(v1)
  3163. + im_data2(v1)
  3164. + im_data3(v1)
  3165. + im_data4(v1)
  3166. + im_data5(v1)
  3167. + im_data6(v1)
  3168. + im_data7(v1)
  3169. +
  3170. + im_data0(v2)
  3171. + im_data1(v2)
  3172. + im_data2(v2)
  3173. + im_data3(v2)
  3174. + im_data4(v2)
  3175. + im_data5(v2)
  3176. + im_data6(v2)
  3177. + im_data7(v2)
  3178. +
  3179. + im_data0(v3)
  3180. + im_data1(v3)
  3181. + im_data2(v3)
  3182. + im_data3(v3)
  3183. + im_data4(v3)
  3184. + im_data5(v3)
  3185. + im_data6(v3)
  3186. + im_data7(v3)
  3187. diff -pruN linux-2.4.28_orig/drivers/misc/aes-x86.S linux-2.4.28/drivers/misc/aes-x86.S
  3188. --- linux-2.4.28_orig/drivers/misc/aes-x86.S 1970-01-01 01:00:00.000000000 +0100
  3189. +++ linux-2.4.28/drivers/misc/aes-x86.S 2005-01-11 09:33:37.480525776 +0100
  3190. @@ -0,0 +1,922 @@
  3191. +//
  3192. +// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
  3193. +// All rights reserved.
  3194. +//
  3195. +// TERMS
  3196. +//
  3197. +// Redistribution and use in source and binary forms, with or without
  3198. +// modification, are permitted subject to the following conditions:
  3199. +//
  3200. +// 1. Redistributions of source code must retain the above copyright
  3201. +// notice, this list of conditions and the following disclaimer.
  3202. +//
  3203. +// 2. Redistributions in binary form must reproduce the above copyright
  3204. +// notice, this list of conditions and the following disclaimer in the
  3205. +// documentation and/or other materials provided with the distribution.
  3206. +//
  3207. +// 3. The copyright holder's name must not be used to endorse or promote
  3208. +// any products derived from this software without his specific prior
  3209. +// written permission.
  3210. +//
  3211. +// This software is provided 'as is' with no express or implied warranties
  3212. +// of correctness or fitness for purpose.
  3213. +
  3214. +// Modified by Jari Ruusu, December 24 2001
  3215. +// - Converted syntax to GNU CPP/assembler syntax
  3216. +// - C programming interface converted back to "old" API
  3217. +// - Minor portability cleanups and speed optimizations
  3218. +
  3219. +// Modified by Jari Ruusu, April 11 2002
  3220. +// - Added above copyright and terms to resulting object code so that
  3221. +// binary distributions can avoid legal trouble
  3222. +
  3223. +// An AES (Rijndael) implementation for x86 compatible processors. This
  3224. +// version uses i386 instruction set but instruction scheduling is optimized
  3225. +// for Pentium-2. This version only implements the standard AES block length
  3226. +// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx
  3227. +// registers or the arithmetic status flags. However, the ebx, esi, edi, and
  3228. +// ebp registers are preserved across calls.
  3229. +
  3230. +// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
  3231. +// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  3232. +// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
  3233. +
  3234. +#if defined(USE_UNDERLINE)
  3235. +# define aes_set_key _aes_set_key
  3236. +# define aes_encrypt _aes_encrypt
  3237. +# define aes_decrypt _aes_decrypt
  3238. +#endif
  3239. +#if !defined(ALIGN32BYTES)
  3240. +# define ALIGN32BYTES 32
  3241. +#endif
  3242. +
  3243. + .file "aes-x86.S"
  3244. + .globl aes_set_key
  3245. + .globl aes_encrypt
  3246. + .globl aes_decrypt
  3247. +
  3248. + .text
  3249. +copyright:
  3250. + .ascii " \000"
  3251. + .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
  3252. + .ascii "All rights reserved.\000"
  3253. + .ascii " \000"
  3254. + .ascii "TERMS\000"
  3255. + .ascii " \000"
  3256. + .ascii " Redistribution and use in source and binary forms, with or without\000"
  3257. + .ascii " modification, are permitted subject to the following conditions:\000"
  3258. + .ascii " \000"
  3259. + .ascii " 1. Redistributions of source code must retain the above copyright\000"
  3260. + .ascii " notice, this list of conditions and the following disclaimer.\000"
  3261. + .ascii " \000"
  3262. + .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
  3263. + .ascii " notice, this list of conditions and the following disclaimer in the\000"
  3264. + .ascii " documentation and/or other materials provided with the distribution.\000"
  3265. + .ascii " \000"
  3266. + .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
  3267. + .ascii " any products derived from this software without his specific prior\000"
  3268. + .ascii " written permission.\000"
  3269. + .ascii " \000"
  3270. + .ascii " This software is provided 'as is' with no express or implied warranties\000"
  3271. + .ascii " of correctness or fitness for purpose.\000"
  3272. + .ascii " \000"
  3273. +
  3274. +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  3275. +
  3276. +// offsets to parameters with one register pushed onto stack
  3277. +
  3278. +#define ctx 8 // AES context structure
  3279. +#define in_blk 12 // input byte array address parameter
  3280. +#define out_blk 16 // output byte array address parameter
  3281. +
  3282. +// offsets in context structure
  3283. +
  3284. +#define nkey 0 // key length, size 4
  3285. +#define nrnd 4 // number of rounds, size 4
  3286. +#define ekey 8 // encryption key schedule base address, size 256
  3287. +#define dkey 264 // decryption key schedule base address, size 256
  3288. +
  3289. +// This macro performs one forward encryption round. It is entered with
  3290. +// the previous round column values in %eax, %ebx, %esi and %edi and
  3291. +// exits with the new column values in the same registers.
  3292. +// p1 = table base, p2 = round key offset from %ebp; (%esp) and 4(%esp) are used as scratch slots, %ecx and %edx are overwritten.
  3293. +#define fwd_rnd(p1,p2) \
  3294. + mov %ebx,(%esp) ;\
  3295. + movzbl %al,%edx ;\
  3296. + mov %eax,%ecx ;\
  3297. + mov p2(%ebp),%eax ;\
  3298. + mov %edi,4(%esp) ;\
  3299. + mov p2+12(%ebp),%edi ;\
  3300. + xor p1(,%edx,4),%eax ;\
  3301. + movzbl %ch,%edx ;\
  3302. + shr $16,%ecx ;\
  3303. + mov p2+4(%ebp),%ebx ;\
  3304. + xor p1+tlen(,%edx,4),%edi ;\
  3305. + movzbl %cl,%edx ;\
  3306. + movzbl %ch,%ecx ;\
  3307. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  3308. + mov %esi,%ecx ;\
  3309. + mov p1+2*tlen(,%edx,4),%esi ;\
  3310. + movzbl %cl,%edx ;\
  3311. + xor p1(,%edx,4),%esi ;\
  3312. + movzbl %ch,%edx ;\
  3313. + shr $16,%ecx ;\
  3314. + xor p1+tlen(,%edx,4),%ebx ;\
  3315. + movzbl %cl,%edx ;\
  3316. + movzbl %ch,%ecx ;\
  3317. + xor p1+2*tlen(,%edx,4),%eax ;\
  3318. + mov (%esp),%edx ;\
  3319. + xor p1+3*tlen(,%ecx,4),%edi ;\
  3320. + movzbl %dl,%ecx ;\
  3321. + xor p2+8(%ebp),%esi ;\
  3322. + xor p1(,%ecx,4),%ebx ;\
  3323. + movzbl %dh,%ecx ;\
  3324. + shr $16,%edx ;\
  3325. + xor p1+tlen(,%ecx,4),%eax ;\
  3326. + movzbl %dl,%ecx ;\
  3327. + movzbl %dh,%edx ;\
  3328. + xor p1+2*tlen(,%ecx,4),%edi ;\
  3329. + mov 4(%esp),%ecx ;\
  3330. + xor p1+3*tlen(,%edx,4),%esi ;\
  3331. + movzbl %cl,%edx ;\
  3332. + xor p1(,%edx,4),%edi ;\
  3333. + movzbl %ch,%edx ;\
  3334. + shr $16,%ecx ;\
  3335. + xor p1+tlen(,%edx,4),%esi ;\
  3336. + movzbl %cl,%edx ;\
  3337. + movzbl %ch,%ecx ;\
  3338. + xor p1+2*tlen(,%edx,4),%ebx ;\
  3339. + xor p1+3*tlen(,%ecx,4),%eax
  3340. +
  3341. +// This macro performs one inverse (decryption) round. It is entered with
  3342. +// the previous round column values in %eax, %ebx, %esi and %edi and
  3343. +// exits with the new column values in the same registers.
  3344. +// p1 = table base, p2 = round key offset from %ebp; (%esp) and 4(%esp) are used as scratch slots, %ecx and %edx are overwritten.
  3345. +#define inv_rnd(p1,p2) \
  3346. + movzbl %al,%edx ;\
  3347. + mov %ebx,(%esp) ;\
  3348. + mov %eax,%ecx ;\
  3349. + mov p2(%ebp),%eax ;\
  3350. + mov %edi,4(%esp) ;\
  3351. + mov p2+4(%ebp),%ebx ;\
  3352. + xor p1(,%edx,4),%eax ;\
  3353. + movzbl %ch,%edx ;\
  3354. + shr $16,%ecx ;\
  3355. + mov p2+12(%ebp),%edi ;\
  3356. + xor p1+tlen(,%edx,4),%ebx ;\
  3357. + movzbl %cl,%edx ;\
  3358. + movzbl %ch,%ecx ;\
  3359. + xor p1+3*tlen(,%ecx,4),%edi ;\
  3360. + mov %esi,%ecx ;\
  3361. + mov p1+2*tlen(,%edx,4),%esi ;\
  3362. + movzbl %cl,%edx ;\
  3363. + xor p1(,%edx,4),%esi ;\
  3364. + movzbl %ch,%edx ;\
  3365. + shr $16,%ecx ;\
  3366. + xor p1+tlen(,%edx,4),%edi ;\
  3367. + movzbl %cl,%edx ;\
  3368. + movzbl %ch,%ecx ;\
  3369. + xor p1+2*tlen(,%edx,4),%eax ;\
  3370. + mov (%esp),%edx ;\
  3371. + xor p1+3*tlen(,%ecx,4),%ebx ;\
  3372. + movzbl %dl,%ecx ;\
  3373. + xor p2+8(%ebp),%esi ;\
  3374. + xor p1(,%ecx,4),%ebx ;\
  3375. + movzbl %dh,%ecx ;\
  3376. + shr $16,%edx ;\
  3377. + xor p1+tlen(,%ecx,4),%esi ;\
  3378. + movzbl %dl,%ecx ;\
  3379. + movzbl %dh,%edx ;\
  3380. + xor p1+2*tlen(,%ecx,4),%edi ;\
  3381. + mov 4(%esp),%ecx ;\
  3382. + xor p1+3*tlen(,%edx,4),%eax ;\
  3383. + movzbl %cl,%edx ;\
  3384. + xor p1(,%edx,4),%edi ;\
  3385. + movzbl %ch,%edx ;\
  3386. + shr $16,%ecx ;\
  3387. + xor p1+tlen(,%edx,4),%eax ;\
  3388. + movzbl %cl,%edx ;\
  3389. + movzbl %ch,%ecx ;\
  3390. + xor p1+2*tlen(,%edx,4),%ebx ;\
  3391. + xor p1+3*tlen(,%ecx,4),%esi
  3392. +
  3393. +// AES (Rijndael) Encryption Subroutine (one 16-byte block, in_blk -> out_blk)
  3394. +
  3395. + .text
  3396. + .align ALIGN32BYTES
  3397. +aes_encrypt:
  3398. + push %ebp
  3399. + mov ctx(%esp),%ebp // pointer to context
  3400. + mov in_blk(%esp),%ecx // pointer to input block
  3401. + push %ebx
  3402. + push %esi
  3403. + push %edi
  3404. + mov nrnd(%ebp),%edx // number of rounds
  3405. + lea ekey+16(%ebp),%ebp // key pointer, 16 bytes past start of ekey
  3406. +
  3407. +// input four columns and xor in first round key
  3408. +
  3409. + mov (%ecx),%eax
  3410. + mov 4(%ecx),%ebx
  3411. + mov 8(%ecx),%esi
  3412. + mov 12(%ecx),%edi
  3413. + xor -16(%ebp),%eax
  3414. + xor -12(%ebp),%ebx
  3415. + xor -8(%ebp),%esi
  3416. + xor -4(%ebp),%edi
  3417. +
  3418. + sub $8,%esp // space for register saves on stack
  3419. +
  3420. + sub $10,%edx // exactly 10 rounds: enter at aes_15
  3421. + je aes_15
  3422. + add $32,%ebp // more rounds: advance key pointer by two round keys
  3423. + sub $2,%edx // exactly 12 rounds: enter at aes_13
  3424. + je aes_13
  3425. + add $32,%ebp // any other count runs all 14 rounds
  3426. +
  3427. + fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
  3428. + fwd_rnd(aes_ft_tab,-48)
  3429. +aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
  3430. + fwd_rnd(aes_ft_tab,-16)
  3431. +aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
  3432. + fwd_rnd(aes_ft_tab,16)
  3433. + fwd_rnd(aes_ft_tab,32)
  3434. + fwd_rnd(aes_ft_tab,48)
  3435. + fwd_rnd(aes_ft_tab,64)
  3436. + fwd_rnd(aes_ft_tab,80)
  3437. + fwd_rnd(aes_ft_tab,96)
  3438. + fwd_rnd(aes_ft_tab,112)
  3439. + fwd_rnd(aes_ft_tab,128)
  3440. + fwd_rnd(aes_fl_tab,144) // last round uses a different table
  3441. +
  3442. +// move final values to the output array.
  3443. +
  3444. + mov out_blk+20(%esp),%ebp // +20 = 3 pushes + 8 scratch bytes since ctx was read
  3445. + add $8,%esp // release the scratch space
  3446. + mov %eax,(%ebp)
  3447. + mov %ebx,4(%ebp)
  3448. + mov %esi,8(%ebp)
  3449. + mov %edi,12(%ebp)
  3450. + pop %edi
  3451. + pop %esi
  3452. + pop %ebx
  3453. + pop %ebp
  3454. + ret
  3455. +
  3456. +
  3457. +// AES (Rijndael) Decryption Subroutine (one 16-byte block, in_blk -> out_blk)
  3458. +
  3459. + .align ALIGN32BYTES
  3460. +aes_decrypt:
  3461. + push %ebp
  3462. + mov ctx(%esp),%ebp // pointer to context
  3463. + mov in_blk(%esp),%ecx // pointer to input block
  3464. + push %ebx
  3465. + push %esi
  3466. + push %edi
  3467. + mov nrnd(%ebp),%edx // number of rounds
  3468. + lea dkey+16(%ebp),%ebp // key pointer, 16 bytes past start of dkey
  3469. +
  3470. +// input four columns and xor in first round key
  3471. +
  3472. + mov (%ecx),%eax
  3473. + mov 4(%ecx),%ebx
  3474. + mov 8(%ecx),%esi
  3475. + mov 12(%ecx),%edi
  3476. + xor -16(%ebp),%eax
  3477. + xor -12(%ebp),%ebx
  3478. + xor -8(%ebp),%esi
  3479. + xor -4(%ebp),%edi
  3480. +
  3481. + sub $8,%esp // space for register saves on stack
  3482. +
  3483. + sub $10,%edx // exactly 10 rounds: enter at aes_25
  3484. + je aes_25
  3485. + add $32,%ebp // more rounds: advance key pointer by two round keys
  3486. + sub $2,%edx // exactly 12 rounds: enter at aes_23
  3487. + je aes_23
  3488. + add $32,%ebp // any other count runs all 14 rounds
  3489. +
  3490. + inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
  3491. + inv_rnd(aes_it_tab,-48)
  3492. +aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
  3493. + inv_rnd(aes_it_tab,-16)
  3494. +aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
  3495. + inv_rnd(aes_it_tab,16)
  3496. + inv_rnd(aes_it_tab,32)
  3497. + inv_rnd(aes_it_tab,48)
  3498. + inv_rnd(aes_it_tab,64)
  3499. + inv_rnd(aes_it_tab,80)
  3500. + inv_rnd(aes_it_tab,96)
  3501. + inv_rnd(aes_it_tab,112)
  3502. + inv_rnd(aes_it_tab,128)
  3503. + inv_rnd(aes_il_tab,144) // last round uses a different table
  3504. +
  3505. +// move final values to the output array.
  3506. +
  3507. + mov out_blk+20(%esp),%ebp // +20 = 3 pushes + 8 scratch bytes since ctx was read
  3508. + add $8,%esp // release the scratch space
  3509. + mov %eax,(%ebp)
  3510. + mov %ebx,4(%ebp)
  3511. + mov %esi,8(%ebp)
  3512. + mov %edi,12(%ebp)
  3513. + pop %edi
  3514. + pop %esi
  3515. + pop %ebx
  3516. + pop %ebp
  3517. + ret
  3518. +
  3519. +// AES (Rijndael) Key Schedule Subroutine
  3520. +
  3521. +// input/output parameters (offsets from %ebp after pushfl and push %ebp)
  3522. +
  3523. +#define aes_cx 12 // AES context
  3524. +#define in_key 16 // key input array address
  3525. +#define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
  3526. +#define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
  3527. +
  3528. +// offsets for locals (relative to %ebp)
  3529. +
  3530. +#define cnt -4 // round key counter used by the decryption schedule loop
  3531. +#define slen 8 // bytes of stack reserved for locals
  3532. +
  3533. +// mix_col(p1): column mixing of the 32-bit word in %ebx, result in %eax.
  3534. +// Each of the 4 input bytes indexes one of 4 tables (p1 + k*tlen) of 256
  3535. +// 32-bit words and the four entries are xored together to form the output.
  3536. +// Clobbers %ecx and leaves %ebx rotated right by 16 bits.
  3537. +
  3538. +#define mix_col(p1) \
  3539. + movzbl %bl,%ecx ;\
  3540. + mov p1(,%ecx,4),%eax ;\
  3541. + movzbl %bh,%ecx ;\
  3542. + ror $16,%ebx ;\
  3543. + xor p1+tlen(,%ecx,4),%eax ;\
  3544. + movzbl %bl,%ecx ;\
  3545. + xor p1+2*tlen(,%ecx,4),%eax ;\
  3546. + movzbl %bh,%ecx ;\
  3547. + xor p1+3*tlen(,%ecx,4),%eax
  3548. +
  3549. +// Key Schedule Macros: kscN(p1) computes step p1 of the N-word key expansion,
  3550. +// storing N words at 4*N*p1(%edi); last 4 words stay in %esi,%ebp,%edx,%ebx.
  3551. +#define ksc4(p1) \
  3552. + rol $24,%ebx ;\
  3553. + mix_col(aes_fl_tab) ;\
  3554. + ror $8,%ebx ;\
  3555. + xor 4*p1+aes_rcon_tab,%eax ;\
  3556. + xor %eax,%esi ;\
  3557. + xor %esi,%ebp ;\
  3558. + mov %esi,16*p1(%edi) ;\
  3559. + mov %ebp,16*p1+4(%edi) ;\
  3560. + xor %ebp,%edx ;\
  3561. + xor %edx,%ebx ;\
  3562. + mov %edx,16*p1+8(%edi) ;\
  3563. + mov %ebx,16*p1+12(%edi)
  3564. +
  3565. +#define ksc6(p1) \
  3566. + rol $24,%ebx ;\
  3567. + mix_col(aes_fl_tab) ;\
  3568. + ror $8,%ebx ;\
  3569. + xor 4*p1+aes_rcon_tab,%eax ;\
  3570. + xor 24*p1-24(%edi),%eax ;\
  3571. + mov %eax,24*p1(%edi) ;\
  3572. + xor 24*p1-20(%edi),%eax ;\
  3573. + mov %eax,24*p1+4(%edi) ;\
  3574. + xor %eax,%esi ;\
  3575. + xor %esi,%ebp ;\
  3576. + mov %esi,24*p1+8(%edi) ;\
  3577. + mov %ebp,24*p1+12(%edi) ;\
  3578. + xor %ebp,%edx ;\
  3579. + xor %edx,%ebx ;\
  3580. + mov %edx,24*p1+16(%edi) ;\
  3581. + mov %ebx,24*p1+20(%edi)
  3582. +
  3583. +#define ksc8(p1) \
  3584. + rol $24,%ebx ;\
  3585. + mix_col(aes_fl_tab) ;\
  3586. + ror $8,%ebx ;\
  3587. + xor 4*p1+aes_rcon_tab,%eax ;\
  3588. + xor 32*p1-32(%edi),%eax ;\
  3589. + mov %eax,32*p1(%edi) ;\
  3590. + xor 32*p1-28(%edi),%eax ;\
  3591. + mov %eax,32*p1+4(%edi) ;\
  3592. + xor 32*p1-24(%edi),%eax ;\
  3593. + mov %eax,32*p1+8(%edi) ;\
  3594. + xor 32*p1-20(%edi),%eax ;\
  3595. + mov %eax,32*p1+12(%edi) ;\
  3596. + push %ebx ;\
  3597. + mov %eax,%ebx ;\
  3598. + mix_col(aes_fl_tab) ;\
  3599. + pop %ebx ;\
  3600. + xor %eax,%esi ;\
  3601. + xor %esi,%ebp ;\
  3602. + mov %esi,32*p1+16(%edi) ;\
  3603. + mov %ebp,32*p1+20(%edi) ;\
  3604. + xor %ebp,%edx ;\
  3605. + xor %edx,%ebx ;\
  3606. + mov %edx,32*p1+24(%edi) ;\
  3607. + mov %ebx,32*p1+28(%edi)
  3608. +
  3609. + .align ALIGN32BYTES
  3610. +aes_set_key:
  3611. + pushfl // save flags; cld below clears the direction flag
  3612. + push %ebp
  3613. + mov %esp,%ebp
  3614. + sub $slen,%esp // reserve space for locals (cnt)
  3615. + push %ebx
  3616. + push %esi
  3617. + push %edi
  3618. +
  3619. + mov aes_cx(%ebp),%edx // edx -> AES context
  3620. +
  3621. + mov key_ln(%ebp),%ecx // key length
  3622. + cmpl $128,%ecx // values of 128 or more are taken as bit counts
  3623. + jb aes_30
  3624. + shr $3,%ecx // bits -> bytes
  3625. +aes_30: cmpl $32,%ecx
  3626. + je aes_32
  3627. + cmpl $24,%ecx
  3628. + je aes_32
  3629. + mov $16,%ecx // any other length is treated as 16 bytes
  3630. +aes_32: shr $2,%ecx // bytes -> 32-bit words
  3631. + mov %ecx,nkey(%edx)
  3632. +
  3633. + lea 6(%ecx),%eax // rounds = words + 6: 10/12/14 for 4/6/8-word keys
  3634. + mov %eax,nrnd(%edx)
  3635. +
  3636. + mov in_key(%ebp),%esi // key input array
  3637. + lea ekey(%edx),%edi // key position in AES context
  3638. + cld
  3639. + push %ebp // ebp is reused as a data register until aes_37
  3640. + mov %ecx,%eax // save key length in eax
  3641. + rep ; movsl // copy the key words into the key schedule
  3642. + mov -4(%esi),%ebx // put some values in registers
  3643. + mov -8(%esi),%edx // to allow faster code
  3644. + mov -12(%esi),%ebp
  3645. + mov -16(%esi),%esi
  3646. +
  3647. + cmpl $4,%eax // jump on key size
  3648. + je aes_36
  3649. + cmpl $6,%eax
  3650. + je aes_35
  3651. +
  3652. + ksc8(0)
  3653. + ksc8(1)
  3654. + ksc8(2)
  3655. + ksc8(3)
  3656. + ksc8(4)
  3657. + ksc8(5)
  3658. + ksc8(6)
  3659. + jmp aes_37
  3660. +aes_35: ksc6(0)
  3661. + ksc6(1)
  3662. + ksc6(2)
  3663. + ksc6(3)
  3664. + ksc6(4)
  3665. + ksc6(5)
  3666. + ksc6(6)
  3667. + ksc6(7)
  3668. + jmp aes_37
  3669. +aes_36: ksc4(0)
  3670. + ksc4(1)
  3671. + ksc4(2)
  3672. + ksc4(3)
  3673. + ksc4(4)
  3674. + ksc4(5)
  3675. + ksc4(6)
  3676. + ksc4(7)
  3677. + ksc4(8)
  3678. + ksc4(9)
  3679. +aes_37: pop %ebp // frame pointer back; arguments reachable again
  3680. + mov aes_cx(%ebp),%edx // edx -> AES context
  3681. + cmpl $0,ed_flg(%ebp) // non-zero: encryption schedule only
  3682. + jne aes_39
  3683. +
  3684. +// compile decryption key schedule from encryption schedule - reverse
  3685. +// order and do mix_column operation on round keys except first and last
  3686. +
  3687. + mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
  3688. + shl $2,%eax // nrnd * 4 words
  3689. + lea dkey(%edx,%eax,4),%edi // edi = dkey + 16 * nrnd
  3690. + lea ekey(%edx),%esi // kf = cx->e_key
  3691. +
  3692. + movsl // copy first round key (unmodified)
  3693. + movsl
  3694. + movsl
  3695. + movsl
  3696. + sub $32,%edi // step back to the previous 16-byte slot in dkey
  3697. + movl $1,cnt(%ebp) // round keys 1 .. nrnd-1 go through mix_col
  3698. +aes_38: // do mix column on each column of
  3699. + lodsl // each round key
  3700. + mov %eax,%ebx
  3701. + mix_col(aes_im_tab)
  3702. + stosl
  3703. + lodsl
  3704. + mov %eax,%ebx
  3705. + mix_col(aes_im_tab)
  3706. + stosl
  3707. + lodsl
  3708. + mov %eax,%ebx
  3709. + mix_col(aes_im_tab)
  3710. + stosl
  3711. + lodsl
  3712. + mov %eax,%ebx
  3713. + mix_col(aes_im_tab)
  3714. + stosl
  3715. + sub $32,%edi // step back to the previous 16-byte slot in dkey
  3716. +
  3717. + incl cnt(%ebp)
  3718. + mov cnt(%ebp),%eax
  3719. + cmp nrnd(%edx),%eax
  3720. + jb aes_38 // loop while cnt < nrnd
  3721. +
  3722. + movsl // copy last round key (unmodified)
  3723. + movsl
  3724. + movsl
  3725. + movsl
  3726. +aes_39: pop %edi
  3727. + pop %esi
  3728. + pop %ebx
  3729. + mov %ebp,%esp // drop the locals
  3730. + pop %ebp
  3731. + popfl // restore the flags saved on entry
  3732. + ret
  3733. +
  3734. +
  3735. +// GF(2^8) multiplies by {02}, {04} and {08}, reduced with polynomial 0x11b
  3736. +
  3737. +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
  3738. +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
  3739. +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
  3740. +
  3741. +// finite field multiplies required in table generation ({03},{09},{0b},{0d},{0e})
  3742. +
  3743. +#define f3(x) (f2(x) ^ x)
  3744. +#define f9(x) (f8(x) ^ x)
  3745. +#define fb(x) (f8(x) ^ f2(x) ^ x)
  3746. +#define fd(x) (f8(x) ^ f4(x) ^ x)
  3747. +#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
  3748. +
  3749. +// These defines generate the forward table entries (one 32-bit word per byte x)
  3750. +
  3751. +#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
  3752. +#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
  3753. +#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
  3754. +#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
  3755. +
  3756. +// These defines generate the inverse table entries (one 32-bit word per byte x)
  3757. +
  3758. +#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
  3759. +#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
  3760. +#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
  3761. +#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
  3762. +
  3763. +// These defines generate entries for the last round tables (x in byte 0..3)
  3764. +
  3765. +#define w0(x) (x)
  3766. +#define w1(x) (x << 8)
  3767. +#define w2(x) (x << 16)
  3768. +#define w3(x) (x << 24)
  3769. +
  3770. +// macros to generate inverse mix column tables (needed for the key schedule)
  3771. +
  3772. +#define im_data0(p1) \
  3773. + .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
  3774. + .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
  3775. + .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
  3776. + .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
  3777. +#define im_data1(p1) \
  3778. + .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
  3779. + .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
  3780. + .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
  3781. + .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
  3782. +#define im_data2(p1) \
  3783. + .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
  3784. + .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
  3785. + .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
  3786. + .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
  3787. +#define im_data3(p1) \
  3788. + .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
  3789. + .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
  3790. + .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
  3791. + .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
  3792. +#define im_data4(p1) \
  3793. + .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
  3794. + .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
  3795. + .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
  3796. + .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
  3797. +#define im_data5(p1) \
  3798. + .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
  3799. + .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
  3800. + .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
  3801. + .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
  3802. +#define im_data6(p1) \
  3803. + .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
  3804. + .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
  3805. + .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
  3806. + .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
  3807. +#define im_data7(p1) \
  3808. + .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
  3809. + .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
  3810. + .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
  3811. + .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
  3812. +
  3813. +// S-box data - 256 entries
  3814. +
  3815. +#define sb_data0(p1) \
  3816. + .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
  3817. + .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
  3818. + .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
  3819. + .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
  3820. +#define sb_data1(p1) \
  3821. + .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
  3822. + .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
  3823. + .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
  3824. + .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
  3825. +#define sb_data2(p1) \
  3826. + .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
  3827. + .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
  3828. + .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
  3829. + .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
  3830. +#define sb_data3(p1) \
  3831. + .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
  3832. + .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
  3833. + .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
  3834. + .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
  3835. +#define sb_data4(p1) \
  3836. + .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
  3837. + .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
  3838. + .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
  3839. + .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
  3840. +#define sb_data5(p1) \
  3841. + .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
  3842. + .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
  3843. + .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
  3844. + .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
  3845. +#define sb_data6(p1) \
  3846. + .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
  3847. + .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
  3848. + .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
  3849. + .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
  3850. +#define sb_data7(p1) \
  3851. + .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
  3852. + .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
  3853. + .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
  3854. + .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
  3855. +
  3856. +// Inverse S-box data - 256 entries
  3857. +
  3858. +#define ib_data0(p1) \
  3859. + .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
  3860. + .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
  3861. + .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
  3862. + .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
  3863. +#define ib_data1(p1) \
  3864. + .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
  3865. + .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
  3866. + .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
  3867. + .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
  3868. +#define ib_data2(p1) \
  3869. + .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
  3870. + .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
  3871. + .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
  3872. + .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
  3873. +#define ib_data3(p1) \
  3874. + .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
  3875. + .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
  3876. + .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
  3877. + .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
  3878. +#define ib_data4(p1) \
  3879. + .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
  3880. + .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
  3881. + .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
  3882. + .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
  3883. +#define ib_data5(p1) \
  3884. + .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
  3885. + .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
  3886. + .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
  3887. + .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
  3888. +#define ib_data6(p1) \
  3889. + .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
  3890. + .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
  3891. + .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
  3892. + .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
  3893. +#define ib_data7(p1) \
  3894. + .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
  3895. + .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
  3896. + .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
  3897. + .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
  3898. +
  3899. +// The rcon_table (needed for the key schedule)
  3900. +//
  3901. +// Here is Dr Brian Gladman's original source code:
  3902. +// _rcon_tab:
  3903. +// %assign x 1
  3904. +// %rep 29
  3905. +// dd x
  3906. +// %assign x f2(x)
  3907. +// %endrep
  3908. +//
  3909. +// Here is precomputed output (it's more portable this way):
  3910. +
  3911. + .align ALIGN32BYTES
  3912. +aes_rcon_tab:
  3913. + .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
  3914. + .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
  3915. + .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
  3916. + .long 0xb3,0x7d,0xfa,0xef,0xc5
  3917. +
  3918. +// The forward xor tables
  3919. +
  3920. + .align ALIGN32BYTES
  3921. +aes_ft_tab:
  3922. + sb_data0(u0)
  3923. + sb_data1(u0)
  3924. + sb_data2(u0)
  3925. + sb_data3(u0)
  3926. + sb_data4(u0)
  3927. + sb_data5(u0)
  3928. + sb_data6(u0)
  3929. + sb_data7(u0)
  3930. +
  3931. + sb_data0(u1)
  3932. + sb_data1(u1)
  3933. + sb_data2(u1)
  3934. + sb_data3(u1)
  3935. + sb_data4(u1)
  3936. + sb_data5(u1)
  3937. + sb_data6(u1)
  3938. + sb_data7(u1)
  3939. +
  3940. + sb_data0(u2)
  3941. + sb_data1(u2)
  3942. + sb_data2(u2)
  3943. + sb_data3(u2)
  3944. + sb_data4(u2)
  3945. + sb_data5(u2)
  3946. + sb_data6(u2)
  3947. + sb_data7(u2)
  3948. +
  3949. + sb_data0(u3)
  3950. + sb_data1(u3)
  3951. + sb_data2(u3)
  3952. + sb_data3(u3)
  3953. + sb_data4(u3)
  3954. + sb_data5(u3)
  3955. + sb_data6(u3)
  3956. + sb_data7(u3)
  3957. +
  3958. + .align ALIGN32BYTES
  3959. +aes_fl_tab:
  3960. + sb_data0(w0)
  3961. + sb_data1(w0)
  3962. + sb_data2(w0)
  3963. + sb_data3(w0)
  3964. + sb_data4(w0)
  3965. + sb_data5(w0)
  3966. + sb_data6(w0)
  3967. + sb_data7(w0)
  3968. +
  3969. + sb_data0(w1)
  3970. + sb_data1(w1)
  3971. + sb_data2(w1)
  3972. + sb_data3(w1)
  3973. + sb_data4(w1)
  3974. + sb_data5(w1)
  3975. + sb_data6(w1)
  3976. + sb_data7(w1)
  3977. +
  3978. + sb_data0(w2)
  3979. + sb_data1(w2)
  3980. + sb_data2(w2)
  3981. + sb_data3(w2)
  3982. + sb_data4(w2)
  3983. + sb_data5(w2)
  3984. + sb_data6(w2)
  3985. + sb_data7(w2)
  3986. +
  3987. + sb_data0(w3)
  3988. + sb_data1(w3)
  3989. + sb_data2(w3)
  3990. + sb_data3(w3)
  3991. + sb_data4(w3)
  3992. + sb_data5(w3)
  3993. + sb_data6(w3)
  3994. + sb_data7(w3)
  3995. +
  3996. +// The inverse xor tables
  3997. +
  3998. + .align ALIGN32BYTES
  3999. +aes_it_tab:
  4000. + ib_data0(v0)
  4001. + ib_data1(v0)
  4002. + ib_data2(v0)
  4003. + ib_data3(v0)
  4004. + ib_data4(v0)
  4005. + ib_data5(v0)
  4006. + ib_data6(v0)
  4007. + ib_data7(v0)
  4008. +
  4009. + ib_data0(v1)
  4010. + ib_data1(v1)
  4011. + ib_data2(v1)
  4012. + ib_data3(v1)
  4013. + ib_data4(v1)
  4014. + ib_data5(v1)
  4015. + ib_data6(v1)
  4016. + ib_data7(v1)
  4017. +
  4018. + ib_data0(v2)
  4019. + ib_data1(v2)
  4020. + ib_data2(v2)
  4021. + ib_data3(v2)
  4022. + ib_data4(v2)
  4023. + ib_data5(v2)
  4024. + ib_data6(v2)
  4025. + ib_data7(v2)
  4026. +
  4027. + ib_data0(v3)
  4028. + ib_data1(v3)
  4029. + ib_data2(v3)
  4030. + ib_data3(v3)
  4031. + ib_data4(v3)
  4032. + ib_data5(v3)
  4033. + ib_data6(v3)
  4034. + ib_data7(v3)
  4035. +
  4036. + .align ALIGN32BYTES
  4037. +aes_il_tab:
  4038. + ib_data0(w0)
  4039. + ib_data1(w0)
  4040. + ib_data2(w0)
  4041. + ib_data3(w0)
  4042. + ib_data4(w0)
  4043. + ib_data5(w0)
  4044. + ib_data6(w0)
  4045. + ib_data7(w0)
  4046. +
  4047. + ib_data0(w1)
  4048. + ib_data1(w1)
  4049. + ib_data2(w1)
  4050. + ib_data3(w1)
  4051. + ib_data4(w1)
  4052. + ib_data5(w1)
  4053. + ib_data6(w1)
  4054. + ib_data7(w1)
  4055. +
  4056. + ib_data0(w2)
  4057. + ib_data1(w2)
  4058. + ib_data2(w2)
  4059. + ib_data3(w2)
  4060. + ib_data4(w2)
  4061. + ib_data5(w2)
  4062. + ib_data6(w2)
  4063. + ib_data7(w2)
  4064. +
  4065. + ib_data0(w3)
  4066. + ib_data1(w3)
  4067. + ib_data2(w3)
  4068. + ib_data3(w3)
  4069. + ib_data4(w3)
  4070. + ib_data5(w3)
  4071. + ib_data6(w3)
  4072. + ib_data7(w3)
  4073. +
  4074. +// The inverse mix column tables
  4075. +
  4076. + .align ALIGN32BYTES
  4077. +aes_im_tab:
  4078. + im_data0(v0)
  4079. + im_data1(v0)
  4080. + im_data2(v0)
  4081. + im_data3(v0)
  4082. + im_data4(v0)
  4083. + im_data5(v0)
  4084. + im_data6(v0)
  4085. + im_data7(v0)
  4086. +
  4087. + im_data0(v1)
  4088. + im_data1(v1)
  4089. + im_data2(v1)
  4090. + im_data3(v1)
  4091. + im_data4(v1)
  4092. + im_data5(v1)
  4093. + im_data6(v1)
  4094. + im_data7(v1)
  4095. +
  4096. + im_data0(v2)
  4097. + im_data1(v2)
  4098. + im_data2(v2)
  4099. + im_data3(v2)
  4100. + im_data4(v2)
  4101. + im_data5(v2)
  4102. + im_data6(v2)
  4103. + im_data7(v2)
  4104. +
  4105. + im_data0(v3)
  4106. + im_data1(v3)
  4107. + im_data2(v3)
  4108. + im_data3(v3)
  4109. + im_data4(v3)
  4110. + im_data5(v3)
  4111. + im_data6(v3)
  4112. + im_data7(v3)
  4113. diff -pruN linux-2.4.28_orig/drivers/misc/aes.c linux-2.4.28/drivers/misc/aes.c
  4114. --- linux-2.4.28_orig/drivers/misc/aes.c 1970-01-01 01:00:00.000000000 +0100
  4115. +++ linux-2.4.28/drivers/misc/aes.c 2005-01-11 09:33:37.484525168 +0100
  4116. @@ -0,0 +1,1479 @@
  4117. +// I retain copyright in this code but I encourage its free use provided
  4118. +// that I don't carry any responsibility for the results. I am especially
  4119. +// happy to see it used in free and open source software. If you do use
  4120. +// it I would appreciate an acknowledgement of its origin in the code or
  4121. +// the product that results and I would also appreciate knowing a little
  4122. +// about the use to which it is being put. I am grateful to Frank Yellin
  4123. +// for some ideas that are used in this implementation.
  4124. +//
  4125. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  4126. +//
  4127. +// This is an implementation of the AES encryption algorithm (Rijndael)
  4128. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  4129. +// to provide both fixed and dynamic block and key lengths and can also
  4130. +// run with either big or little endian internal byte order (see aes.h).
  4131. +// It inputs block and key lengths in bytes with the legal values being
  4132. +// 16, 24 and 32.
  4133. +
  4134. +/*
  4135. + * Modified by Jari Ruusu, May 1 2001
  4136. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  4137. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  4138. + * - Major name space cleanup: Names visible to outside now begin
  4139. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  4140. + * - Removed C++ and DLL support as part of name space cleanup.
  4141. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  4142. + * - Merged precomputed constant tables to aes.c file.
  4143. + * - Removed data alignment restrictions for portability reasons.
  4144. + * - Made block and key lengths accept bit count (128/192/256)
  4145. + * as well as byte count (16/24/32).
  4146. + * - Removed all error checks. This change also eliminated the need
  4147. + * to preinitialize the context struct to zero.
  4148. + * - Removed some totally unused constants.
  4149. + */
  4150. +/*
  4151. + * Modified by Jari Ruusu, April 21 2004
  4152. + * - Added back code that avoids byte swaps on big endian boxes.
  4153. + */
  4154. +
  4155. +#include "aes.h"
  4156. +
  4157. +// CONFIGURATION OPTIONS (see also aes.h)
  4158. +//
  4159. +// 1. Define UNROLL for full loop unrolling in encryption and decryption.
  4160. +// 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
  4161. +// 3. Define FIXED_TABLES for compiled rather than dynamic tables.
  4162. +// 4. Define FF_TABLES to use tables for field multiplies and inverses.
  4163. +// Do not enable this without understanding stack space requirements.
  4164. +// 5. Define ARRAYS to use arrays to hold the local state block. If this
  4165. +// is not defined, individually declared 32-bit words are used.
  4166. +// 6. Define FAST_VARIABLE if a high speed variable block implementation
  4167. +// is needed (essentially three separate fixed block size code sequences)
  4168. +// 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
  4169. +// version using 1 table (2 kbytes of table space) or 4 tables (8
  4170. +// kbytes of table space) for higher speed.
  4171. +// 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
  4172. +// increase by using tables for the last rounds but with more table
  4173. +// space (2 or 8 kbytes extra).
  4174. +// 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
  4175. +// slower version is provided.
  4176. +// 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
  4177. +// or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
  4178. +
  4179. +#define UNROLL // option 1: full loop unrolling in encryption and decryption
  4180. +//#define PARTIAL_UNROLL
  4181. +
  4182. +#define FIXED_TABLES // option 3: compiled rather than dynamic tables
  4183. +//#define FF_TABLES
  4184. +//#define ARRAYS
  4185. +#define FAST_VARIABLE // option 6: high speed variable block implementation
  4186. +
  4187. +//#define ONE_TABLE
  4188. +#define FOUR_TABLES // option 7: four round tables (8 kbytes) for higher speed
  4189. +
  4190. +//#define ONE_LR_TABLE
  4191. +#define FOUR_LR_TABLES // option 8: four last round tables (8 kbytes extra)
  4192. +
  4193. +//#define ONE_IM_TABLE
  4194. +#define FOUR_IM_TABLES // option 10: four tables for fast decryption key scheduling
  4195. +
  4196. +#if defined(UNROLL) && defined (PARTIAL_UNROLL) // the two unroll modes exclude each other
  4197. +#error both UNROLL and PARTIAL_UNROLL are defined
  4198. +#endif
  4199. +
  4200. +#if defined(ONE_TABLE) && defined (FOUR_TABLES) // pick at most one round table layout
  4201. +#error both ONE_TABLE and FOUR_TABLES are defined
  4202. +#endif
  4203. +
  4204. +#if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES) // pick at most one last round layout
  4205. +#error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
  4206. +#endif
  4207. +
  4208. +#if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES) // pick at most one inverse mix layout
  4209. +#error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
  4210. +#endif
  4211. +
  4212. +#if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
  4213. +#error an illegal block size has been specified
  4214. +#endif
  4215. +
  4216. +/* INTERNAL_BYTE_ORDER: 0=unknown, 1=little endian, 2=big endian */
  4217. +#if defined(INTERNAL_BYTE_ORDER) /* already defined: keep that value */
  4218. +#elif defined(__i386__)||defined(__i386)||defined(__x86_64__)||defined(__x86_64)||defined(__amd64__)||defined(__amd64)||defined(__AMD64__)||defined(__AMD64)
  4219. +# define INTERNAL_BYTE_ORDER 1 /* little endian */
  4220. +# undef DATA_ALWAYS_ALIGNED
  4221. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  4222. +#elif defined(__ppc__)||defined(__ppc)||defined(__PPC__)||defined(__PPC)||defined(__powerpc__)||defined(__powerpc)||defined(__POWERPC__)||defined(__POWERPC)||defined(__PowerPC__)||defined(__PowerPC)||defined(__ppc64__)||defined(__ppc64)||defined(__PPC64__)||defined(__PPC64)||defined(__powerpc64__)||defined(__powerpc64)||defined(__s390__)||defined(__s390)
  4223. +# define INTERNAL_BYTE_ORDER 2 /* big endian */
  4224. +# undef DATA_ALWAYS_ALIGNED
  4225. +# define DATA_ALWAYS_ALIGNED 1 /* unaligned access is always ok */
  4226. +#elif defined(__alpha__)||defined(__alpha)||defined(__ia64__)||defined(__ia64)
  4227. +# define INTERNAL_BYTE_ORDER 1 /* little endian; DATA_ALWAYS_ALIGNED not touched */
  4228. +#elif defined(__hppa__)||defined(__hppa)||defined(__HPPA__)||defined(__HPPA)||defined(__parisc__)||defined(__parisc)||defined(__sparc__)||defined(__sparc)||defined(__sparc_v9__)||defined(__sparc_v9)||defined(__sparc64__)||defined(__sparc64)||defined(__mc68000__)||defined(__mc68000)
  4229. +# define INTERNAL_BYTE_ORDER 2 /* big endian; DATA_ALWAYS_ALIGNED not touched */
  4230. +#elif defined(CONFIGURE_DETECTS_BYTE_ORDER)
  4231. +# if WORDS_BIGENDIAN
  4232. +# define INTERNAL_BYTE_ORDER 2 /* big endian */
  4233. +# else
  4234. +# define INTERNAL_BYTE_ORDER 1 /* little endian */
  4235. +# endif
  4236. +#elif defined(__linux__) && defined(__KERNEL__)
  4237. +# include <asm/byteorder.h>
  4238. +# if defined(__BIG_ENDIAN)
  4239. +# define INTERNAL_BYTE_ORDER 2 /* big endian */
  4240. +# else
  4241. +# define INTERNAL_BYTE_ORDER 1 /* little endian */
  4242. +# endif
  4243. +#else
  4244. +# include <sys/param.h>
  4245. +# if (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN))
  4246. +# define INTERNAL_BYTE_ORDER 1 /* little endian */
  4247. +# elif WORDS_BIGENDIAN || defined(__BIG_ENDIAN__) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN))
  4248. +# define INTERNAL_BYTE_ORDER 2 /* big endian */
  4249. +# else
  4250. +# define INTERNAL_BYTE_ORDER 0 /* unknown */
  4251. +# endif
  4252. +#endif
  4253. +
  4254. +#if defined(DATA_ALWAYS_ALIGNED) && (INTERNAL_BYTE_ORDER > 0) /* word_in(x) / word_out(x,v): read / write one 32-bit word at address x */
  4255. +# define word_in(x) *(u_int32_t*)(x) /* direct access through a u_int32_t pointer */
  4256. +# define word_out(x,v) *(u_int32_t*)(x) = (v)
  4257. +#elif defined(__linux__) && defined(__KERNEL__)
  4258. +# include <asm/unaligned.h>
  4259. +# define word_in(x) get_unaligned((u_int32_t*)(x)) /* kernel helpers from <asm/unaligned.h> */
  4260. +# define word_out(x,v) put_unaligned((v),(u_int32_t*)(x))
  4261. +#else
  4262. +/* unknown endianness and/or unable to handle unaligned data: words are assembled byte by byte with byte [0] as the least significant byte, so INTERNAL_BYTE_ORDER is forced to 1 to match */
  4263. +# undef INTERNAL_BYTE_ORDER
  4264. +# define INTERNAL_BYTE_ORDER 1
  4265. +# define word_in(x) ((u_int32_t)(((unsigned char *)(x))[0])|((u_int32_t)(((unsigned char *)(x))[1])<<8)|((u_int32_t)(((unsigned char *)(x))[2])<<16)|((u_int32_t)(((unsigned char *)(x))[3])<<24))
  4266. +# define word_out(x,v) ((unsigned char *)(x))[0]=(v),((unsigned char *)(x))[1]=((v)>>8),((unsigned char *)(x))[2]=((v)>>16),((unsigned char *)(x))[3]=((v)>>24)
  4267. +#endif
  4268. +
  4269. +// upr(x,n): rotates bytes within words by n positions, moving bytes
  4270. +// to higher index positions with wrap around into low positions (n = 1..3; n == 0 would shift by 32 bits)
  4271. +// ups(x,n): moves bytes by n positions to higher index positions in
  4272. +// words but without wrap around
  4273. +// bval(x,n): extracts a byte from a word
  4274. +// bytes2word(b0,b1,b2,b3): builds the word w for which bval(w,k) == bk (for byte-sized bk)
  4275. +#if (INTERNAL_BYTE_ORDER < 2)
  4276. +/* little endian (also selected when the byte order is 0 = unknown) */
  4277. +#define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
  4278. +#define ups(x,n) ((x) << 8 * (n))
  4279. +#define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
  4280. +#define bytes2word(b0, b1, b2, b3) \
  4281. + ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
  4282. +#else
  4283. +/* big endian: byte index 0 is the most significant byte, so every shift direction is mirrored */
  4284. +#define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n))))
  4285. +#define ups(x,n) ((x) >> 8 * (n))
  4286. +#define bval(x,n) ((unsigned char)((x) >> (24 - 8 * (n))))
  4287. +#define bytes2word(b0, b1, b2, b3) \
  4288. + ((u_int32_t)(b0) << 24 | (u_int32_t)(b1) << 16 | (u_int32_t)(b2) << 8 | (b3))
  4289. +#endif
  4290. +
  4291. +// Disable at least some poor combinations of options
  4292. +// (three cases, tested in order; only the first matching one is applied)
  4293. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES) // case 1: no main round tables at all
  4294. +#define FIXED_TABLES // force the compiled-in tables
  4295. +#undef UNROLL
  4296. +#undef ONE_LR_TABLE
  4297. +#undef FOUR_LR_TABLES
  4298. +#undef ONE_IM_TABLE
  4299. +#undef FOUR_IM_TABLES
  4300. +#elif !defined(FOUR_TABLES) // case 2: only ONE_TABLE is set, so the LR and IM options are reduced to their single-table form
  4301. +#ifdef FOUR_LR_TABLES
  4302. +#undef FOUR_LR_TABLES
  4303. +#define ONE_LR_TABLE
  4304. +#endif
  4305. +#ifdef FOUR_IM_TABLES
  4306. +#undef FOUR_IM_TABLES
  4307. +#define ONE_IM_TABLE
  4308. +#endif
  4309. +#elif !defined(AES_BLOCK_SIZE) // case 3: FOUR_TABLES without a compile-time block size
  4310. +#if defined(UNROLL)
  4311. +#define PARTIAL_UNROLL // UNROLL is swapped for PARTIAL_UNROLL
  4312. +#undef UNROLL
  4313. +#endif
  4314. +#endif
  4315. +
  4316. +// the finite field modular polynomial and elements
  4317. +
  4318. +#define ff_poly 0x011b // bits 8,4,3,1,0 set: x^8 + x^4 + x^3 + x + 1
  4319. +#define ff_hi 0x80 // top bit of a byte-sized field element
  4320. +
  4321. +// multiply four bytes in GF(2^8) by 'x' {02} in parallel
  4322. +// (x is evaluated twice by FFmulX, so it must have no side effects)
  4323. +#define m1 0x80808080 // top bit of each of the four bytes
  4324. +#define m2 0x7f7f7f7f // low seven bits of each byte
  4325. +#define m3 0x0000001b // low byte of ff_poly, XORed into every byte whose top bit was set
  4326. +#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3)) // per byte: shift the low 7 bits left, then add 0x1b where the top bit was set
  4327. +
  4328. +// The following defines provide alternative definitions of FFmulX that might
  4329. +// give improved performance if a fast 32-bit multiply is not available. Note
  4330. +// that a temporary variable u needs to be defined where FFmulX is used.
  4331. +
  4332. +// #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
  4333. +// #define m4 0x1b1b1b1b
  4334. +// #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
  4335. +
  4336. +// perform column mix operation on four bytes in parallel
  4337. +// NOTE: assigns to a temporary named f2 that the caller must declare, and evaluates x several times (x must have no side effects)
  4338. +#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr(x,2) ^ upr(x,1))
  4339. +
  4340. +#if defined(FIXED_TABLES)
  4341. +
  4342. +// the S-Box table
  4343. +
  4344. +static const unsigned char s_box[256] =
  4345. +{
  4346. + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
  4347. + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  4348. + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
  4349. + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  4350. + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
  4351. + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  4352. + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
  4353. + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  4354. + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
  4355. + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  4356. + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
  4357. + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  4358. + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
  4359. + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  4360. + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
  4361. + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  4362. + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
  4363. + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  4364. + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
  4365. + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  4366. + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
  4367. + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  4368. + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
  4369. + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  4370. + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
  4371. + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  4372. + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
  4373. + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  4374. + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
  4375. + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  4376. + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
  4377. + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
  4378. +};
  4379. +
  4380. +// the inverse S-Box table
  4381. +
  4382. +static const unsigned char inv_s_box[256] =
  4383. +{
  4384. + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
  4385. + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  4386. + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
  4387. + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  4388. + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
  4389. + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  4390. + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
  4391. + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  4392. + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
  4393. + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  4394. + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
  4395. + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  4396. + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
  4397. + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  4398. + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
  4399. + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  4400. + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
  4401. + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  4402. + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
  4403. + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  4404. + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
  4405. + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  4406. + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
  4407. + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  4408. + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
  4409. + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  4410. + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
  4411. + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  4412. + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
  4413. + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  4414. + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
  4415. + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
  4416. +};
  4417. +
  4418. +// used to ensure table is generated in the right format
  4419. +// depending on the internal byte order required
  4420. +// w0(p): pastes the two hex digits p into a 32-bit literal so that they land in byte index 0
  4421. +#if (INTERNAL_BYTE_ORDER < 2)
  4422. +/* little endian */
  4423. +#define w0(p) 0x000000##p
  4424. +#else
  4425. +/* big endian */
  4426. +#define w0(p) 0x##p##000000
  4427. +#endif
  4428. +
  4429. +// Number of elements required in this table for different
  4430. +// block and key lengths is:
  4431. +//
  4432. +// Nk = 4 6 8
  4433. +// ----------
  4434. +// Nb = 4 | 10 8 7
  4435. +// 6 | 19 12 11
  4436. +// 8 | 29 19 14
  4437. +//
  4438. +// this table can be a table of bytes if the key schedule
  4439. +// code is adjusted accordingly
  4440. +// sized for the largest count listed above (29); each constant is the previous one multiplied by {02} in GF(2^8)
  4441. +static const u_int32_t rcon_tab[29] =
  4442. +{
  4443. + w0(01), w0(02), w0(04), w0(08),
  4444. + w0(10), w0(20), w0(40), w0(80),
  4445. + w0(1b), w0(36), w0(6c), w0(d8),
  4446. + w0(ab), w0(4d), w0(9a), w0(2f),
  4447. + w0(5e), w0(bc), w0(63), w0(c6),
  4448. + w0(97), w0(35), w0(6a), w0(d4),
  4449. + w0(b3), w0(7d), w0(fa), w0(ef),
  4450. + w0(c5)
  4451. +};
  4452. +
  4453. +#undef w0 // dropped here so that the definitions that follow can introduce w0 again
  4454. +
  4455. +// used to ensure table is generated in the right format
  4456. +// depending on the internal byte order required
  4457. +// rN(p,q,r,s): word with bytes s,r,q,p at byte indices 0..3, rotated N positions toward higher indices; wN(p): byte p at index N, zero elsewhere
  4458. +#if (INTERNAL_BYTE_ORDER < 2)
  4459. +/* little endian: byte index 0 is the rightmost digit pair of the literal */
  4460. +#define r0(p,q,r,s) 0x##p##q##r##s
  4461. +#define r1(p,q,r,s) 0x##q##r##s##p
  4462. +#define r2(p,q,r,s) 0x##r##s##p##q
  4463. +#define r3(p,q,r,s) 0x##s##p##q##r
  4464. +#define w0(p) 0x000000##p
  4465. +#define w1(p) 0x0000##p##00
  4466. +#define w2(p) 0x00##p##0000
  4467. +#define w3(p) 0x##p##000000
  4468. +#else
  4469. +/* big endian: byte index 0 is the leftmost digit pair of the literal */
  4470. +#define r0(p,q,r,s) 0x##s##r##q##p
  4471. +#define r1(p,q,r,s) 0x##p##s##r##q
  4472. +#define r2(p,q,r,s) 0x##q##p##s##r
  4473. +#define r3(p,q,r,s) 0x##r##q##p##s
  4474. +#define w0(p) 0x##p##000000
  4475. +#define w1(p) 0x00##p##0000
  4476. +#define w2(p) 0x0000##p##00
  4477. +#define w3(p) 0x000000##p
  4478. +#endif
  4479. +
  4480. +#if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
  4481. +
  4482. +// data for forward tables (other than last round)
  4483. +
  4484. +#define f_table \
  4485. + r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
  4486. + r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
  4487. + r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
  4488. + r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
  4489. + r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
  4490. + r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
  4491. + r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
  4492. + r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
  4493. + r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
  4494. + r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
  4495. + r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
  4496. + r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
  4497. + r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
  4498. + r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
  4499. + r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
  4500. + r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
  4501. + r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
  4502. + r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
  4503. + r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
  4504. + r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
  4505. + r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
  4506. + r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
  4507. + r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
  4508. + r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
  4509. + r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
  4510. + r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
  4511. + r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
  4512. + r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
  4513. + r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
  4514. + r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
  4515. + r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
  4516. + r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
  4517. + r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
  4518. + r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
  4519. + r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
  4520. + r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
  4521. + r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
  4522. + r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
  4523. + r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
  4524. + r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
  4525. + r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
  4526. + r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
  4527. + r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
  4528. + r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
  4529. + r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
  4530. + r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
  4531. + r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
  4532. + r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
  4533. + r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
  4534. + r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
  4535. + r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
  4536. + r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
  4537. + r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
  4538. + r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
  4539. + r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
  4540. + r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
  4541. + r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
  4542. + r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
  4543. + r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
  4544. + r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
  4545. + r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
  4546. + r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
  4547. + r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
  4548. + r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
  4549. +
  4550. +// data for inverse tables (other than last round)
  4551. +
  4552. +#define i_table \
  4553. + r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
  4554. + r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
  4555. + r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
  4556. + r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
  4557. + r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
  4558. + r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
  4559. + r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
  4560. + r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
  4561. + r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
  4562. + r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
  4563. + r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
  4564. + r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
  4565. + r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
  4566. + r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
  4567. + r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
  4568. + r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
  4569. + r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
  4570. + r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
  4571. + r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
  4572. + r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
  4573. + r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
  4574. + r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
  4575. + r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
  4576. + r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
  4577. + r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
  4578. + r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
  4579. + r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
  4580. + r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
  4581. + r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
  4582. + r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
  4583. + r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
  4584. + r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
  4585. + r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
  4586. + r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
  4587. + r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
  4588. + r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
  4589. + r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
  4590. + r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
  4591. + r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
  4592. + r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
  4593. + r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
  4594. + r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
  4595. + r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
  4596. + r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
  4597. + r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
  4598. + r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
  4599. + r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
  4600. + r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
  4601. + r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
  4602. + r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
  4603. + r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
  4604. + r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
  4605. + r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
  4606. + r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
  4607. + r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
  4608. + r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
  4609. + r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
  4610. + r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
  4611. + r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
  4612. + r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
  4613. + r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
  4614. + r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
  4615. + r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
  4616. + r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
  4617. +
  4618. +// generate the required tables in the desired endian format
  4619. +// f_table / i_table are expanded where they are used, so the binding of r in force at that point decides the byte layout
  4620. +#undef r
  4621. +#define r r0
  4622. +
  4623. +#if defined(ONE_TABLE)
  4624. +static const u_int32_t ft_tab[256] =
  4625. + { f_table };
  4626. +#elif defined(FOUR_TABLES)
  4627. +static const u_int32_t ft_tab[4][256] =
  4628. +{ { f_table }, // ft_tab[0]: entries built with r0
  4629. +#undef r
  4630. +#define r r1
  4631. + { f_table }, // ft_tab[1]: same entries, rotated by one byte position
  4632. +#undef r
  4633. +#define r r2
  4634. + { f_table }, // ft_tab[2]: rotated by two
  4635. +#undef r
  4636. +#define r r3
  4637. + { f_table } // ft_tab[3]: rotated by three
  4638. +};
  4639. +#endif
  4640. +// it_tab is built from i_table in exactly the same way
  4641. +#undef r
  4642. +#define r r0
  4643. +#if defined(ONE_TABLE)
  4644. +static const u_int32_t it_tab[256] =
  4645. + { i_table };
  4646. +#elif defined(FOUR_TABLES)
  4647. +static const u_int32_t it_tab[4][256] =
  4648. +{ { i_table },
  4649. +#undef r
  4650. +#define r r1
  4651. + { i_table },
  4652. +#undef r
  4653. +#define r r2
  4654. + { i_table },
  4655. +#undef r
  4656. +#define r r3
  4657. + { i_table }
  4658. +};
  4659. +#endif
  4660. +
  4661. +#endif
  4662. +
  4663. +#if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
  4664. +
  4665. +// data for inverse tables (last round)
  4666. +
  4667. +#define li_table \
  4668. + w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
  4669. + w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
  4670. + w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
  4671. + w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
  4672. + w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
  4673. + w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
  4674. + w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
  4675. + w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
  4676. + w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
  4677. + w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
  4678. + w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
  4679. + w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
  4680. + w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
  4681. + w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
  4682. + w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
  4683. + w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
  4684. + w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
  4685. + w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
  4686. + w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
  4687. + w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
  4688. + w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
  4689. + w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
  4690. + w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
  4691. + w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
  4692. + w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
  4693. + w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
  4694. + w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
  4695. + w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
  4696. + w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
  4697. + w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
  4698. + w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
  4699. + w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
  4700. +
  4701. +// generate the required tables in the desired endian format
  4702. +// fl_tab reuses f_table: r is rebound so that only the second argument (q) of each entry is kept, placed at byte index 0..3
  4703. +#undef r
  4704. +#define r(p,q,r,s) w0(q)
  4705. +#if defined(ONE_LR_TABLE)
  4706. +static const u_int32_t fl_tab[256] =
  4707. + { f_table };
  4708. +#elif defined(FOUR_LR_TABLES)
  4709. +static const u_int32_t fl_tab[4][256] =
  4710. +{ { f_table }, // q in byte index 0
  4711. +#undef r
  4712. +#define r(p,q,r,s) w1(q)
  4713. + { f_table }, // q in byte index 1
  4714. +#undef r
  4715. +#define r(p,q,r,s) w2(q)
  4716. + { f_table }, // q in byte index 2
  4717. +#undef r
  4718. +#define r(p,q,r,s) w3(q)
  4719. + { f_table } // q in byte index 3
  4720. +};
  4721. +#endif
  4722. +// il_tab: li_table expanded with w bound to w0..w3, i.e. each li_table byte placed at byte index 0..3
  4723. +#undef w
  4724. +#define w w0
  4725. +#if defined(ONE_LR_TABLE)
  4726. +static const u_int32_t il_tab[256] =
  4727. + { li_table };
  4728. +#elif defined(FOUR_LR_TABLES)
  4729. +static const u_int32_t il_tab[4][256] =
  4730. +{ { li_table },
  4731. +#undef w
  4732. +#define w w1
  4733. + { li_table },
  4734. +#undef w
  4735. +#define w w2
  4736. + { li_table },
  4737. +#undef w
  4738. +#define w w3
  4739. + { li_table }
  4740. +};
  4741. +#endif
  4742. +
  4743. +#endif
  4744. +
  4745. +#if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
  4746. +
  4747. +#define m_table \
  4748. + r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
  4749. + r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
  4750. + r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
  4751. + r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
  4752. + r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
  4753. + r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
  4754. + r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
  4755. + r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
  4756. + r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
  4757. + r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
  4758. + r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
  4759. + r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
  4760. + r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
  4761. + r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
  4762. + r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
  4763. + r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
  4764. + r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
  4765. + r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
  4766. + r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
  4767. + r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
  4768. + r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
  4769. + r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
  4770. + r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
  4771. + r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
  4772. + r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
  4773. + r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
  4774. + r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
  4775. + r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
  4776. + r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
  4777. + r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
  4778. + r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
  4779. + r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
  4780. + r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
  4781. + r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
  4782. + r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
  4783. + r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
  4784. + r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
  4785. + r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
  4786. + r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
  4787. + r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
  4788. + r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
  4789. + r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
  4790. + r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
  4791. + r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
  4792. + r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
  4793. + r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
  4794. + r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
  4795. + r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
  4796. + r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
  4797. + r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
  4798. + r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
  4799. + r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
  4800. + r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
  4801. + r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
  4802. + r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
  4803. + r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
  4804. + r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
  4805. + r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
  4806. + r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
  4807. + r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
  4808. + r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
  4809. + r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
  4810. + r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
  4811. + r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
  4812. +
  4813. +#undef r
  4814. +#define r r0
  4815. +// im_tab: m_table expanded with r bound to r0 (single table) or to r0..r3 in turn (four byte-rotated copies)
  4816. +#if defined(ONE_IM_TABLE)
  4817. +static const u_int32_t im_tab[256] =
  4818. + { m_table };
  4819. +#elif defined(FOUR_IM_TABLES)
  4820. +static const u_int32_t im_tab[4][256] =
  4821. +{ { m_table },
  4822. +#undef r
  4823. +#define r r1
  4824. + { m_table },
  4825. +#undef r
  4826. +#define r r2
  4827. + { m_table },
  4828. +#undef r
  4829. +#define r r3
  4830. + { m_table }
  4831. +};
  4832. +#endif
  4833. +
  4834. +#endif
  4835. +
  4836. +#else
  4837. +
  4838. +static int tab_gen = 0; // starts at 0; presumably set once the tables below have been filled in - TODO confirm against the generator code
  4839. +// writable counterparts of the compiled-in tables, used when FIXED_TABLES is not defined
  4840. +static unsigned char s_box[256]; // the S box
  4841. +static unsigned char inv_s_box[256]; // the inverse S box
  4842. +static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
  4843. +// forward and inverse round tables
  4844. +#if defined(ONE_TABLE)
  4845. +static u_int32_t ft_tab[256];
  4846. +static u_int32_t it_tab[256];
  4847. +#elif defined(FOUR_TABLES)
  4848. +static u_int32_t ft_tab[4][256];
  4849. +static u_int32_t it_tab[4][256];
  4850. +#endif
  4851. +// forward and inverse last-round tables
  4852. +#if defined(ONE_LR_TABLE)
  4853. +static u_int32_t fl_tab[256];
  4854. +static u_int32_t il_tab[256];
  4855. +#elif defined(FOUR_LR_TABLES)
  4856. +static u_int32_t fl_tab[4][256];
  4857. +static u_int32_t il_tab[4][256];
  4858. +#endif
  4859. +// im_tab: same shapes as the compiled-in im_tab
  4860. +#if defined(ONE_IM_TABLE)
  4861. +static u_int32_t im_tab[256];
  4862. +#elif defined(FOUR_IM_TABLES)
  4863. +static u_int32_t im_tab[4][256];
  4864. +#endif
  4865. +
  4866. +// Generate the tables for the dynamic table option
  4867. +
  4868. +#if !defined(FF_TABLES)
  4869. +
  4870. +// It will generally be sensible to use tables to compute finite
  4871. +// field multiplies and inverses but where memory is scarce this
  4872. +// code might sometimes be better.
  4873. +
  4874. +// return 2 ^ (n - 1) where n is the bit number of the highest bit
  4875. +// set in x with x in the range 1 < x < 0x00000200. This form is
  4876. +// used so that locals within FFinv can be bytes rather than words
  4877. +
  4878. +static unsigned char hibit(const u_int32_t x) // half the value of the highest set bit of x; 0 when x < 2
  4879. +{ unsigned char r = (unsigned char)((x >> 1) | (x >> 2)); // seed from x shifted down, truncated to 8 bits
  4880. +
  4881. + r |= (r >> 2); // smear the top set bit of r into every lower bit position ...
  4882. + r |= (r >> 4); // ... so that r becomes 2^k - 1 (all ones below and including its top bit)
  4883. + return (r + 1) >> 1; // (2^k - 1 + 1) / 2 = 2^(k - 1)
  4884. +}
  4885. +
  4886. +// return the inverse of the finite field element x
  4887. +
  4888. +static unsigned char FFinv(const unsigned char x) // inverse of field element x; 0 and 1 are returned unchanged
  4889. +{ unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; // (p1,n1,v1) and (p2,n2,v2): a remainder, its hibit() value and its accumulated multiplier
  4890. +
  4891. + if(x < 2) return x;
  4892. +
  4893. + for(;;) // alternately reduce p2 by p1 and p1 by p2 (NOTE(review): reads like an extended-Euclid style reduction - confirm)
  4894. + {
  4895. + if(!n1) return v1; // hibit(p1) == 0 means p1 < 2: done, the answer is v1
  4896. +
  4897. + while(n2 >= n1)
  4898. + {
  4899. + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); // n2 / n1 is a power of two, so the multiplies act as left shifts (results truncate to 8 bits)
  4900. + }
  4901. +
  4902. + if(!n2) return v2; // symmetric exit: p2 has dropped below 2, the answer is v2
  4903. +
  4904. + while(n1 >= n2)
  4905. + {
  4906. + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); // same step with the two triples swapped
  4907. + }
  4908. + }
  4909. +}
  4910. +
  4911. +// define the finite field multiplies required for Rijndael
  4912. +
  4913. +#define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0)) // times 2: shift the low 7 bits left, XOR 0x1b when the top bit was set
  4914. +#define FFmul03(x) ((x) ^ FFmul02(x)) // times 3 = x ^ 2x
  4915. +#define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x)))) // times 9 = x ^ 8x
  4916. +#define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x)))) // times 0x0b = x ^ 2(x ^ 4x)
  4917. +#define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x)))) // times 0x0d = x ^ 4(x ^ 2x)
  4918. +#define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x))) // times 0x0e = 2(x ^ 2(x ^ 2x)); all of these evaluate x more than once
  4919. +
  4920. +#else
  4921. +
  4922. +#define FFinv(x) ((x) ? pow[255 - log[(x)]] : 0) // inverse via the log/pow tables local to gen_tabs(); 0 maps to 0
  4923. +
  4924. +#define FFmul02(x) ((x) ? pow[log[(x)] + 0x19] : 0) // multiply by adding logs: 0x19 is the log of 0x02 to base 0x03
  4925. +#define FFmul03(x) ((x) ? pow[log[(x)] + 0x01] : 0) // 0x01 is the log of the generator 0x03 itself
  4926. +#define FFmul09(x) ((x) ? pow[log[(x)] + 0xc7] : 0) // each constant is the log of the multiplier named in the macro
  4927. +#define FFmul0b(x) ((x) ? pow[log[(x)] + 0x68] : 0) // argument is now parenthesized at every use, matching FFinv
  4928. +#define FFmul0d(x) ((x) ? pow[log[(x)] + 0xee] : 0) // x is still evaluated twice: pass side-effect-free expressions only
  4929. +#define FFmul0e(x) ((x) ? pow[log[(x)] + 0xdf] : 0) // largest index is 254 + 0xee = 492, inside pow[512]
  4930. +
  4931. +#endif
  4932. +
  4933. +// The forward and inverse affine transformations used in the S-box
  4934. +
  4935. +#define fwd_affine(x) /* forward affine step; uses the caller's u_int32_t w as scratch and yields a byte XORed with 0x63 */ \
  4936. + (w = (u_int32_t)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))
  4937. +
  4938. +#define inv_affine(x) /* inverse affine step; same scratch w, result XORed with 0x05; argument now parenthesized under the cast */ \
  4939. + (w = (u_int32_t)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
  4940. +
  4941. +static void gen_tabs(void) // fill every run-time table (S-boxes, round constants, round/last-round/inv_mcol tables); callers guard the call with tab_gen
  4942. +{ u_int32_t i, w; // w is also the scratch variable used by fwd_affine()/inv_affine()
  4943. +
  4944. +#if defined(FF_TABLES)
  4945. +
  4946. + unsigned char pow[512], log[256]; // local tables read by the FF_TABLES forms of FFinv()/FFmul*()
  4947. +
  4948. + // log and power tables for GF(2^8) finite field with
  4949. + // 0x011b as modular polynomial - the simplest primitive
  4950. + // root is 0x03, used here to generate the tables
  4951. +
  4952. + i = 0; w = 1;
  4953. + do
  4954. + {
  4955. + pow[i] = (unsigned char)w;
  4956. + pow[i + 255] = (unsigned char)w; // second copy so that pow[log a + constant] needs no reduction modulo 255
  4957. + log[w] = (unsigned char)i++;
  4958. + w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0); // w = 3 * w: w ^ 2w, with the doubling reduced through ff_poly
  4959. + }
  4960. + while (w != 1); // stop when the powers of 0x03 wrap back to 1
  4961. +
  4962. +#endif
  4963. +
  4964. + for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
  4965. + {
  4966. + rcon_tab[i] = bytes2word(w, 0, 0, 0); // round constant occupies the first byte of the word
  4967. + w = (w << 1) ^ (w & ff_hi ? ff_poly : 0); // next constant: double w in the field
  4968. + }
  4969. +
  4970. + for(i = 0; i < 256; ++i)
  4971. + { unsigned char b;
  4972. +
  4973. + s_box[i] = b = fwd_affine(FFinv((unsigned char)i)); // S-box entry: field inverse followed by the forward affine step
  4974. +
  4975. + w = bytes2word(b, 0, 0, 0); // last-round entry: the S-box byte alone
  4976. +#if defined(ONE_LR_TABLE)
  4977. + fl_tab[i] = w;
  4978. +#elif defined(FOUR_LR_TABLES)
  4979. + fl_tab[0][i] = w;
  4980. + fl_tab[1][i] = upr(w,1); // tables 1..3 hold upr(w,n) of the same word (upr is defined outside this section)
  4981. + fl_tab[2][i] = upr(w,2);
  4982. + fl_tab[3][i] = upr(w,3);
  4983. +#endif
  4984. + w = bytes2word(FFmul02(b), b, b, FFmul03(b)); // normal-round entry: the S-box byte times 2, 1, 1 and 3
  4985. +#if defined(ONE_TABLE)
  4986. + ft_tab[i] = w;
  4987. +#elif defined(FOUR_TABLES)
  4988. + ft_tab[0][i] = w;
  4989. + ft_tab[1][i] = upr(w,1);
  4990. + ft_tab[2][i] = upr(w,2);
  4991. + ft_tab[3][i] = upr(w,3);
  4992. +#endif
  4993. + inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i)); // inverse S-box: inverse affine step, then field inverse; b is reused from here on
  4994. +
  4995. + w = bytes2word(b, 0, 0, 0); // inverse last-round entry: the inverse S-box byte alone
  4996. +#if defined(ONE_LR_TABLE)
  4997. + il_tab[i] = w;
  4998. +#elif defined(FOUR_LR_TABLES)
  4999. + il_tab[0][i] = w;
  5000. + il_tab[1][i] = upr(w,1);
  5001. + il_tab[2][i] = upr(w,2);
  5002. + il_tab[3][i] = upr(w,3);
  5003. +#endif
  5004. + w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b)); // inverse normal-round entry: b times 0x0e, 0x09, 0x0d and 0x0b
  5005. +#if defined(ONE_TABLE)
  5006. + it_tab[i] = w;
  5007. +#elif defined(FOUR_TABLES)
  5008. + it_tab[0][i] = w;
  5009. + it_tab[1][i] = upr(w,1);
  5010. + it_tab[2][i] = upr(w,2);
  5011. + it_tab[3][i] = upr(w,3);
  5012. +#endif
  5013. +#if defined(ONE_IM_TABLE)
  5014. + im_tab[b] = w; // indexed by b, not i: im_tab[b] holds the 0e/09/0d/0b multiples of b itself
  5015. +#elif defined(FOUR_IM_TABLES)
  5016. + im_tab[0][b] = w; // same indexing by b as the single-table case
  5017. + im_tab[1][b] = upr(w,1);
  5018. + im_tab[2][b] = upr(w,2);
  5019. + im_tab[3][b] = upr(w,3);
  5020. +#endif
  5021. +
  5022. + }
  5023. +}
  5024. +
  5025. +#endif
  5026. +
  5027. +#define no_table(x,box,vf,rf,c) bytes2word( /* four box[] byte lookups packed into one word; vf picks the source word and rf the byte index for rows 0..3 */ \
  5028. + box[bval(vf(x,0,c),rf(0,c))], \
  5029. + box[bval(vf(x,1,c),rf(1,c))], \
  5030. + box[bval(vf(x,2,c),rf(2,c))], \
  5031. + box[bval(vf(x,3,c),rf(3,c))])
  5032. +
  5033. +#define one_table(x,op,tab,vf,rf,c) /* XOR of four lookups in a single table; op(entry,n) adapts the entry for rows 1..3 */ \
  5034. + ( tab[bval(vf(x,0,c),rf(0,c))] \
  5035. + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
  5036. + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
  5037. + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
  5038. +
  5039. +#define four_tables(x,tab,vf,rf,c) /* XOR of four lookups, row n reading tab[n] */ \
  5040. + ( tab[0][bval(vf(x,0,c),rf(0,c))] \
  5041. + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
  5042. + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
  5043. + ^ tab[3][bval(vf(x,3,c),rf(3,c))])
  5044. +
  5045. +#define vf1(x,r,c) (x) // source word is x itself for every row
  5046. +#define rf1(r,c) (r) // byte index equals the row number
  5047. +#define rf2(r,c) ((r-c)&3) // byte index is the row number moved back by c, modulo 4
  5048. +
  5049. +#if defined(FOUR_LR_TABLES)
  5050. +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) // S-box applied to the bytes of x, selected through rf2 with offset c; used by the key schedule
  5051. +#elif defined(ONE_LR_TABLE)
  5052. +#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
  5053. +#else
  5054. +#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
  5055. +#endif
  5056. +
  5057. +#if defined(FOUR_IM_TABLES)
  5058. +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) // transform applied to round-key words when the decryption schedule is built
  5059. +#elif defined(ONE_IM_TABLE)
  5060. +#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
  5061. +#else
  5062. +#define inv_mcol(x) /* table-free form: needs u_int32_t f2, f4, f8, f9 declared at the point of use; FFmulX is defined outside this section */ \
  5063. + (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
  5064. + f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
  5065. +#endif
  5066. +
  5067. +// Subroutine to set the block size (if variable) in bytes, legal
  5068. +// values being 16, 24 and 32.
  5069. +
  5070. +#if defined(AES_BLOCK_SIZE)
  5071. +#define nc (AES_BLOCK_SIZE / 4) // fixed number of 32-bit state columns
  5072. +#else
  5073. +#define nc (cx->aes_Ncol) // per-context column count; every user of nc needs a variable named cx in scope
  5074. +
  5075. +void aes_set_blk(aes_context *cx, int n_bytes) // select the block size for cx; n_bytes may be a byte count or a bit count
  5076. +{
  5077. +#if !defined(FIXED_TABLES)
  5078. + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the run-time tables on first use
  5079. +#endif
  5080. +
  5081. + switch(n_bytes) {
  5082. + case 32: /* bytes */
  5083. + case 256: /* bits */
  5084. + nc = 8; // nc expands to cx->aes_Ncol, so this stores into the context
  5085. + break;
  5086. + case 24: /* bytes */
  5087. + case 192: /* bits */
  5088. + nc = 6;
  5089. + break;
  5090. + case 16: /* bytes */
  5091. + case 128: /* bits */
  5092. + default: // any unrecognised value silently selects the 16-byte block
  5093. + nc = 4;
  5094. + break;
  5095. + }
  5096. +}
  5097. +
  5098. +#endif
  5099. +
  5100. +// Initialise the key schedule from the user supplied key. The key
  5101. +// length is now specified in bytes - 16, 24 or 32 as appropriate.
  5102. +// This corresponds to bit lengths of 128, 192 and 256 bits, and
  5103. +// to Nk values of 4, 6 and 8 respectively.
  5104. +
  5105. +#define mx(t,f) (*t++ = inv_mcol(*f),f++) // store inv_mcol of the source word, then advance both pointers
  5106. +#define cp(t,f) *t++ = *f++ // copy one word and advance both pointers
  5107. +
  5108. +#if AES_BLOCK_SIZE == 16
  5109. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s) // four words; expands to several statements, so use only inside braces
  5110. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s) // same, passing each word through inv_mcol
  5111. +#elif AES_BLOCK_SIZE == 24
  5112. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); /* six words */ \
  5113. + cp(d,s); cp(d,s)
  5114. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); /* six words */ \
  5115. + mx(d,s); mx(d,s)
  5116. +#elif AES_BLOCK_SIZE == 32
  5117. +#define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); /* eight words */ \
  5118. + cp(d,s); cp(d,s); cp(d,s); cp(d,s)
  5119. +#define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); /* eight words */ \
  5120. + mx(d,s); mx(d,s); mx(d,s); mx(d,s)
  5121. +#else
  5122. +
  5123. +#define cpy(d,s) /* variable block size: copy nc words; the cases fall through on purpose (2 + 2 + 4) */ \
  5124. +switch(nc) \
  5125. +{ case 8: cp(d,s); cp(d,s); \
  5126. + case 6: cp(d,s); cp(d,s); \
  5127. + case 4: cp(d,s); cp(d,s); \
  5128. + cp(d,s); cp(d,s); \
  5129. +}
  5130. +
  5131. +#define mix(d,s) /* variable block size: inv_mcol nc words; same deliberate fall-through */ \
  5132. +switch(nc) \
  5133. +{ case 8: mx(d,s); mx(d,s); \
  5134. + case 6: mx(d,s); mx(d,s); \
  5135. + case 4: mx(d,s); mx(d,s); \
  5136. + mx(d,s); mx(d,s); \
  5137. +}
  5138. +
  5139. +#endif
  5140. +
  5141. +void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f) // expand in_key into cx; n_bytes is a byte or bit count; the decryption schedule is built only when f is 0
  5142. +{ u_int32_t *kf, *kt, rci; // kf: current expansion position, kt: end marker (later the decryption write pointer), rci: round-constant index
  5143. +
  5144. +#if !defined(FIXED_TABLES)
  5145. + if(!tab_gen) { gen_tabs(); tab_gen = 1; } // build the run-time tables on first use
  5146. +#endif
  5147. +
  5148. + switch(n_bytes) {
  5149. + case 32: /* bytes */
  5150. + case 256: /* bits */
  5151. + cx->aes_Nkey = 8;
  5152. + break;
  5153. + case 24: /* bytes */
  5154. + case 192: /* bits */
  5155. + cx->aes_Nkey = 6;
  5156. + break;
  5157. + case 16: /* bytes */
  5158. + case 128: /* bits */
  5159. + default: // any unrecognised length is treated as a 16-byte key
  5160. + cx->aes_Nkey = 4;
  5161. + break;
  5162. + }
  5163. +
  5164. + cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6; // rounds = max(key words, block columns) + 6
  5165. +
  5166. + cx->aes_e_key[0] = word_in(in_key ); // the first four key words are loaded for every key length
  5167. + cx->aes_e_key[1] = word_in(in_key + 4);
  5168. + cx->aes_e_key[2] = word_in(in_key + 8);
  5169. + cx->aes_e_key[3] = word_in(in_key + 12);
  5170. +
  5171. + kf = cx->aes_e_key;
  5172. + kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey; // expansion loops below stop once kf reaches this point
  5173. + rci = 0;
  5174. +
  5175. + switch(cx->aes_Nkey)
  5176. + {
  5177. + case 4: do
  5178. + { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++]; // first new word mixes in the S-boxed previous word and a round constant
  5179. + kf[5] = kf[1] ^ kf[4]; // remaining words chain by XOR
  5180. + kf[6] = kf[2] ^ kf[5];
  5181. + kf[7] = kf[3] ^ kf[6];
  5182. + kf += 4;
  5183. + }
  5184. + while(kf < kt);
  5185. + break;
  5186. +
  5187. + case 6: cx->aes_e_key[4] = word_in(in_key + 16); // load the two extra key words of a 24-byte key
  5188. + cx->aes_e_key[5] = word_in(in_key + 20);
  5189. + do
  5190. + { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
  5191. + kf[ 7] = kf[1] ^ kf[ 6];
  5192. + kf[ 8] = kf[2] ^ kf[ 7];
  5193. + kf[ 9] = kf[3] ^ kf[ 8];
  5194. + kf[10] = kf[4] ^ kf[ 9];
  5195. + kf[11] = kf[5] ^ kf[10];
  5196. + kf += 6;
  5197. + }
  5198. + while(kf < kt);
  5199. + break;
  5200. +
  5201. + case 8: cx->aes_e_key[4] = word_in(in_key + 16); // load the four extra key words of a 32-byte key
  5202. + cx->aes_e_key[5] = word_in(in_key + 20);
  5203. + cx->aes_e_key[6] = word_in(in_key + 24);
  5204. + cx->aes_e_key[7] = word_in(in_key + 28);
  5205. + do
  5206. + { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
  5207. + kf[ 9] = kf[1] ^ kf[ 8];
  5208. + kf[10] = kf[2] ^ kf[ 9];
  5209. + kf[11] = kf[3] ^ kf[10];
  5210. + kf[12] = kf[4] ^ ls_box(kf[11],0); // 8-word keys only: an extra S-box step mid-way, with offset 0 and no round constant
  5211. + kf[13] = kf[5] ^ kf[12];
  5212. + kf[14] = kf[6] ^ kf[13];
  5213. + kf[15] = kf[7] ^ kf[14];
  5214. + kf += 8;
  5215. + }
  5216. + while (kf < kt);
  5217. + break;
  5218. + }
  5219. +
  5220. + if(!f) // f == 0: also derive the decryption schedule from the encryption schedule
  5221. + { u_int32_t i;
  5222. +
  5223. + kt = cx->aes_d_key + nc * cx->aes_Nrnd; // start writing at the last round-key slot of aes_d_key
  5224. + kf = cx->aes_e_key;
  5225. +
  5226. + cpy(kt, kf); kt -= 2 * nc; // first encryption round key goes last; cpy advanced kt by nc, so step back to the preceding slot
  5227. +
  5228. + for(i = 1; i < cx->aes_Nrnd; ++i) // middle round keys, stored in reverse order
  5229. + {
  5230. +#if defined(ONE_TABLE) || defined(FOUR_TABLES)
  5231. +#if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
  5232. + u_int32_t f2, f4, f8, f9; // scratch for the table-free form of inv_mcol()
  5233. +#endif
  5234. + mix(kt, kf); // with round tables in use the middle keys are passed through inv_mcol
  5235. +#else
  5236. + cpy(kt, kf); // without round tables the middle keys are copied unchanged
  5237. +#endif
  5238. + kt -= 2 * nc;
  5239. + }
  5240. +
  5241. + cpy(kt, kf); // last encryption round key lands at the start of aes_d_key
  5242. + }
  5243. +}
  5244. +
  5245. +// y = output word, x = input word, r = row, c = column
  5246. +// for r = 0, 1, 2 and 3 = column accessed for row r
  5247. +
  5248. +#if defined(ARRAYS)
  5249. +#define s(x,c) x[c] // state word c of x as an array element
  5250. +#else
  5251. +#define s(x,c) x##c // state word c of x as a separate variable, e.g. s(b0,3) names b03
  5252. +#endif
  5253. +
  5254. +// I am grateful to Frank Yellin for the following constructions
  5255. +// which, given the column (c) of the output state variable that
  5256. +// is being computed, return the input state variables which are
  5257. +// needed for each row (r) of the state
  5258. +
  5259. +// For the fixed block size options, compilers reduce these two
  5260. +// expressions to fixed variable references. For variable block
  5261. +// size code conditional clauses will sometimes be returned
  5262. +
  5263. +#define unused 77 // Sunset Strip
  5264. +
  5265. +#define fwd_var(x,r,c) /* forward direction: state word of x that feeds row r of output column c; row 0 is column c itself, rows 1..3 read later columns with offsets that depend on nc */ \
  5266. + ( r==0 ? \
  5267. + ( c==0 ? s(x,0) \
  5268. + : c==1 ? s(x,1) \
  5269. + : c==2 ? s(x,2) \
  5270. + : c==3 ? s(x,3) \
  5271. + : c==4 ? s(x,4) \
  5272. + : c==5 ? s(x,5) \
  5273. + : c==6 ? s(x,6) \
  5274. + : s(x,7)) \
  5275. + : r==1 ? \
  5276. + ( c==0 ? s(x,1) \
  5277. + : c==1 ? s(x,2) \
  5278. + : c==2 ? s(x,3) \
  5279. + : c==3 ? nc==4 ? s(x,0) : s(x,4) \
  5280. + : c==4 ? s(x,5) \
  5281. + : c==5 ? nc==8 ? s(x,6) : s(x,0) \
  5282. + : c==6 ? s(x,7) \
  5283. + : s(x,0)) \
  5284. + : r==2 ? \
  5285. + ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
  5286. + : c==1 ? nc==8 ? s(x,4) : s(x,3) \
  5287. + : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  5288. + : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  5289. + : c==4 ? nc==8 ? s(x,7) : s(x,0) \
  5290. + : c==5 ? nc==8 ? s(x,0) : s(x,1) \
  5291. + : c==6 ? s(x,1) \
  5292. + : s(x,2)) \
  5293. + : \
  5294. + ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
  5295. + : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
  5296. + : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
  5297. + : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
  5298. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  5299. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  5300. + : c==6 ? s(x,2) \
  5301. + : s(x,3)))
  5302. +
  5303. +#define inv_var(x,r,c) /* inverse direction: state word of x that feeds row r of output column c; row 0 is column c itself, rows 1..3 read earlier columns (wrapping) with offsets that depend on nc */ \
  5304. + ( r==0 ? \
  5305. + ( c==0 ? s(x,0) \
  5306. + : c==1 ? s(x,1) \
  5307. + : c==2 ? s(x,2) \
  5308. + : c==3 ? s(x,3) \
  5309. + : c==4 ? s(x,4) \
  5310. + : c==5 ? s(x,5) \
  5311. + : c==6 ? s(x,6) \
  5312. + : s(x,7)) \
  5313. + : r==1 ? \
  5314. + ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
  5315. + : c==1 ? s(x,0) \
  5316. + : c==2 ? s(x,1) \
  5317. + : c==3 ? s(x,2) \
  5318. + : c==4 ? s(x,3) \
  5319. + : c==5 ? s(x,4) \
  5320. + : c==6 ? s(x,5) \
  5321. + : s(x,6)) \
  5322. + : r==2 ? \
  5323. + ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  5324. + : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  5325. + : c==2 ? nc==8 ? s(x,7) : s(x,0) \
  5326. + : c==3 ? nc==8 ? s(x,0) : s(x,1) \
  5327. + : c==4 ? nc==8 ? s(x,1) : s(x,2) \
  5328. + : c==5 ? nc==8 ? s(x,2) : s(x,3) \
  5329. + : c==6 ? s(x,3) \
  5330. + : s(x,4)) \
  5331. + : \
  5332. + ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
  5333. + : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
  5334. + : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
  5335. + : c==3 ? nc==8 ? s(x,7) : s(x,0) \
  5336. + : c==4 ? nc==8 ? s(x,0) : s(x,1) \
  5337. + : c==5 ? nc==8 ? s(x,1) : s(x,2) \
  5338. + : c==6 ? s(x,2) \
  5339. + : s(x,3)))
  5340. +
  5341. +#define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c] // state word c = input word at byte offset 4*c XOR key word c
  5342. +#define so(y,x,c) word_out(y + 4 * c, s(x,c)) // write state word c to the output at byte offset 4*c
  5343. +
  5344. +#if defined(FOUR_TABLES)
  5345. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c) // normal round, column c: four table lookups XOR round-key word
  5346. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
  5347. +#elif defined(ONE_TABLE)
  5348. +#define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c) // same result from a single table, adapting entries with upr
  5349. +#define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
  5350. +#else
  5351. +#define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c] // table-free: S-box bytes, fwd_mcol, then the key word
  5352. +#define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]) // table-free inverse: the key word is XORed in before inv_mcol
  5353. +#endif
  5354. +
  5355. +#if defined(FOUR_LR_TABLES)
  5356. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c) // last round, column c: lookups in the last-round tables
  5357. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
  5358. +#elif defined(ONE_LR_TABLE)
  5359. +#define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c) // NOTE(review): uses ups where the normal rounds use upr; both are defined outside this section
  5360. +#define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
  5361. +#else
  5362. +#define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c] // table-free last round: S-box bytes XOR key word, no column mixing
  5363. +#define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
  5364. +#endif
  5365. +
  5366. +#if AES_BLOCK_SIZE == 16
  5367. +
  5368. +#if defined(ARRAYS)
  5369. +#define locals(y,x) x[4],y[4] // declarator list: both state buffers as 4-word arrays
  5370. +#else
  5371. +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 // declarator list: eight scalar state words
  5372. +// the following defines prevent the compiler requiring the declaration
  5373. +// of generated but unused variables in the fwd_var and inv_var macros
  5374. +#define b04 unused
  5375. +#define b05 unused
  5376. +#define b06 unused
  5377. +#define b07 unused
  5378. +#define b14 unused
  5379. +#define b15 unused
  5380. +#define b16 unused
  5381. +#define b17 unused
  5382. +#endif
  5383. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); /* copy the four state words of x into y */ \
  5384. + s(y,2) = s(x,2); s(y,3) = s(x,3);
  5385. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3) // load four input words, XORing in the key words at k
  5386. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) // store four state words to the output
  5387. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) // apply round macro rm to columns 0..3
  5388. +
  5389. +#elif AES_BLOCK_SIZE == 24
  5390. +
  5391. +#if defined(ARRAYS)
  5392. +#define locals(y,x) x[6],y[6] // declarator list: both state buffers as 6-word arrays
  5393. +#else
  5394. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, /* declarator list: twelve scalar state words */ \
  5395. + y##0,y##1,y##2,y##3,y##4,y##5
  5396. +#define b06 unused // names that fwd_var/inv_var still generate for columns 6 and 7
  5397. +#define b07 unused
  5398. +#define b16 unused
  5399. +#define b17 unused
  5400. +#endif
  5401. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); /* copy the six state words of x into y */ \
  5402. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  5403. + s(y,4) = s(x,4); s(y,5) = s(x,5);
  5404. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); /* load six input words, XORing in the key words at k */ \
  5405. + si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
  5406. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); /* store six state words to the output */ \
  5407. + so(y,x,3); so(y,x,4); so(y,x,5)
  5408. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); /* apply round macro rm to columns 0..5 */ \
  5409. + rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
  5410. +#else
  5411. +
  5412. +#if defined(ARRAYS)
  5413. +#define locals(y,x) x[8],y[8] // declarator list: both state buffers as 8-word arrays (32-byte or variable block size)
  5414. +#else
  5415. +#define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, /* declarator list: sixteen scalar state words */ \
  5416. + y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
  5417. +#endif
  5418. +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); /* copy all eight state words of x into y, whatever nc is */ \
  5419. + s(y,2) = s(x,2); s(y,3) = s(x,3); \
  5420. + s(y,4) = s(x,4); s(y,5) = s(x,5); \
  5421. + s(y,6) = s(x,6); s(y,7) = s(x,7);
  5422. +
  5423. +#if AES_BLOCK_SIZE == 32
  5424. +
  5425. +#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); /* load eight input words, XORing in the key words at k */ \
  5426. + si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
  5427. +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); /* store eight state words to the output */ \
  5428. + so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
  5429. +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); /* apply round macro rm to columns 0..7 */ \
  5430. + rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
  5431. +#else
  5432. +
  5433. +#define state_in(y,x,k) /* variable block size: load nc words; the cases fall through on purpose */ \
  5434. +switch(nc) \
  5435. +{ case 8: si(y,x,k,7); si(y,x,k,6); \
  5436. + case 6: si(y,x,k,5); si(y,x,k,4); \
  5437. + case 4: si(y,x,k,3); si(y,x,k,2); \
  5438. + si(y,x,k,1); si(y,x,k,0); \
  5439. +}
  5440. +
  5441. +#define state_out(y,x) /* variable block size: store nc words; same deliberate fall-through */ \
  5442. +switch(nc) \
  5443. +{ case 8: so(y,x,7); so(y,x,6); \
  5444. + case 6: so(y,x,5); so(y,x,4); \
  5445. + case 4: so(y,x,3); so(y,x,2); \
  5446. + so(y,x,1); so(y,x,0); \
  5447. +}
  5448. +
  5449. +#if defined(FAST_VARIABLE)
  5450. +
  5451. +#define round(rm,y,x,k) /* FAST_VARIABLE: each block size has its own complete case ending in break */ \
  5452. +switch(nc) \
  5453. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  5454. + rm(y,x,k,5); rm(y,x,k,4); \
  5455. + rm(y,x,k,3); rm(y,x,k,2); \
  5456. + rm(y,x,k,1); rm(y,x,k,0); \
  5457. + break; \
  5458. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  5459. + rm(y,x,k,3); rm(y,x,k,2); \
  5460. + rm(y,x,k,1); rm(y,x,k,0); \
  5461. + break; \
  5462. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  5463. + rm(y,x,k,1); rm(y,x,k,0); \
  5464. + break; \
  5465. +}
  5466. +#else
  5467. +
  5468. +#define round(rm,y,x,k) /* compact form: larger block sizes fall through into the smaller cases */ \
  5469. +switch(nc) \
  5470. +{ case 8: rm(y,x,k,7); rm(y,x,k,6); \
  5471. + case 6: rm(y,x,k,5); rm(y,x,k,4); \
  5472. + case 4: rm(y,x,k,3); rm(y,x,k,2); \
  5473. + rm(y,x,k,1); rm(y,x,k,0); \
  5474. +}
  5475. +
  5476. +#endif
  5477. +
  5478. +#endif
  5479. +#endif
  5480. +
  5481. +void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) // encrypt one block from in_blk into out_blk with the schedule in cx->aes_e_key
  5482. +{ u_int32_t locals(b0, b1); // two state buffers, used alternately as source and destination of a round
  5483. + const u_int32_t *kp = cx->aes_e_key;
  5484. +
  5485. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  5486. + u_int32_t f2; // presumably scratch for fwd_mcol(), which is defined outside this section - confirm
  5487. +#endif
  5488. +
  5489. + state_in(b0, in_blk, kp); kp += nc; // load the block, XOR in the first round key, move to the next key
  5490. +
  5491. +#if defined(UNROLL)
  5492. +
  5493. + switch(cx->aes_Nrnd) // cases fall through on purpose: 14 and 12 rounds prepend extra pairs to the 10-round body
  5494. + {
  5495. + case 14: round(fwd_rnd, b1, b0, kp );
  5496. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5497. + case 12: round(fwd_rnd, b1, b0, kp );
  5498. + round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5499. + case 10: round(fwd_rnd, b1, b0, kp );
  5500. + round(fwd_rnd, b0, b1, kp + nc);
  5501. + round(fwd_rnd, b1, b0, kp + 2 * nc);
  5502. + round(fwd_rnd, b0, b1, kp + 3 * nc);
  5503. + round(fwd_rnd, b1, b0, kp + 4 * nc);
  5504. + round(fwd_rnd, b0, b1, kp + 5 * nc);
  5505. + round(fwd_rnd, b1, b0, kp + 6 * nc);
  5506. + round(fwd_rnd, b0, b1, kp + 7 * nc);
  5507. + round(fwd_rnd, b1, b0, kp + 8 * nc);
  5508. + round(fwd_lrnd, b0, b1, kp + 9 * nc); // last round uses the last-round macro and leaves the result in b0
  5509. + }
  5510. +
  5511. +#elif defined(PARTIAL_UNROLL)
  5512. + { u_int32_t rnd;
  5513. +
  5514. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd) // two rounds per pass, ping-ponging between b0 and b1
  5515. + {
  5516. + round(fwd_rnd, b1, b0, kp);
  5517. + round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
  5518. + }
  5519. +
  5520. + round(fwd_rnd, b1, b0, kp); // final pair: one normal round ...
  5521. + round(fwd_lrnd, b0, b1, kp + nc); // ... then the last round, result in b0
  5522. + }
  5523. +#else
  5524. + { u_int32_t rnd;
  5525. +
  5526. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) // one round per pass; the result is copied back into b0 each time
  5527. + {
  5528. + round(fwd_rnd, b1, b0, kp);
  5529. + l_copy(b0, b1); kp += nc;
  5530. + }
  5531. +
  5532. + round(fwd_lrnd, b0, b1, kp); // b1 still equals b0 after the last l_copy; the last round writes b0
  5533. + }
  5534. +#endif
  5535. +
  5536. + state_out(out_blk, b0); // store the final state to the output block
  5537. +}
  5538. +
  5539. +void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[]) // decrypt one block from in_blk into out_blk with the schedule in cx->aes_d_key (built by aes_set_key when its f argument is 0)
  5540. +{ u_int32_t locals(b0, b1); // two state buffers, used alternately as source and destination of a round
  5541. + const u_int32_t *kp = cx->aes_d_key;
  5542. +
  5543. +#if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
  5544. + u_int32_t f2, f4, f8, f9; // scratch variables used by the table-free form of inv_mcol()
  5545. +#endif
  5546. +
  5547. + state_in(b0, in_blk, kp); kp += nc; // load the block, XOR in the first decryption round key, move to the next key
  5548. +
  5549. +#if defined(UNROLL)
  5550. +
  5551. + switch(cx->aes_Nrnd) // cases fall through on purpose: 14 and 12 rounds prepend extra pairs to the 10-round body
  5552. + {
  5553. + case 14: round(inv_rnd, b1, b0, kp );
  5554. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5555. + case 12: round(inv_rnd, b1, b0, kp );
  5556. + round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
  5557. + case 10: round(inv_rnd, b1, b0, kp );
  5558. + round(inv_rnd, b0, b1, kp + nc);
  5559. + round(inv_rnd, b1, b0, kp + 2 * nc);
  5560. + round(inv_rnd, b0, b1, kp + 3 * nc);
  5561. + round(inv_rnd, b1, b0, kp + 4 * nc);
  5562. + round(inv_rnd, b0, b1, kp + 5 * nc);
  5563. + round(inv_rnd, b1, b0, kp + 6 * nc);
  5564. + round(inv_rnd, b0, b1, kp + 7 * nc);
  5565. + round(inv_rnd, b1, b0, kp + 8 * nc);
  5566. + round(inv_lrnd, b0, b1, kp + 9 * nc); // last round uses the last-round macro and leaves the result in b0
  5567. + }
  5568. +
  5569. +#elif defined(PARTIAL_UNROLL)
  5570. + { u_int32_t rnd;
  5571. +
  5572. + for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd) // two rounds per pass, ping-ponging between b0 and b1
  5573. + {
  5574. + round(inv_rnd, b1, b0, kp);
  5575. + round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
  5576. + }
  5577. +
  5578. + round(inv_rnd, b1, b0, kp); // final pair: one normal round ...
  5579. + round(inv_lrnd, b0, b1, kp + nc); // ... then the last round, result in b0
  5580. + }
  5581. +#else
  5582. + { u_int32_t rnd;
  5583. +
  5584. + for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd) // one round per pass; the result is copied back into b0 each time
  5585. + {
  5586. + round(inv_rnd, b1, b0, kp);
  5587. + l_copy(b0, b1); kp += nc;
  5588. + }
  5589. +
  5590. + round(inv_lrnd, b0, b1, kp); // b1 still equals b0 after the last l_copy; the last round writes b0
  5591. + }
  5592. +#endif
  5593. +
  5594. + state_out(out_blk, b0); // store the final state to the output block
  5595. +}
  5596. diff -pruN linux-2.4.28_orig/drivers/misc/aes.h linux-2.4.28/drivers/misc/aes.h
  5597. --- linux-2.4.28_orig/drivers/misc/aes.h 1970-01-01 01:00:00.000000000 +0100
  5598. +++ linux-2.4.28/drivers/misc/aes.h 2005-01-11 09:33:37.485525016 +0100
  5599. @@ -0,0 +1,113 @@
  5600. +// I retain copyright in this code but I encourage its free use provided
  5601. +// that I don't carry any responsibility for the results. I am especially
  5602. +// happy to see it used in free and open source software. If you do use
  5603. +// it I would appreciate an acknowledgement of its origin in the code or
  5604. +// the product that results and I would also appreciate knowing a little
  5605. +// about the use to which it is being put. I am grateful to Frank Yellin
  5606. +// for some ideas that are used in this implementation.
  5607. +//
  5608. +// Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
  5609. +//
  5610. +// This is an implementation of the AES encryption algorithm (Rijndael)
  5611. +// designed by Joan Daemen and Vincent Rijmen. This version is designed
  5612. +// to provide both fixed and dynamic block and key lengths and can also
  5613. +// run with either big or little endian internal byte order (see aes.h).
  5614. +// It inputs block and key lengths in bytes with the legal values being
  5615. +// 16, 24 and 32.
  5616. +
  5617. +/*
  5618. + * Modified by Jari Ruusu, May 1 2001
  5619. + * - Fixed some compile warnings, code was ok but gcc warned anyway.
  5620. + * - Changed basic types: byte -> unsigned char, word -> u_int32_t
  5621. + * - Major name space cleanup: Names visible to outside now begin
  5622. + * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
  5623. + * - Removed C++ and DLL support as part of name space cleanup.
  5624. + * - Eliminated unnecessary recomputation of tables. (actual bug fix)
  5625. + * - Merged precomputed constant tables to aes.c file.
  5626. + * - Removed data alignment restrictions for portability reasons.
  5627. + * - Made block and key lengths accept bit count (128/192/256)
  5628. + * as well as byte count (16/24/32).
  5629. + * - Removed all error checks. This change also eliminated the need
  5630. + * to preinitialize the context struct to zero.
  5631. + * - Removed some totally unused constants.
  5632. + */
  5633. +
  5634. +#ifndef _AES_H
  5635. +#define _AES_H
  5636. +
  5637. +#include <linux/types.h>
  5638. +#include <linux/linkage.h>
  5639. +#include <linux/config.h>
  5640. +#include <linux/module.h>
  5641. +
  5642. +// CONFIGURATION OPTIONS (see also aes.c)
  5643. +//
  5644. +// Define AES_BLOCK_SIZE to set the cipher block size (16, 24 or 32) or
  5645. +// leave this undefined for dynamically variable block size (this will
  5646. +// result in much slower code).
  5647. +// IMPORTANT NOTE: AES_BLOCK_SIZE is in BYTES (16, 24, 32 or undefined). If
  5648. +// left undefined a slower version providing variable block length is compiled
  5649. +
  5650. +#define AES_BLOCK_SIZE 16
  5651. +
  5652. +// The number of key schedule words for different block and key lengths
  5653. +// allowing for method of computation which requires the length to be a
  5654. +// multiple of the key length
  5655. +//
  5656. +// Nk = 4 6 8
  5657. +// -------------
  5658. +// Nb = 4 | 60 60 64
  5659. +// 6 | 96 90 96
  5660. +// 8 | 120 120 120
  5661. +
  5662. +#if !defined(AES_BLOCK_SIZE) || (AES_BLOCK_SIZE == 32)
  5663. +#define AES_KS_LENGTH 120 // key schedule words: sizes aes_e_key[] and aes_d_key[]
  5664. +#define AES_RC_LENGTH 29 // number of round constants (length of rcon_tab[] when tables are generated at run time)
  5665. +#else
  5666. +#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE) // 64 or 96 words; parenthesized so the macro is safe inside any expression
  5667. +#define AES_RC_LENGTH ((9 * AES_BLOCK_SIZE) / 8 - 8) // 10 or 19 constants; parenthesized for the same reason
  5668. +#endif
  5669. +
  5670. +typedef struct // cipher context: filled in by aes_set_key() (and aes_set_blk() for variable block size), read by aes_encrypt()/aes_decrypt()
  5671. +{
  5672. + u_int32_t aes_Nkey; // the number of words in the key input block
  5673. + u_int32_t aes_Nrnd; // the number of cipher rounds
  5674. + u_int32_t aes_e_key[AES_KS_LENGTH]; // the encryption key schedule
  5675. + u_int32_t aes_d_key[AES_KS_LENGTH]; // the decryption key schedule
  5676. +#if !defined(AES_BLOCK_SIZE)
  5677. + u_int32_t aes_Ncol; // the number of columns in the cipher state
  5678. +#endif
  5679. +} aes_context;
  5680. +
  5681. +// avoid global name conflict with mainline kernel
  5682. +#define aes_set_key _aes_set_key
  5683. +#define aes_encrypt _aes_encrypt
  5684. +#define aes_decrypt _aes_decrypt
  5685. +
  5686. +// THE CIPHER INTERFACE
  5687. +
  5688. +#if !defined(AES_BLOCK_SIZE)
  5689. +extern void aes_set_blk(aes_context *, const int); // (context, block size as a byte or bit count); only exists for variable block size builds
  5690. +#endif
  5691. +
  5692. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  5693. + asmlinkage
  5694. +#endif
  5695. +extern void aes_set_key(aes_context *, const unsigned char [], const int, const int); // (context, key bytes, key length as a byte or bit count, flag: 0 also builds the decryption schedule)
  5696. +
  5697. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  5698. + asmlinkage
  5699. +#endif
  5700. +extern void aes_encrypt(const aes_context *, const unsigned char [], unsigned char []); // (context, input block, output block)
  5701. +
  5702. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  5703. + asmlinkage
  5704. +#endif
  5705. +extern void aes_decrypt(const aes_context *, const unsigned char [], unsigned char []); // (context, input block, output block); needs a context whose decryption schedule was built
  5706. +
  5707. +// The block length inputs to aes_set_blk and aes_set_key are in numbers
  5708. +// of bytes or bits. The calls to subroutines must be made in the above
  5709. +// order but multiple calls can be made without repeating earlier calls
  5710. +// if their parameters have not changed.
  5711. +
  5712. +#endif // _AES_H
  5713. diff -pruN linux-2.4.28_orig/drivers/misc/crypto-ksym.c linux-2.4.28/drivers/misc/crypto-ksym.c
  5714. --- linux-2.4.28_orig/drivers/misc/crypto-ksym.c 1970-01-01 01:00:00.000000000 +0100
  5715. +++ linux-2.4.28/drivers/misc/crypto-ksym.c 2005-01-11 09:33:37.485525016 +0100
  5716. @@ -0,0 +1,7 @@
  5717. +#include <linux/module.h>
  5718. +#include "aes.h" // also renames aes_set_key/aes_encrypt/aes_decrypt to their _aes_* forms
  5719. +#include "md5.h"
  5720. +EXPORT_SYMBOL_NOVERS(aes_set_key); // because of the renaming macros in aes.h the exported symbol is _aes_set_key
  5721. +EXPORT_SYMBOL_NOVERS(aes_encrypt); // exported as _aes_encrypt
  5722. +EXPORT_SYMBOL_NOVERS(aes_decrypt); // exported as _aes_decrypt
  5723. +EXPORT_SYMBOL_NOVERS(md5_transform_CPUbyteorder); // MD5 transform, presumably declared in md5.h - confirm
  5724. diff -pruN linux-2.4.28_orig/drivers/misc/md5-amd64.S linux-2.4.28/drivers/misc/md5-amd64.S
  5725. --- linux-2.4.28_orig/drivers/misc/md5-amd64.S 1970-01-01 01:00:00.000000000 +0100
  5726. +++ linux-2.4.28/drivers/misc/md5-amd64.S 2005-01-11 09:33:37.486524864 +0100
  5727. @@ -0,0 +1,200 @@
  5728. +//
  5729. +// md5-amd64.S
  5730. +//
  5731. +// Written by Jari Ruusu, October 1 2003
  5732. +//
  5733. +// Copyright 2003 by Jari Ruusu.
  5734. +// Redistribution of this file is permitted under the GNU Public License.
  5735. +//
  5736. +
  5737. +// Modified by Jari Ruusu, June 12 2004
  5738. +// - Converted 32 bit x86 code to 64 bit AMD64 code
  5739. +
  5740. +// A MD5 transform implementation for AMD64 compatible processors.
  5741. +// This code does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11
  5742. +// registers or the arithmetic status flags. However, the rbx, rbp and
  5743. +// r12-r15 registers are preserved across calls.
  5744. +
  5745. +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in)
  5746. +
  5747. +#if defined(USE_UNDERLINE)
  5748. +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder
  5749. +#endif
  5750. +#if !defined(ALIGN64BYTES)
  5751. +# define ALIGN64BYTES 64
  5752. +#endif
  5753. +
  5754. + .file "md5-amd64.S"
  5755. + .globl md5_transform_CPUbyteorder
  5756. +
  5757. +// rdi = pointer to hash[4] array which is read and written
  5758. +// rsi = pointer to in[16] array which is read only
  5759. +
  5760. + .text
  5761. + .align ALIGN64BYTES
  5762. +md5_transform_CPUbyteorder:
  5763. + movl 12(%rdi),%eax
  5764. + movl 8(%rdi),%ecx
  5765. + movl (%rdi),%r8d
  5766. + movl 4(%rdi),%r9d
  5767. + movl (%rsi),%r10d
  5768. + prefetcht0 60(%rsi)
  5769. + movl %eax,%edx
  5770. + xorl %ecx,%eax
  5771. +// REPEAT1: one round-1 step, p1w = p2x + rol(p1w + p4c + in + (p3z ^ (p2x & %eax)), p5s). Entry: %eax = y^z, %r10d = in word. Exit: %r10d = in[p6Nin], %eax = p7Nz ^ p8Ny for the next step.
  5772. +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \
  5773. + addl $p4c,p1w ;\
  5774. + andl p2x,%eax ;\
  5775. + addl %r10d,p1w ;\
  5776. + xorl p3z,%eax ;\
  5777. + movl p6Nin*4(%rsi),%r10d ;\
  5778. + addl %eax,p1w ;\
  5779. + movl p7Nz,%eax ;\
  5780. + roll $p5s,p1w ;\
  5781. + xorl p8Ny,%eax ;\
  5782. + addl p2x,p1w
  5783. +
  5784. + REPEAT1(%r8d,%r9d,%edx,0xd76aa478, 7, 1,%ecx,%r9d)
  5785. + REPEAT1(%edx,%r8d,%ecx,0xe8c7b756,12, 2,%r9d,%r8d)
  5786. + REPEAT1(%ecx,%edx,%r9d,0x242070db,17, 3,%r8d,%edx)
  5787. + REPEAT1(%r9d,%ecx,%r8d,0xc1bdceee,22, 4,%edx,%ecx)
  5788. + REPEAT1(%r8d,%r9d,%edx,0xf57c0faf, 7, 5,%ecx,%r9d)
  5789. + REPEAT1(%edx,%r8d,%ecx,0x4787c62a,12, 6,%r9d,%r8d)
  5790. + REPEAT1(%ecx,%edx,%r9d,0xa8304613,17, 7,%r8d,%edx)
  5791. + REPEAT1(%r9d,%ecx,%r8d,0xfd469501,22, 8,%edx,%ecx)
  5792. + REPEAT1(%r8d,%r9d,%edx,0x698098d8, 7, 9,%ecx,%r9d)
  5793. + REPEAT1(%edx,%r8d,%ecx,0x8b44f7af,12,10,%r9d,%r8d)
  5794. + REPEAT1(%ecx,%edx,%r9d,0xffff5bb1,17,11,%r8d,%edx)
  5795. + REPEAT1(%r9d,%ecx,%r8d,0x895cd7be,22,12,%edx,%ecx)
  5796. + REPEAT1(%r8d,%r9d,%edx,0x6b901122, 7,13,%ecx,%r9d)
  5797. + REPEAT1(%edx,%r8d,%ecx,0xfd987193,12,14,%r9d,%r8d)
  5798. + REPEAT1(%ecx,%edx,%r9d,0xa679438e,17,15,%r8d,%edx)
  5799. +
  5800. + addl $0x49b40821,%r9d
  5801. + andl %ecx,%eax
  5802. + addl %r10d,%r9d
  5803. + xorl %r8d,%eax
  5804. + movl 1*4(%rsi),%r10d
  5805. + addl %eax,%r9d
  5806. + movl %ecx,%eax
  5807. + roll $22,%r9d
  5808. + addl %ecx,%r9d
  5809. +// REPEAT2: one round-2 step using p3y ^ (p4z & (p2x ^ p3y)). Entry: %eax = p3y, %r10d = in word. Exit: %r10d = in[p7Nin], %eax = p8Ny (the y register of the next step).
  5810. +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \
  5811. + xorl p2x,%eax ;\
  5812. + addl $p5c,p1w ;\
  5813. + andl p4z,%eax ;\
  5814. + addl %r10d,p1w ;\
  5815. + xorl p3y,%eax ;\
  5816. + movl p7Nin*4(%rsi),%r10d ;\
  5817. + addl %eax,p1w ;\
  5818. + movl p8Ny,%eax ;\
  5819. + roll $p6s,p1w ;\
  5820. + addl p2x,p1w
  5821. +
  5822. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xf61e2562, 5, 6,%r9d)
  5823. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc040b340, 9,11,%r8d)
  5824. + REPEAT2(%ecx,%edx,%r8d,%r9d,0x265e5a51,14, 0,%edx)
  5825. + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe9b6c7aa,20, 5,%ecx)
  5826. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xd62f105d, 5,10,%r9d)
  5827. + REPEAT2(%edx,%r8d,%r9d,%ecx,0x02441453, 9,15,%r8d)
  5828. + REPEAT2(%ecx,%edx,%r8d,%r9d,0xd8a1e681,14, 4,%edx)
  5829. + REPEAT2(%r9d,%ecx,%edx,%r8d,0xe7d3fbc8,20, 9,%ecx)
  5830. + REPEAT2(%r8d,%r9d,%ecx,%edx,0x21e1cde6, 5,14,%r9d)
  5831. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xc33707d6, 9, 3,%r8d)
  5832. + REPEAT2(%ecx,%edx,%r8d,%r9d,0xf4d50d87,14, 8,%edx)
  5833. + REPEAT2(%r9d,%ecx,%edx,%r8d,0x455a14ed,20,13,%ecx)
  5834. + REPEAT2(%r8d,%r9d,%ecx,%edx,0xa9e3e905, 5, 2,%r9d)
  5835. + REPEAT2(%edx,%r8d,%r9d,%ecx,0xfcefa3f8, 9, 7,%r8d)
  5836. + REPEAT2(%ecx,%edx,%r8d,%r9d,0x676f02d9,14,12,%edx)
  5837. +
  5838. + xorl %ecx,%eax
  5839. + addl $0x8d2a4c8a,%r9d
  5840. + andl %r8d,%eax
  5841. + addl %r10d,%r9d
  5842. + xorl %edx,%eax
  5843. + movl 5*4(%rsi),%r10d
  5844. + addl %eax,%r9d
  5845. + movl %ecx,%eax
  5846. + roll $20,%r9d
  5847. + xorl %edx,%eax
  5848. + addl %ecx,%r9d
  5849. +// REPEAT3: one round-3 step using p2x ^ y ^ z. Entry: %eax = y^z, %r10d = in word. Exit: %r10d = in[p5Nin], %eax = p6Ny ^ p7Nz for the next step.
  5850. +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \
  5851. + addl $p3c,p1w ;\
  5852. + xorl p2x,%eax ;\
  5853. + addl %r10d,p1w ;\
  5854. + movl p5Nin*4(%rsi),%r10d ;\
  5855. + addl %eax,p1w ;\
  5856. + movl p6Ny,%eax ;\
  5857. + roll $p4s,p1w ;\
  5858. + xorl p7Nz,%eax ;\
  5859. + addl p2x,p1w
  5860. +
  5861. + REPEAT3(%r8d,%r9d,0xfffa3942, 4, 8,%r9d,%ecx)
  5862. + REPEAT3(%edx,%r8d,0x8771f681,11,11,%r8d,%r9d)
  5863. + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%r8d)
  5864. + REPEAT3(%r9d,%ecx,0xfde5380c,23, 1,%ecx,%edx)
  5865. + REPEAT3(%r8d,%r9d,0xa4beea44, 4, 4,%r9d,%ecx)
  5866. + REPEAT3(%edx,%r8d,0x4bdecfa9,11, 7,%r8d,%r9d)
  5867. + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%r8d)
  5868. + REPEAT3(%r9d,%ecx,0xbebfbc70,23,13,%ecx,%edx)
  5869. + REPEAT3(%r8d,%r9d,0x289b7ec6, 4, 0,%r9d,%ecx)
  5870. + REPEAT3(%edx,%r8d,0xeaa127fa,11, 3,%r8d,%r9d)
  5871. + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%r8d)
  5872. + REPEAT3(%r9d,%ecx,0x04881d05,23, 9,%ecx,%edx)
  5873. + REPEAT3(%r8d,%r9d,0xd9d4d039, 4,12,%r9d,%ecx)
  5874. + REPEAT3(%edx,%r8d,0xe6db99e5,11,15,%r8d,%r9d)
  5875. + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%r8d)
  5876. +
  5877. + addl $0xc4ac5665,%r9d
  5878. + xorl %ecx,%eax
  5879. + addl %r10d,%r9d
  5880. + movl (%rsi),%r10d
  5881. + addl %eax,%r9d
  5882. + movl %edx,%eax
  5883. + roll $23,%r9d
  5884. + notl %eax
  5885. + addl %ecx,%r9d
  5886. +// REPEAT4: one round-4 step using p3y ^ (p2x | %eax). Entry: %eax = inverted z, %r10d = in word. Exit: %r10d = in[p6Nin], %eax = inverted p7Nz for the next step.
  5887. +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \
  5888. + addl $p4c,p1w ;\
  5889. + orl p2x,%eax ;\
  5890. + addl %r10d,p1w ;\
  5891. + xorl p3y,%eax ;\
  5892. + movl p6Nin*4(%rsi),%r10d ;\
  5893. + addl %eax,p1w ;\
  5894. + movl p7Nz,%eax ;\
  5895. + roll $p5s,p1w ;\
  5896. + notl %eax ;\
  5897. + addl p2x,p1w
  5898. +
  5899. + REPEAT4(%r8d,%r9d,%ecx,0xf4292244, 6, 7,%ecx)
  5900. + REPEAT4(%edx,%r8d,%r9d,0x432aff97,10,14,%r9d)
  5901. + REPEAT4(%ecx,%edx,%r8d,0xab9423a7,15, 5,%r8d)
  5902. + REPEAT4(%r9d,%ecx,%edx,0xfc93a039,21,12,%edx)
  5903. + REPEAT4(%r8d,%r9d,%ecx,0x655b59c3, 6, 3,%ecx)
  5904. + REPEAT4(%edx,%r8d,%r9d,0x8f0ccc92,10,10,%r9d)
  5905. + REPEAT4(%ecx,%edx,%r8d,0xffeff47d,15, 1,%r8d)
  5906. + REPEAT4(%r9d,%ecx,%edx,0x85845dd1,21, 8,%edx)
  5907. + REPEAT4(%r8d,%r9d,%ecx,0x6fa87e4f, 6,15,%ecx)
  5908. + REPEAT4(%edx,%r8d,%r9d,0xfe2ce6e0,10, 6,%r9d)
  5909. + REPEAT4(%ecx,%edx,%r8d,0xa3014314,15,13,%r8d)
  5910. + REPEAT4(%r9d,%ecx,%edx,0x4e0811a1,21, 4,%edx)
  5911. + REPEAT4(%r8d,%r9d,%ecx,0xf7537e82, 6,11,%ecx)
  5912. + REPEAT4(%edx,%r8d,%r9d,0xbd3af235,10, 2,%r9d)
  5913. + REPEAT4(%ecx,%edx,%r8d,0x2ad7d2bb,15, 9,%r8d)
  5914. +
  5915. + addl $0xeb86d391,%r9d
  5916. + orl %ecx,%eax
  5917. + addl %r10d,%r9d
  5918. + xorl %edx,%eax
  5919. + addl %eax,%r9d
  5920. + roll $21,%r9d
  5921. + addl %ecx,%r9d
  5922. +
  5923. + addl %r8d,(%rdi)
  5924. + addl %r9d,4(%rdi)
  5925. + addl %ecx,8(%rdi)
  5926. + addl %edx,12(%rdi)
  5927. + ret
  5928. diff -pruN linux-2.4.28_orig/drivers/misc/md5-x86.S linux-2.4.28/drivers/misc/md5-x86.S
  5929. --- linux-2.4.28_orig/drivers/misc/md5-x86.S 1970-01-01 01:00:00.000000000 +0100
  5930. +++ linux-2.4.28/drivers/misc/md5-x86.S 2005-01-11 09:33:37.486524864 +0100
  5931. @@ -0,0 +1,207 @@
  5932. +//
  5933. +// md5-x86.S
  5934. +//
  5935. +// Written by Jari Ruusu, October 1 2003
  5936. +//
  5937. +// Copyright 2003 by Jari Ruusu.
  5938. +// Redistribution of this file is permitted under the GNU Public License.
  5939. +//
  5940. +
  5941. +// A MD5 transform implementation for x86 compatible processors. This
  5942. +// version uses i386 instruction set but instruction scheduling is optimized
  5943. +// for Pentium-2. This code does not preserve the eax, ecx or edx registers
  5944. +// or the arithmetic status flags. However, the ebx, esi, edi, and ebp
  5945. +// registers are preserved across calls.
  5946. +
  5947. +// void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in)
  5948. +
  5949. +#if defined(USE_UNDERLINE)
  5950. +# define md5_transform_CPUbyteorder _md5_transform_CPUbyteorder
  5951. +#endif
  5952. +#if !defined(ALIGN32BYTES)
  5953. +# define ALIGN32BYTES 32
  5954. +#endif
  5955. +
  5956. + .file "md5-x86.S"
  5957. + .globl md5_transform_CPUbyteorder
  5958. + .text
  5959. + .align ALIGN32BYTES
  5960. +
  5961. +md5_transform_CPUbyteorder:
  5962. + push %ebp
  5963. + mov 4+4(%esp),%eax // pointer to 'hash' input
  5964. + mov 8+4(%esp),%ebp // pointer to 'in' array
  5965. + push %ebx
  5966. + push %esi
  5967. + push %edi
  5968. +
  5969. + mov (%eax),%esi
  5970. + mov 4(%eax),%edi
  5971. + mov 8(%eax),%ecx
  5972. + mov 12(%eax),%eax
  5973. + mov (%ebp),%ebx
  5974. + mov %eax,%edx
  5975. + xor %ecx,%eax
  5976. +// REPEAT1: one round-1 step, p1w = p2x + rol(p1w + p4c + in + (p3z ^ (p2x & %eax)), p5s). Entry: %eax = y^z, %ebx = in word. Exit: %ebx = in[p6Nin], %eax = p7Nz ^ p8Ny for the next step.
  5977. +#define REPEAT1(p1w,p2x,p3z,p4c,p5s,p6Nin,p7Nz,p8Ny) \
  5978. + add $p4c,p1w ;\
  5979. + and p2x,%eax ;\
  5980. + add %ebx,p1w ;\
  5981. + xor p3z,%eax ;\
  5982. + mov p6Nin*4(%ebp),%ebx ;\
  5983. + add %eax,p1w ;\
  5984. + mov p7Nz,%eax ;\
  5985. + rol $p5s,p1w ;\
  5986. + xor p8Ny,%eax ;\
  5987. + add p2x,p1w
  5988. +
  5989. + REPEAT1(%esi,%edi,%edx,0xd76aa478, 7, 1,%ecx,%edi)
  5990. + REPEAT1(%edx,%esi,%ecx,0xe8c7b756,12, 2,%edi,%esi)
  5991. + REPEAT1(%ecx,%edx,%edi,0x242070db,17, 3,%esi,%edx)
  5992. + REPEAT1(%edi,%ecx,%esi,0xc1bdceee,22, 4,%edx,%ecx)
  5993. + REPEAT1(%esi,%edi,%edx,0xf57c0faf, 7, 5,%ecx,%edi)
  5994. + REPEAT1(%edx,%esi,%ecx,0x4787c62a,12, 6,%edi,%esi)
  5995. + REPEAT1(%ecx,%edx,%edi,0xa8304613,17, 7,%esi,%edx)
  5996. + REPEAT1(%edi,%ecx,%esi,0xfd469501,22, 8,%edx,%ecx)
  5997. + REPEAT1(%esi,%edi,%edx,0x698098d8, 7, 9,%ecx,%edi)
  5998. + REPEAT1(%edx,%esi,%ecx,0x8b44f7af,12,10,%edi,%esi)
  5999. + REPEAT1(%ecx,%edx,%edi,0xffff5bb1,17,11,%esi,%edx)
  6000. + REPEAT1(%edi,%ecx,%esi,0x895cd7be,22,12,%edx,%ecx)
  6001. + REPEAT1(%esi,%edi,%edx,0x6b901122, 7,13,%ecx,%edi)
  6002. + REPEAT1(%edx,%esi,%ecx,0xfd987193,12,14,%edi,%esi)
  6003. + REPEAT1(%ecx,%edx,%edi,0xa679438e,17,15,%esi,%edx)
  6004. +
  6005. + add $0x49b40821,%edi
  6006. + and %ecx,%eax
  6007. + add %ebx,%edi
  6008. + xor %esi,%eax
  6009. + mov 1*4(%ebp),%ebx
  6010. + add %eax,%edi
  6011. + mov %ecx,%eax
  6012. + rol $22,%edi
  6013. + add %ecx,%edi
  6014. +// REPEAT2: one round-2 step using p3y ^ (p4z & (p2x ^ p3y)). Entry: %eax = p3y, %ebx = in word. Exit: %ebx = in[p7Nin], %eax = p8Ny (the y register of the next step).
  6015. +#define REPEAT2(p1w,p2x,p3y,p4z,p5c,p6s,p7Nin,p8Ny) \
  6016. + xor p2x,%eax ;\
  6017. + add $p5c,p1w ;\
  6018. + and p4z,%eax ;\
  6019. + add %ebx,p1w ;\
  6020. + xor p3y,%eax ;\
  6021. + mov p7Nin*4(%ebp),%ebx ;\
  6022. + add %eax,p1w ;\
  6023. + mov p8Ny,%eax ;\
  6024. + rol $p6s,p1w ;\
  6025. + add p2x,p1w
  6026. +
  6027. + REPEAT2(%esi,%edi,%ecx,%edx,0xf61e2562, 5, 6,%edi)
  6028. + REPEAT2(%edx,%esi,%edi,%ecx,0xc040b340, 9,11,%esi)
  6029. + REPEAT2(%ecx,%edx,%esi,%edi,0x265e5a51,14, 0,%edx)
  6030. + REPEAT2(%edi,%ecx,%edx,%esi,0xe9b6c7aa,20, 5,%ecx)
  6031. + REPEAT2(%esi,%edi,%ecx,%edx,0xd62f105d, 5,10,%edi)
  6032. + REPEAT2(%edx,%esi,%edi,%ecx,0x02441453, 9,15,%esi)
  6033. + REPEAT2(%ecx,%edx,%esi,%edi,0xd8a1e681,14, 4,%edx)
  6034. + REPEAT2(%edi,%ecx,%edx,%esi,0xe7d3fbc8,20, 9,%ecx)
  6035. + REPEAT2(%esi,%edi,%ecx,%edx,0x21e1cde6, 5,14,%edi)
  6036. + REPEAT2(%edx,%esi,%edi,%ecx,0xc33707d6, 9, 3,%esi)
  6037. + REPEAT2(%ecx,%edx,%esi,%edi,0xf4d50d87,14, 8,%edx)
  6038. + REPEAT2(%edi,%ecx,%edx,%esi,0x455a14ed,20,13,%ecx)
  6039. + REPEAT2(%esi,%edi,%ecx,%edx,0xa9e3e905, 5, 2,%edi)
  6040. + REPEAT2(%edx,%esi,%edi,%ecx,0xfcefa3f8, 9, 7,%esi)
  6041. + REPEAT2(%ecx,%edx,%esi,%edi,0x676f02d9,14,12,%edx)
  6042. +
  6043. + xor %ecx,%eax
  6044. + add $0x8d2a4c8a,%edi
  6045. + and %esi,%eax
  6046. + add %ebx,%edi
  6047. + xor %edx,%eax
  6048. + mov 5*4(%ebp),%ebx
  6049. + add %eax,%edi
  6050. + mov %ecx,%eax
  6051. + rol $20,%edi
  6052. + xor %edx,%eax
  6053. + add %ecx,%edi
  6054. +// REPEAT3: one round-3 step using p2x ^ y ^ z. Entry: %eax = y^z, %ebx = in word. Exit: %ebx = in[p5Nin], %eax = p6Ny ^ p7Nz for the next step.
  6055. +#define REPEAT3(p1w,p2x,p3c,p4s,p5Nin,p6Ny,p7Nz) \
  6056. + add $p3c,p1w ;\
  6057. + xor p2x,%eax ;\
  6058. + add %ebx,p1w ;\
  6059. + mov p5Nin*4(%ebp),%ebx ;\
  6060. + add %eax,p1w ;\
  6061. + mov p6Ny,%eax ;\
  6062. + rol $p4s,p1w ;\
  6063. + xor p7Nz,%eax ;\
  6064. + add p2x,p1w
  6065. +
  6066. + REPEAT3(%esi,%edi,0xfffa3942, 4, 8,%edi,%ecx)
  6067. + REPEAT3(%edx,%esi,0x8771f681,11,11,%esi,%edi)
  6068. + REPEAT3(%ecx,%edx,0x6d9d6122,16,14,%edx,%esi)
  6069. + REPEAT3(%edi,%ecx,0xfde5380c,23, 1,%ecx,%edx)
  6070. + REPEAT3(%esi,%edi,0xa4beea44, 4, 4,%edi,%ecx)
  6071. + REPEAT3(%edx,%esi,0x4bdecfa9,11, 7,%esi,%edi)
  6072. + REPEAT3(%ecx,%edx,0xf6bb4b60,16,10,%edx,%esi)
  6073. + REPEAT3(%edi,%ecx,0xbebfbc70,23,13,%ecx,%edx)
  6074. + REPEAT3(%esi,%edi,0x289b7ec6, 4, 0,%edi,%ecx)
  6075. + REPEAT3(%edx,%esi,0xeaa127fa,11, 3,%esi,%edi)
  6076. + REPEAT3(%ecx,%edx,0xd4ef3085,16, 6,%edx,%esi)
  6077. + REPEAT3(%edi,%ecx,0x04881d05,23, 9,%ecx,%edx)
  6078. + REPEAT3(%esi,%edi,0xd9d4d039, 4,12,%edi,%ecx)
  6079. + REPEAT3(%edx,%esi,0xe6db99e5,11,15,%esi,%edi)
  6080. + REPEAT3(%ecx,%edx,0x1fa27cf8,16, 2,%edx,%esi)
  6081. +
  6082. + add $0xc4ac5665,%edi
  6083. + xor %ecx,%eax
  6084. + add %ebx,%edi
  6085. + mov (%ebp),%ebx
  6086. + add %eax,%edi
  6087. + mov %edx,%eax
  6088. + rol $23,%edi
  6089. + not %eax
  6090. + add %ecx,%edi
  6091. +// REPEAT4: one round-4 step using p3y ^ (p2x | %eax). Entry: %eax = inverted z, %ebx = in word. Exit: %ebx = in[p6Nin], %eax = inverted p7Nz for the next step.
  6092. +#define REPEAT4(p1w,p2x,p3y,p4c,p5s,p6Nin,p7Nz) \
  6093. + add $p4c,p1w ;\
  6094. + or p2x,%eax ;\
  6095. + add %ebx,p1w ;\
  6096. + xor p3y,%eax ;\
  6097. + mov p6Nin*4(%ebp),%ebx ;\
  6098. + add %eax,p1w ;\
  6099. + mov p7Nz,%eax ;\
  6100. + rol $p5s,p1w ;\
  6101. + not %eax ;\
  6102. + add p2x,p1w
  6103. +
  6104. + REPEAT4(%esi,%edi,%ecx,0xf4292244, 6, 7,%ecx)
  6105. + REPEAT4(%edx,%esi,%edi,0x432aff97,10,14,%edi)
  6106. + REPEAT4(%ecx,%edx,%esi,0xab9423a7,15, 5,%esi)
  6107. + REPEAT4(%edi,%ecx,%edx,0xfc93a039,21,12,%edx)
  6108. + REPEAT4(%esi,%edi,%ecx,0x655b59c3, 6, 3,%ecx)
  6109. + REPEAT4(%edx,%esi,%edi,0x8f0ccc92,10,10,%edi)
  6110. + REPEAT4(%ecx,%edx,%esi,0xffeff47d,15, 1,%esi)
  6111. + REPEAT4(%edi,%ecx,%edx,0x85845dd1,21, 8,%edx)
  6112. + REPEAT4(%esi,%edi,%ecx,0x6fa87e4f, 6,15,%ecx)
  6113. + REPEAT4(%edx,%esi,%edi,0xfe2ce6e0,10, 6,%edi)
  6114. + REPEAT4(%ecx,%edx,%esi,0xa3014314,15,13,%esi)
  6115. + REPEAT4(%edi,%ecx,%edx,0x4e0811a1,21, 4,%edx)
  6116. + REPEAT4(%esi,%edi,%ecx,0xf7537e82, 6,11,%ecx)
  6117. + REPEAT4(%edx,%esi,%edi,0xbd3af235,10, 2,%edi)
  6118. + REPEAT4(%ecx,%edx,%esi,0x2ad7d2bb,15, 9,%esi)
  6119. +
  6120. + add $0xeb86d391,%edi
  6121. + or %ecx,%eax
  6122. + add %ebx,%edi
  6123. + xor %edx,%eax
  6124. + mov 4+16(%esp),%ebp // pointer to 'hash' output
  6125. + add %eax,%edi
  6126. + rol $21,%edi
  6127. + add %ecx,%edi
  6128. +
  6129. + add %esi,(%ebp)
  6130. + add %edi,4(%ebp)
  6131. + add %ecx,8(%ebp)
  6132. + add %edx,12(%ebp)
  6133. +
  6134. + pop %edi
  6135. + pop %esi
  6136. + pop %ebx
  6137. + pop %ebp
  6138. + ret
  6139. diff -pruN linux-2.4.28_orig/drivers/misc/md5.c linux-2.4.28/drivers/misc/md5.c
  6140. --- linux-2.4.28_orig/drivers/misc/md5.c 1970-01-01 01:00:00.000000000 +0100
  6141. +++ linux-2.4.28/drivers/misc/md5.c 2005-01-11 09:33:37.487524712 +0100
  6142. @@ -0,0 +1,106 @@
  6143. +/*
  6144. + * MD5 Message Digest Algorithm (RFC1321).
  6145. + *
  6146. + * Derived from cryptoapi implementation, originally based on the
  6147. + * public domain implementation written by Colin Plumb in 1993.
  6148. + *
  6149. + * Copyright (c) Cryptoapi developers.
  6150. + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  6151. + *
  6152. + * This program is free software; you can redistribute it and/or modify it
  6153. + * under the terms of the GNU General Public License as published by the Free
  6154. + * Software Foundation; either version 2 of the License, or (at your option)
  6155. + * any later version.
  6156. + */
  6157. +
  6158. +#include "md5.h"
  6159. +/* Bitwise mixing functions for rounds 1-4, and MD5_STEP: w = rotl32(w + f(x,y,z) + in, s) + x. */
  6160. +#define MD5_F1(x, y, z) (z ^ (x & (y ^ z)))
  6161. +#define MD5_F2(x, y, z) MD5_F1(z, x, y)
  6162. +#define MD5_F3(x, y, z) (x ^ y ^ z)
  6163. +#define MD5_F4(x, y, z) (y ^ (x | ~z))
  6164. +#define MD5_STEP(f, w, x, y, z, in, s) \
  6165. + (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
  6166. +/* Run the MD5 block transform: update hash[0..3] from the sixteen words in[0..15], used as given (no byte swapping here). */
  6167. +void md5_transform_CPUbyteorder(u_int32_t *hash, u_int32_t const *in)
  6168. +{
  6169. + u_int32_t a, b, c, d;
  6170. + /* Work on local copies of the four state words. */
  6171. + a = hash[0];
  6172. + b = hash[1];
  6173. + c = hash[2];
  6174. + d = hash[3];
  6175. + /* Round 1: MD5_F1, words taken in order 0..15, rotations 7/12/17/22. */
  6176. + MD5_STEP(MD5_F1, a, b, c, d, in[0] + 0xd76aa478, 7);
  6177. + MD5_STEP(MD5_F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
  6178. + MD5_STEP(MD5_F1, c, d, a, b, in[2] + 0x242070db, 17);
  6179. + MD5_STEP(MD5_F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
  6180. + MD5_STEP(MD5_F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
  6181. + MD5_STEP(MD5_F1, d, a, b, c, in[5] + 0x4787c62a, 12);
  6182. + MD5_STEP(MD5_F1, c, d, a, b, in[6] + 0xa8304613, 17);
  6183. + MD5_STEP(MD5_F1, b, c, d, a, in[7] + 0xfd469501, 22);
  6184. + MD5_STEP(MD5_F1, a, b, c, d, in[8] + 0x698098d8, 7);
  6185. + MD5_STEP(MD5_F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
  6186. + MD5_STEP(MD5_F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
  6187. + MD5_STEP(MD5_F1, b, c, d, a, in[11] + 0x895cd7be, 22);
  6188. + MD5_STEP(MD5_F1, a, b, c, d, in[12] + 0x6b901122, 7);
  6189. + MD5_STEP(MD5_F1, d, a, b, c, in[13] + 0xfd987193, 12);
  6190. + MD5_STEP(MD5_F1, c, d, a, b, in[14] + 0xa679438e, 17);
  6191. + MD5_STEP(MD5_F1, b, c, d, a, in[15] + 0x49b40821, 22);
  6192. + /* Round 2: MD5_F2, word index starts at 1 and advances by 5 mod 16, rotations 5/9/14/20. */
  6193. + MD5_STEP(MD5_F2, a, b, c, d, in[1] + 0xf61e2562, 5);
  6194. + MD5_STEP(MD5_F2, d, a, b, c, in[6] + 0xc040b340, 9);
  6195. + MD5_STEP(MD5_F2, c, d, a, b, in[11] + 0x265e5a51, 14);
  6196. + MD5_STEP(MD5_F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
  6197. + MD5_STEP(MD5_F2, a, b, c, d, in[5] + 0xd62f105d, 5);
  6198. + MD5_STEP(MD5_F2, d, a, b, c, in[10] + 0x02441453, 9);
  6199. + MD5_STEP(MD5_F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
  6200. + MD5_STEP(MD5_F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
  6201. + MD5_STEP(MD5_F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
  6202. + MD5_STEP(MD5_F2, d, a, b, c, in[14] + 0xc33707d6, 9);
  6203. + MD5_STEP(MD5_F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
  6204. + MD5_STEP(MD5_F2, b, c, d, a, in[8] + 0x455a14ed, 20);
  6205. + MD5_STEP(MD5_F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
  6206. + MD5_STEP(MD5_F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
  6207. + MD5_STEP(MD5_F2, c, d, a, b, in[7] + 0x676f02d9, 14);
  6208. + MD5_STEP(MD5_F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
  6209. + /* Round 3: MD5_F3, word index starts at 5 and advances by 3 mod 16, rotations 4/11/16/23. */
  6210. + MD5_STEP(MD5_F3, a, b, c, d, in[5] + 0xfffa3942, 4);
  6211. + MD5_STEP(MD5_F3, d, a, b, c, in[8] + 0x8771f681, 11);
  6212. + MD5_STEP(MD5_F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
  6213. + MD5_STEP(MD5_F3, b, c, d, a, in[14] + 0xfde5380c, 23);
  6214. + MD5_STEP(MD5_F3, a, b, c, d, in[1] + 0xa4beea44, 4);
  6215. + MD5_STEP(MD5_F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
  6216. + MD5_STEP(MD5_F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
  6217. + MD5_STEP(MD5_F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
  6218. + MD5_STEP(MD5_F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
  6219. + MD5_STEP(MD5_F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
  6220. + MD5_STEP(MD5_F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
  6221. + MD5_STEP(MD5_F3, b, c, d, a, in[6] + 0x04881d05, 23);
  6222. + MD5_STEP(MD5_F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
  6223. + MD5_STEP(MD5_F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
  6224. + MD5_STEP(MD5_F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
  6225. + MD5_STEP(MD5_F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
  6226. + /* Round 4: MD5_F4, word index starts at 0 and advances by 7 mod 16, rotations 6/10/15/21. */
  6227. + MD5_STEP(MD5_F4, a, b, c, d, in[0] + 0xf4292244, 6);
  6228. + MD5_STEP(MD5_F4, d, a, b, c, in[7] + 0x432aff97, 10);
  6229. + MD5_STEP(MD5_F4, c, d, a, b, in[14] + 0xab9423a7, 15);
  6230. + MD5_STEP(MD5_F4, b, c, d, a, in[5] + 0xfc93a039, 21);
  6231. + MD5_STEP(MD5_F4, a, b, c, d, in[12] + 0x655b59c3, 6);
  6232. + MD5_STEP(MD5_F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
  6233. + MD5_STEP(MD5_F4, c, d, a, b, in[10] + 0xffeff47d, 15);
  6234. + MD5_STEP(MD5_F4, b, c, d, a, in[1] + 0x85845dd1, 21);
  6235. + MD5_STEP(MD5_F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
  6236. + MD5_STEP(MD5_F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
  6237. + MD5_STEP(MD5_F4, c, d, a, b, in[6] + 0xa3014314, 15);
  6238. + MD5_STEP(MD5_F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
  6239. + MD5_STEP(MD5_F4, a, b, c, d, in[4] + 0xf7537e82, 6);
  6240. + MD5_STEP(MD5_F4, d, a, b, c, in[11] + 0xbd3af235, 10);
  6241. + MD5_STEP(MD5_F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
  6242. + MD5_STEP(MD5_F4, b, c, d, a, in[9] + 0xeb86d391, 21);
  6243. + /* Add the transformed words back into the caller's state. */
  6244. + hash[0] += a;
  6245. + hash[1] += b;
  6246. + hash[2] += c;
  6247. + hash[3] += d;
  6248. +}
  6249. diff -pruN linux-2.4.28_orig/drivers/misc/md5.h linux-2.4.28/drivers/misc/md5.h
  6250. --- linux-2.4.28_orig/drivers/misc/md5.h 1970-01-01 01:00:00.000000000 +0100
  6251. +++ linux-2.4.28/drivers/misc/md5.h 2005-01-11 09:33:37.487524712 +0100
  6252. @@ -0,0 +1,11 @@
  6253. +/* md5.h */
  6254. +/* Declares md5_transform_CPUbyteorder(hash, in): hash is read and written, in is read only. */
  6255. +#include <linux/types.h>
  6256. +#include <linux/linkage.h>
  6257. +#include <linux/config.h>
  6258. +#include <linux/module.h>
  6259. +/* NOTE(review): asmlinkage presumably matches the assembly versions in md5-x86.S and md5-amd64.S -- confirm against the Makefile. */
  6260. +#if defined(CONFIG_X86) || defined(CONFIG_X86_64)
  6261. + asmlinkage
  6262. +#endif
  6263. +extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *);
  6264. diff -pruN linux-2.4.28_orig/include/linux/loop.h linux-2.4.28/include/linux/loop.h
  6265. --- linux-2.4.28_orig/include/linux/loop.h 2001-09-17 22:16:30.000000000 +0200
  6266. +++ linux-2.4.28/include/linux/loop.h 2005-01-11 09:33:37.488524560 +0100
  6267. @@ -17,6 +17,11 @@
  6268. #ifdef __KERNEL__
  6269. +/* definitions for IV metric -- cryptoapi specific */
  6270. +#define LOOP_IV_SECTOR_BITS 9
  6271. +#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS)
  6272. +typedef int loop_iv_t;
  6273. +
  6274. /* Possible states of device */
  6275. enum {
  6276. Lo_unbound,
  6277. @@ -27,35 +32,47 @@ enum {
  6278. struct loop_device {
  6279. int lo_number;
  6280. int lo_refcnt;
  6281. - kdev_t lo_device;
  6282. - int lo_offset;
  6283. + loff_t lo_offset;
  6284. + loff_t lo_sizelimit;
  6285. int lo_encrypt_type;
  6286. int lo_encrypt_key_size;
  6287. - int lo_flags;
  6288. int (*transfer)(struct loop_device *, int cmd,
  6289. char *raw_buf, char *loop_buf, int size,
  6290. int real_block);
  6291. + int (*ioctl)(struct loop_device *, int cmd,
  6292. + unsigned long arg);
  6293. char lo_name[LO_NAME_SIZE];
  6294. char lo_encrypt_key[LO_KEY_SIZE];
  6295. __u32 lo_init[2];
  6296. uid_t lo_key_owner; /* Who set the key */
  6297. - int (*ioctl)(struct loop_device *, int cmd,
  6298. - unsigned long arg);
  6299. + kdev_t lo_device;
  6300. + int lo_flags;
  6301. struct file * lo_backing_file;
  6302. - void *key_data;
  6303. + void *key_data;
  6304. char key_reserved[48]; /* for use by the filter modules */
  6305. int old_gfp_mask;
  6306. + int lo_state;
  6307. + struct buffer_head *lo_bh_que0;
  6308. + struct buffer_head *lo_bh_que1;
  6309. + struct buffer_head *lo_bh_que2;
  6310. + struct buffer_head *lo_bh_free;
  6311. spinlock_t lo_lock;
  6312. - struct buffer_head *lo_bh;
  6313. - struct buffer_head *lo_bhtail;
  6314. - int lo_state;
  6315. struct semaphore lo_sem;
  6316. struct semaphore lo_ctl_mutex;
  6317. - struct semaphore lo_bh_mutex;
  6318. atomic_t lo_pending;
  6319. + int lo_bh_flsh;
  6320. + int lo_bh_need;
  6321. + wait_queue_head_t lo_bh_wait;
  6322. + unsigned long lo_offs_sec;
  6323. + unsigned long lo_iv_remove;
  6324. + unsigned char lo_crypt_name[LO_NAME_SIZE];
  6325. +#if CONFIG_BLK_DEV_LOOP_KEYSCRUB
  6326. + void (*lo_keyscrub_fn)(void *);
  6327. + void *lo_keyscrub_ptr;
  6328. +#endif
  6329. };
  6330. typedef int (* transfer_proc_t)(struct loop_device *, int cmd,
  6331. @@ -77,20 +94,19 @@ static inline int lo_do_transfer(struct
  6332. */
  6333. #define LO_FLAGS_DO_BMAP 1
  6334. #define LO_FLAGS_READ_ONLY 2
  6335. -#define LO_FLAGS_BH_REMAP 4
  6336. -/*
  6337. +/*
  6338. * Note that this structure gets the wrong offsets when directly used
  6339. * from a glibc program, because glibc has a 32bit dev_t.
  6340. - * Prevent people from shooting in their own foot.
  6341. + * Prevent people from shooting in their own foot.
  6342. */
  6343. #if __GLIBC__ >= 2 && !defined(dev_t)
  6344. #error "Wrong dev_t in loop.h"
  6345. -#endif
  6346. +#endif
  6347. /*
  6348. * This uses kdev_t because glibc currently has no appropiate
  6349. - * conversion version for the loop ioctls.
  6350. + * conversion version for the loop ioctls.
  6351. * The situation is very unpleasant
  6352. */
  6353. @@ -109,6 +125,22 @@ struct loop_info {
  6354. char reserved[4];
  6355. };
  6356. +struct loop_info64 {
  6357. + __u64 lo_device; /* ioctl r/o */
  6358. + __u64 lo_inode; /* ioctl r/o */
  6359. + __u64 lo_rdevice; /* ioctl r/o */
  6360. + __u64 lo_offset;
  6361. + __u64 lo_sizelimit;/* bytes, 0 == max available */
  6362. + __u32 lo_number; /* ioctl r/o */
  6363. + __u32 lo_encrypt_type;
  6364. + __u32 lo_encrypt_key_size; /* ioctl w/o */
  6365. + __u32 lo_flags; /* ioctl r/o */
  6366. + __u8 lo_file_name[LO_NAME_SIZE];
  6367. + __u8 lo_crypt_name[LO_NAME_SIZE];
  6368. + __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
  6369. + __u64 lo_init[2];
  6370. +};
  6371. +
  6372. /*
  6373. * Loop filter types
  6374. */
  6375. @@ -122,25 +154,27 @@ struct loop_info {
  6376. #define LO_CRYPT_IDEA 6
  6377. #define LO_CRYPT_DUMMY 9
  6378. #define LO_CRYPT_SKIPJACK 10
  6379. +#define LO_CRYPT_AES 16
  6380. +#define LO_CRYPT_CRYPTOAPI 18
  6381. #define MAX_LO_CRYPT 20
  6382. #ifdef __KERNEL__
  6383. /* Support for loadable transfer modules */
  6384. struct loop_func_table {
  6385. - int number; /* filter type */
  6386. + int number; /* filter type */
  6387. int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf,
  6388. char *loop_buf, int size, int real_block);
  6389. - int (*init)(struct loop_device *, struct loop_info *);
  6390. + int (*init)(struct loop_device *, struct loop_info *);
  6391. /* release is called from loop_unregister_transfer or clr_fd */
  6392. - int (*release)(struct loop_device *);
  6393. + int (*release)(struct loop_device *);
  6394. int (*ioctl)(struct loop_device *, int cmd, unsigned long arg);
  6395. - /* lock and unlock manage the module use counts */
  6396. + /* lock and unlock manage the module use counts */
  6397. void (*lock)(struct loop_device *);
  6398. void (*unlock)(struct loop_device *);
  6399. -};
  6400. +};
  6401. -int loop_register_transfer(struct loop_func_table *funcs);
  6402. -int loop_unregister_transfer(int number);
  6403. +int loop_register_transfer(struct loop_func_table *funcs);
  6404. +int loop_unregister_transfer(int number);
  6405. #endif
  6406. /*
  6407. @@ -151,5 +185,10 @@ int loop_unregister_transfer(int number)
  6408. #define LOOP_CLR_FD 0x4C01
  6409. #define LOOP_SET_STATUS 0x4C02
  6410. #define LOOP_GET_STATUS 0x4C03
  6411. +#define LOOP_SET_STATUS64 0x4C04
  6412. +#define LOOP_GET_STATUS64 0x4C05
  6413. +
  6414. +#define LOOP_MULTI_KEY_SETUP 0x4C4D
  6415. +#define LOOP_MULTI_KEY_SETUP_V3 0x4C4E
  6416. #endif