  1. --- linux-2.6.4/drivers/block/loop.c 2004-03-11 03:55:29.000000000 +0100
  2. +++ linux-2.6.4/drivers/block/loop.c 2004-02-08 16:51:25.000000000 +0100
  3. @@ -2,7 +2,7 @@
  4. * linux/drivers/block/loop.c
  5. *
  6. * Written by Theodore Ts'o, 3/29/93
  7. - *
  8. + *
  9. * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
  10. * permitted under the GNU General Public License.
  11. *
  12. @@ -21,12 +21,12 @@
  13. * Loadable modules and other fixes by AK, 1998
  14. *
  15. * Make real block number available to downstream transfer functions, enables
  16. - * CBC (and relatives) mode encryption requiring unique IVs per data block.
  17. + * CBC (and relatives) mode encryption requiring unique IVs per data block.
  18. * Reed H. Petty, rhp@draper.net
  19. *
  20. * Maximum number of loop devices now dynamic via max_loop module parameter.
  21. * Russell Kroll <rkroll@exploits.org> 19990701
  22. - *
  23. + *
  24. * Maximum number of loop devices when compiled-in now selectable by passing
  25. * max_loop=<1-255> to the kernel on boot.
  26. * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
  27. @@ -39,18 +39,43 @@
  28. * Support up to 256 loop devices
  29. * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  30. *
  31. - * Still To Fix:
  32. - * - Advisory locking is ignored here.
  33. - * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  34. + * IV is now passed as (512 byte) sector number.
  35. + * Jari Ruusu, May 18 2001
  36. *
  37. - */
  38. + * External encryption module locking bug fixed.
  39. + * Ingo Rohloff <rohloff@in.tum.de>, June 21 2001
  40. + *
  41. + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
  42. + * Jari Ruusu, September 2 2001
  43. + *
  44. + * Ported 'pre-allocated buffers + queue rewrite' to BIO for 2.5 kernels
  45. + * Ben Slusky <sluskyb@stwing.org>, March 1 2002
  46. + * Jari Ruusu, March 27 2002
  47. + *
  48. + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea.
  49. + * Jari Ruusu, May 23 2002
  50. + *
  51. + * Exported hard sector size correctly, fixed file-backed-loop-on-tmpfs bug,
  52. + * plus many more enhancements and optimizations.
  53. + * Adam J. Richter <adam@yggdrasil.com>, Aug 2002
  54. + *
  55. + * Added support for removing offset from IV computations.
  56. + * Jari Ruusu, September 21 2003
  57. + *
  58. + *
  59. + * Still To Fix:
  60. + * - Advisory locking is ignored here.
  61. + * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  62. + */
  63. +#include <linux/version.h>
  64. #include <linux/config.h>
  65. #include <linux/module.h>
  66. #include <linux/sched.h>
  67. #include <linux/fs.h>
  68. #include <linux/file.h>
  69. +#include <linux/bio.h>
  70. #include <linux/stat.h>
  71. #include <linux/errno.h>
  72. #include <linux/major.h>
  73. @@ -69,41 +94,34 @@
  74. #include <asm/uaccess.h>
  75. +#if !defined(LO_FLAGS_DO_BMAP)
  76. +# define LO_FLAGS_DO_BMAP 0x80000
  77. +#endif
  78. +#if !defined(LO_FLAGS_READ_ONLY)
  79. +# define LO_FLAGS_READ_ONLY 0x40000
  80. +#endif
  81. +
  82. static int max_loop = 8;
  83. -static struct loop_device *loop_dev;
  84. static struct gendisk **disks;
  85. /*
  86. * Transfer functions
  87. */
  88. -static int transfer_none(struct loop_device *lo, int cmd,
  89. - struct page *raw_page, unsigned raw_off,
  90. - struct page *loop_page, unsigned loop_off,
  91. - int size, sector_t real_block)
  92. +static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
  93. + char *loop_buf, int size, sector_t real_block)
  94. {
  95. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  96. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  97. -
  98. - if (cmd == READ)
  99. - memcpy(loop_buf, raw_buf, size);
  100. - else
  101. - memcpy(raw_buf, loop_buf, size);
  102. + /* this code is only called from file backed loop */
  103. + /* and that code expects this function to be no-op */
  104. - kunmap_atomic(raw_buf, KM_USER0);
  105. - kunmap_atomic(loop_buf, KM_USER1);
  106. cond_resched();
  107. return 0;
  108. }
  109. -static int transfer_xor(struct loop_device *lo, int cmd,
  110. - struct page *raw_page, unsigned raw_off,
  111. - struct page *loop_page, unsigned loop_off,
  112. - int size, sector_t real_block)
  113. -{
  114. - char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  115. - char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  116. - char *in, *out, *key;
  117. - int i, keysize;
  118. +static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
  119. + char *loop_buf, int size, sector_t real_block)
  120. +{
  121. + char *in, *out, *key;
  122. + int i, keysize;
  123. if (cmd == READ) {
  124. in = raw_buf;
  125. @@ -117,324 +135,611 @@ static int transfer_xor(struct loop_devi
  126. keysize = lo->lo_encrypt_key_size;
  127. for (i = 0; i < size; i++)
  128. *out++ = *in++ ^ key[(i & 511) % keysize];
  129. -
  130. - kunmap_atomic(raw_buf, KM_USER0);
  131. - kunmap_atomic(loop_buf, KM_USER1);
  132. cond_resched();
  133. return 0;
  134. }
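The XOR transfer above keys off (i & 511) % keysize, so the key stream restarts at every 512-byte sector boundary and each sector can be deciphered independently of its neighbours. A minimal user-space sketch of the same indexing (key and payload are made up for illustration):

    /* User-space sketch of the XOR transfer above; the 512-byte mask
     * restarts the key stream at each sector, so sectors are
     * independently (de)cipherable. */
    #include <stdio.h>
    #include <string.h>

    static void xor_transfer(char *out, const char *in, int size,
                             const char *key, int keysize)
    {
        int i;
        for (i = 0; i < size; i++)
            out[i] = in[i] ^ key[(i & 511) % keysize];
    }

    int main(void)
    {
        char key[] = "secret";
        char buf[32] = "loop device payload";
        char enc[32], dec[32];

        xor_transfer(enc, buf, sizeof(buf), key, strlen(key));
        xor_transfer(dec, enc, sizeof(buf), key, strlen(key));
        printf("%s\n", dec);    /* round-trips to the original text */
        return 0;
    }

Applying the transfer twice with the same key restores the data, which is why one function serves both READ and WRITE.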
  135. -static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
  136. +static int xor_init(struct loop_device *lo, struct loop_info64 *info)
  137. {
  138. if (info->lo_encrypt_key_size <= 0)
  139. return -EINVAL;
  140. return 0;
  141. }
  142. -static struct loop_func_table none_funcs = {
  143. +static struct loop_func_table none_funcs = {
  144. .number = LO_CRYPT_NONE,
  145. - .transfer = transfer_none,
  146. -};
  147. + .transfer = (void *)transfer_none,
  148. +};
  149. -static struct loop_func_table xor_funcs = {
  150. +static struct loop_func_table xor_funcs = {
  151. .number = LO_CRYPT_XOR,
  152. - .transfer = transfer_xor,
  153. - .init = xor_init
  154. -};
  155. + .transfer = (void *)transfer_xor,
  156. + .init = (void *)xor_init,
  157. +};
  158. -/* xfer_funcs[0] is special - its release function is never called */
  159. +/* xfer_funcs[0] is special - its release function is never called */
  160. static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
  161. &none_funcs,
  162. - &xor_funcs
  163. + &xor_funcs,
  164. };
  165. -static int
  166. -figure_loop_size(struct loop_device *lo)
  167. -{
  168. - loff_t size, offset, loopsize;
  169. - sector_t x;
  170. +/*
  171. + * First number of 'lo_prealloc' is the default number of RAM pages
  172. + * to pre-allocate for each device backed loop. Every (configured)
  173. + * device backed loop pre-allocates this amount of RAM pages unless
  174. + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
  175. + * overrides are defined in pairs: loop_index,number_of_pages
  176. + */
  177. +static int lo_prealloc[9] = { 125, -1, 0, -1, 0, -1, 0, -1, 0 };
  178. +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */
  179. +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */
  180. - /* Compute loopsize in bytes */
  181. - size = i_size_read(lo->lo_backing_file->f_mapping->host);
  182. - offset = lo->lo_offset;
  183. - loopsize = size - offset;
  184. - if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
  185. - loopsize = lo->lo_sizelimit;
  186. +MODULE_PARM(lo_prealloc, "1-9i");
  187. +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
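The lo_prealloc array above is a default page count followed by loop_index,page_count override pairs. A user-space sketch of the lookup that loop_prealloc_init() performs further down, with illustrative values as if the module had been loaded with lo_prealloc=125,2,64:

    /* Sketch of lo_prealloc pair resolution: loop2 gets 64 pages,
     * every other device gets the 125-page default.  Values are
     * illustrative, not defaults. */
    #include <stdio.h>

    static int lo_prealloc[9] = { 125, 2, 64, -1, 0, -1, 0, -1, 0 };

    static int pages_for(int lo_number)
    {
        int x, y = lo_prealloc[0];               /* default */
        for (x = 1; x < 9; x += 2) {
            if (lo_prealloc[x + 1] && lo_number == lo_prealloc[x]) {
                y = lo_prealloc[x + 1];          /* per-device override */
                break;
            }
        }
        return y;
    }

    int main(void)
    {
        printf("loop0: %d pages, loop2: %d pages\n",
               pages_for(0), pages_for(2));      /* 125 and 64 */
        return 0;
    }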
  188. - /*
  189. - * Unfortunately, if we want to do I/O on the device,
  190. - * the number of 512-byte sectors has to fit into a sector_t.
  191. - */
  192. - size = loopsize >> 9;
  193. - x = (sector_t)size;
  194. +/*
  195. + * This is loop helper thread nice value in range
  196. + * from 0 (low priority) to -20 (high priority).
  197. + */
  198. +static int lo_nice = -1;
  199. - if ((loff_t)x != size)
  200. - return -EFBIG;
  201. +MODULE_PARM(lo_nice, "1i");
  202. +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)");
  203. - set_capacity(disks[lo->lo_number], x);
  204. - return 0;
  205. -}
  206. +struct loop_bio_extension {
  207. + struct bio *bioext_merge;
  208. + struct loop_device *bioext_loop;
  209. + sector_t bioext_iv;
  210. + int bioext_index;
  211. + int bioext_size;
  212. +};
  213. +
  214. +typedef struct {
  215. + struct loop_device lo_orig;
  216. + struct bio *lo_bio_que0;
  217. + struct bio *lo_bio_que1;
  218. + struct bio *lo_bio_que2;
  219. + struct bio *lo_bio_free0;
  220. + struct bio *lo_bio_free1;
  221. + atomic_t lo_bio_barr;
  222. + int lo_bio_flsh;
  223. + int lo_bio_need;
  224. + wait_queue_head_t lo_bio_wait;
  225. + sector_t lo_offs_sec;
  226. + sector_t lo_iv_remove;
  227. +} LoDevExt;
  228. +static struct loop_device **loop_dev_ptr_arr;
  229. +
  230. +#define LDE_lo_bio_que0 (((LoDevExt *)lo)->lo_bio_que0)
  231. +#define LDE_lo_bio_que1 (((LoDevExt *)lo)->lo_bio_que1)
  232. +#define LDE_lo_bio_que2 (((LoDevExt *)lo)->lo_bio_que2)
  233. +#define LDE_lo_bio_free0 (((LoDevExt *)lo)->lo_bio_free0)
  234. +#define LDE_lo_bio_free1 (((LoDevExt *)lo)->lo_bio_free1)
  235. +#define LDE_lo_bio_barr (((LoDevExt *)lo)->lo_bio_barr)
  236. +#define LDE_lo_bio_flsh (((LoDevExt *)lo)->lo_bio_flsh)
  237. +#define LDE_lo_bio_need (((LoDevExt *)lo)->lo_bio_need)
  238. +#define LDE_lo_bio_wait (((LoDevExt *)lo)->lo_bio_wait)
  239. +#define LDE_lo_offs_sec (((LoDevExt *)lo)->lo_offs_sec)
  240. +#define LDE_lo_iv_remove (((LoDevExt *)lo)->lo_iv_remove)
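LoDevExt widens struct loop_device without touching the loop.h header: the original structure is embedded as the first member, so any struct loop_device pointer handed around by generic code can be cast back to the wrapper, which is what the LDE_* macros above do. A minimal sketch of the pattern (struct and field names here are invented for illustration):

    /* Embed-and-cast extension pattern: as long as the original
     * struct is the FIRST member, a pointer to it casts back to
     * the wrapper. */
    #include <stdio.h>
    #include <stdlib.h>

    struct base { int id; };

    typedef struct {
        struct base orig;       /* must stay first */
        int extra;
    } BaseExt;

    #define EXT_extra(b) (((BaseExt *)(b))->extra)

    int main(void)
    {
        BaseExt *e = calloc(1, sizeof(*e));
        struct base *b = &e->orig;  /* handed around as plain base */

        EXT_extra(b) = 42;          /* wrapper field via cast */
        printf("%d\n", e->extra);   /* prints 42 */
        free(e);
        return 0;
    }

This only works because lo_orig stays the first member; any member placed before it would break the cast.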
  241. -static inline int
  242. -lo_do_transfer(struct loop_device *lo, int cmd,
  243. - struct page *rpage, unsigned roffs,
  244. - struct page *lpage, unsigned loffs,
  245. - int size, sector_t rblock)
  246. +static void loop_prealloc_cleanup(struct loop_device *lo)
  247. {
  248. - if (!lo->transfer)
  249. - return 0;
  250. + struct bio *bio;
  251. - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
  252. + while ((bio = LDE_lo_bio_free0)) {
  253. + LDE_lo_bio_free0 = bio->bi_next;
  254. + __free_page(bio->bi_io_vec[0].bv_page);
  255. + kfree(bio->bi_private);
  256. + bio->bi_next = NULL;
  257. + bio_put(bio);
  258. + }
  259. + while ((bio = LDE_lo_bio_free1)) {
  260. + LDE_lo_bio_free1 = bio->bi_next;
  261. + /* bi_flags was used for other purpose */
  262. + bio->bi_flags = 0;
  263. + /* bi_cnt was used for other purpose */
  264. + atomic_set(&bio->bi_cnt, 1);
  265. + bio->bi_next = NULL;
  266. + bio_put(bio);
  267. + }
  268. }
  269. -static int
  270. -do_lo_send(struct loop_device *lo, struct bio_vec *bvec, int bsize, loff_t pos)
  271. +static int loop_prealloc_init(struct loop_device *lo, int y)
  272. {
  273. - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
  274. - struct address_space *mapping = file->f_mapping;
  275. - struct address_space_operations *aops = mapping->a_ops;
  276. - struct page *page;
  277. - pgoff_t index;
  278. - unsigned size, offset, bv_offs;
  279. - int len;
  280. - int ret = 0;
  281. + struct bio *bio;
  282. + int x;
  283. - down(&mapping->host->i_sem);
  284. - index = pos >> PAGE_CACHE_SHIFT;
  285. - offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
  286. - bv_offs = bvec->bv_offset;
  287. - len = bvec->bv_len;
  288. - while (len > 0) {
  289. - sector_t IV;
  290. - int transfer_result;
  291. -
  292. - IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  293. -
  294. - size = PAGE_CACHE_SIZE - offset;
  295. - if (size > len)
  296. - size = len;
  297. -
  298. - page = grab_cache_page(mapping, index);
  299. - if (!page)
  300. - goto fail;
  301. - if (aops->prepare_write(file, page, offset, offset+size))
  302. - goto unlock;
  303. - transfer_result = lo_do_transfer(lo, WRITE, page, offset,
  304. - bvec->bv_page, bv_offs,
  305. - size, IV);
  306. - if (transfer_result) {
  307. - char *kaddr;
  308. -
  309. - /*
  310. - * The transfer failed, but we still write the data to
  311. - * keep prepare/commit calls balanced.
  312. - */
  313. - printk(KERN_ERR "loop: transfer error block %llu\n",
  314. - (unsigned long long)index);
  315. - kaddr = kmap_atomic(page, KM_USER0);
  316. - memset(kaddr + offset, 0, size);
  317. - kunmap_atomic(kaddr, KM_USER0);
  318. + if(!y) {
  319. + y = lo_prealloc[0];
  320. + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) {
  321. + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) {
  322. + y = lo_prealloc[x + 1];
  323. + break;
  324. + }
  325. }
  326. - flush_dcache_page(page);
  327. - if (aops->commit_write(file, page, offset, offset+size))
  328. - goto unlock;
  329. - if (transfer_result)
  330. - goto unlock;
  331. - bv_offs += size;
  332. - len -= size;
  333. - offset = 0;
  334. - index++;
  335. - pos += size;
  336. - unlock_page(page);
  337. - page_cache_release(page);
  338. }
  339. - up(&mapping->host->i_sem);
  340. -out:
  341. - return ret;
  342. + LDE_lo_bio_flsh = (y * 3) / 4;
  343. -unlock:
  344. - unlock_page(page);
  345. - page_cache_release(page);
  346. -fail:
  347. - up(&mapping->host->i_sem);
  348. - ret = -1;
  349. - goto out;
  350. + for (x = 0; x < y; x++) {
  351. + bio = bio_alloc(GFP_KERNEL, 1);
  352. + if (!bio) {
  353. + fail1:
  354. + loop_prealloc_cleanup(lo);
  355. + return 1;
  356. + }
  357. + bio->bi_io_vec[0].bv_page = alloc_page(GFP_KERNEL);
  358. + if (!bio->bi_io_vec[0].bv_page) {
  359. + fail2:
  360. + bio->bi_next = NULL;
  361. + bio_put(bio);
  362. + goto fail1;
  363. + }
  364. + bio->bi_vcnt = 1;
  365. + bio->bi_private = kmalloc(sizeof(struct loop_bio_extension), GFP_KERNEL);
  366. + if (!bio->bi_private)
  367. + goto fail2;
  368. + bio->bi_next = LDE_lo_bio_free0;
  369. + LDE_lo_bio_free0 = bio;
  370. +
  371. + bio = bio_alloc(GFP_KERNEL, 1);
  372. + if (!bio)
  373. + goto fail1;
  374. + bio->bi_vcnt = 1;
  375. + bio->bi_next = LDE_lo_bio_free1;
  376. + LDE_lo_bio_free1 = bio;
  377. + }
  378. + return 0;
  379. }
  380. -static int
  381. -lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  382. +static void loop_add_queue_last(struct loop_device *lo, struct bio *bio, struct bio **q)
  383. {
  384. - struct bio_vec *bvec;
  385. - int i, ret = 0;
  386. + unsigned long flags;
  387. - bio_for_each_segment(bvec, bio, i) {
  388. - ret = do_lo_send(lo, bvec, bsize, pos);
  389. - if (ret < 0)
  390. - break;
  391. - pos += bvec->bv_len;
  392. + spin_lock_irqsave(&lo->lo_lock, flags);
  393. + if (*q) {
  394. + bio->bi_next = (*q)->bi_next;
  395. + (*q)->bi_next = bio;
  396. + } else {
  397. + bio->bi_next = bio;
  398. }
  399. - return ret;
  400. -}
  401. + *q = bio;
  402. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  403. -struct lo_read_data {
  404. - struct loop_device *lo;
  405. - struct page *page;
  406. - unsigned offset;
  407. - int bsize;
  408. -};
  409. + if (waitqueue_active(&LDE_lo_bio_wait))
  410. + wake_up_interruptible(&LDE_lo_bio_wait);
  411. +}
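Each lo_bio_que* pointer above addresses a circular singly linked list by its tail: the tail's bi_next is the head, so one pointer reaches both ends and both loop_add_queue_last() and loop_get_bio() stay O(1). A stand-alone sketch with a simplified node type in place of struct bio:

    /* Tail-pointer circular list: 'q' points at the tail, tail->next
     * points at the head. */
    #include <stdio.h>

    struct node { int val; struct node *next; };

    static void add_last(struct node *n, struct node **q)
    {
        if (*q) {
            n->next = (*q)->next;   /* new tail points at head */
            (*q)->next = n;
        } else {
            n->next = n;            /* single element: points at itself */
        }
        *q = n;                     /* becomes the new tail */
    }

    static struct node *get_first(struct node **q)
    {
        struct node *tail = *q, *head;
        if (!tail)
            return NULL;
        head = tail->next;
        if (head == tail)
            *q = NULL;              /* queue is now empty */
        else
            tail->next = head->next;
        head->next = NULL;
        return head;
    }

    int main(void)
    {
        struct node a = { 1 }, b = { 2 };
        struct node *q = NULL, *first, *second;

        add_last(&a, &q);
        add_last(&b, &q);
        first = get_first(&q);
        second = get_first(&q);
        printf("%d %d\n", first->val, second->val);  /* 1 2: FIFO */
        return 0;
    }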
  412. -static int
  413. -lo_read_actor(read_descriptor_t *desc, struct page *page,
  414. - unsigned long offset, unsigned long size)
  415. +static void loop_add_queue_first(struct loop_device *lo, struct bio *bio, struct bio **q)
  416. {
  417. - unsigned long count = desc->count;
  418. - struct lo_read_data *p = (struct lo_read_data*)desc->buf;
  419. - struct loop_device *lo = p->lo;
  420. - sector_t IV;
  421. -
  422. - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
  423. -
  424. - if (size > count)
  425. - size = count;
  426. -
  427. - if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
  428. - size = 0;
  429. - printk(KERN_ERR "loop: transfer error block %ld\n",
  430. - page->index);
  431. - desc->error = -EINVAL;
  432. + spin_lock_irq(&lo->lo_lock);
  433. + if (*q) {
  434. + bio->bi_next = (*q)->bi_next;
  435. + (*q)->bi_next = bio;
  436. + } else {
  437. + bio->bi_next = bio;
  438. + *q = bio;
  439. }
  440. -
  441. - desc->count = count - size;
  442. - desc->written += size;
  443. - p->offset += size;
  444. - return size;
  445. + spin_unlock_irq(&lo->lo_lock);
  446. }
  447. -static int
  448. -do_lo_receive(struct loop_device *lo,
  449. - struct bio_vec *bvec, int bsize, loff_t pos)
  450. +static struct bio *loop_get_bio(struct loop_device *lo, int *list_nr)
  451. {
  452. - struct lo_read_data cookie;
  453. - struct file *file;
  454. - int retval;
  455. -
  456. - cookie.lo = lo;
  457. - cookie.page = bvec->bv_page;
  458. - cookie.offset = bvec->bv_offset;
  459. - cookie.bsize = bsize;
  460. - file = lo->lo_backing_file;
  461. - retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
  462. - lo_read_actor, &cookie);
  463. - return (retval < 0)? retval: 0;
  464. + struct bio *bio = NULL, *last;
  465. +
  466. + spin_lock_irq(&lo->lo_lock);
  467. + if ((last = LDE_lo_bio_que0)) {
  468. + bio = last->bi_next;
  469. + if (bio == last)
  470. + LDE_lo_bio_que0 = NULL;
  471. + else
  472. + last->bi_next = bio->bi_next;
  473. + bio->bi_next = NULL;
  474. + *list_nr = 0;
  475. + } else if ((last = LDE_lo_bio_que1)) {
  476. + bio = last->bi_next;
  477. + if (bio == last)
  478. + LDE_lo_bio_que1 = NULL;
  479. + else
  480. + last->bi_next = bio->bi_next;
  481. + bio->bi_next = NULL;
  482. + *list_nr = 1;
  483. + } else if ((last = LDE_lo_bio_que2)) {
  484. + bio = last->bi_next;
  485. + if (bio == last)
  486. + LDE_lo_bio_que2 = NULL;
  487. + else
  488. + last->bi_next = bio->bi_next;
  489. + bio->bi_next = NULL;
  490. + *list_nr = 2;
  491. + }
  492. + spin_unlock_irq(&lo->lo_lock);
  493. + return bio;
  494. }
  495. -static int
  496. -lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
  497. +static void loop_put_buffer(struct loop_device *lo, struct bio *b, int flist)
  498. {
  499. - struct bio_vec *bvec;
  500. - int i, ret = 0;
  501. + unsigned long flags;
  502. + int wk;
  503. - bio_for_each_segment(bvec, bio, i) {
  504. - ret = do_lo_receive(lo, bvec, bsize, pos);
  505. - if (ret < 0)
  506. - break;
  507. - pos += bvec->bv_len;
  508. + spin_lock_irqsave(&lo->lo_lock, flags);
  509. + if(!flist) {
  510. + b->bi_next = LDE_lo_bio_free0;
  511. + LDE_lo_bio_free0 = b;
  512. + wk = LDE_lo_bio_need & 1;
  513. + } else {
  514. + b->bi_next = LDE_lo_bio_free1;
  515. + LDE_lo_bio_free1 = b;
  516. + wk = LDE_lo_bio_need & 2;
  517. }
  518. - return ret;
  519. + spin_unlock_irqrestore(&lo->lo_lock, flags);
  520. +
  521. + if (wk && waitqueue_active(&LDE_lo_bio_wait))
  522. + wake_up_interruptible(&LDE_lo_bio_wait);
  523. }
  524. -static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  525. +static int loop_end_io_transfer(struct bio *bio, unsigned int bytes_done, int err)
  526. {
  527. - loff_t pos;
  528. - int ret;
  529. + struct loop_bio_extension *extension = bio->bi_private;
  530. + struct bio *merge = extension->bioext_merge;
  531. + struct loop_device *lo = extension->bioext_loop;
  532. + struct bio *origbio = merge->bi_private;
  533. - pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  534. - if (bio_rw(bio) == WRITE)
  535. - ret = lo_send(lo, bio, lo->lo_blocksize, pos);
  536. - else
  537. - ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
  538. - return ret;
  539. + if (err)
  540. + clear_bit(0, &merge->bi_flags);
  541. + if (bio->bi_size)
  542. + return 1;
  543. + if (bio_rw(bio) == WRITE) {
  544. + loop_put_buffer(lo, bio, 0);
  545. + if (!atomic_dec_and_test(&merge->bi_cnt))
  546. + return 0;
  547. + if (bio_barrier(origbio))
  548. + atomic_dec(&LDE_lo_bio_barr);
  549. + origbio->bi_next = NULL;
  550. + bio_endio(origbio, origbio->bi_size, test_bit(0, &merge->bi_flags) ? 0 : -EIO);
  551. + loop_put_buffer(lo, merge, 1);
  552. + if (atomic_dec_and_test(&lo->lo_pending))
  553. + wake_up_interruptible(&LDE_lo_bio_wait);
  554. + } else {
  555. + loop_add_queue_last(lo, bio, &LDE_lo_bio_que0);
  556. + }
  557. + return 0;
  558. }
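loop_end_io_transfer() above completes one buffer-bio per bio_vec; the merge-bio is the rendezvous point, its bi_cnt holding the number of vecs still in flight, and whichever completion drops the count to zero ends the original request. A user-space sketch of that rendezvous, with C11 atomics standing in for the kernel's atomic_t and a sticky error flag in place of the bi_flags bit:

    /* Rendezvous counter: the completion that drops 'pending' to
     * zero reports the overall status. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int pending;

    static void complete_one(int err, int *status)
    {
        if (err)
            *status = err;                       /* sticky error flag */
        if (atomic_fetch_sub(&pending, 1) == 1)  /* last one out */
            printf("endio: status %d\n", *status);
    }

    int main(void)
    {
        int status = 0, vecs = 3, i;

        atomic_store(&pending, vecs);
        for (i = 0; i < vecs; i++)
            complete_one(0, &status);   /* third call prints "status 0" */
        return 0;
    }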
  559. -/*
  560. - * Add bio to back of pending list
  561. - */
  562. -static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  563. +static struct bio *loop_get_buffer(struct loop_device *lo,
  564. + struct bio *orig_bio, int from_thread, struct bio **merge_ptr)
  565. {
  566. + struct bio *bio = NULL, *merge = *merge_ptr;
  567. + struct loop_bio_extension *extension;
  568. unsigned long flags;
  569. + int len;
  570. +
  571. + /*
  572. + * If called from make_request and if there are unprocessed
  573. + * barrier requests, fail allocation so that request is
  574. + * inserted to end of no-merge-allocated list. This guarantees
  575. + * FIFO processing order of requests.
  576. + */
  577. + if (!from_thread && atomic_read(&LDE_lo_bio_barr))
  578. + return NULL;
  579. spin_lock_irqsave(&lo->lo_lock, flags);
  580. - if (lo->lo_biotail) {
  581. - lo->lo_biotail->bi_next = bio;
  582. - lo->lo_biotail = bio;
  583. - } else
  584. - lo->lo_bio = lo->lo_biotail = bio;
  585. + if (!merge) {
  586. + merge = LDE_lo_bio_free1;
  587. + if (merge) {
  588. + LDE_lo_bio_free1 = merge->bi_next;
  589. + if (from_thread)
  590. + LDE_lo_bio_need = 0;
  591. + } else {
  592. + if (from_thread)
  593. + LDE_lo_bio_need = 2;
  594. + }
  595. + }
  596. +
  597. + /*
  598. + * If there are unprocessed barrier requests and a merge-bio was just
  599. + * allocated, do not allocate a buffer-bio yet. This causes request
  600. + * to be moved from head of no-merge-allocated list to end of
  601. + * merge-allocated list. This guarantees FIFO processing order
  602. + * of requests.
  603. + */
  604. + if (merge && (*merge_ptr || !atomic_read(&LDE_lo_bio_barr))) {
  605. + bio = LDE_lo_bio_free0;
  606. + if (bio) {
  607. + LDE_lo_bio_free0 = bio->bi_next;
  608. + if (from_thread)
  609. + LDE_lo_bio_need = 0;
  610. + } else {
  611. + if (from_thread)
  612. + LDE_lo_bio_need = 1;
  613. + }
  614. + }
  615. spin_unlock_irqrestore(&lo->lo_lock, flags);
  616. - up(&lo->lo_bh_mutex);
  617. + if (!(*merge_ptr) && merge) {
  618. + /*
  619. + * initialize "merge-bio" which is used as
  620. + * rendezvous point among multiple vecs
  621. + */
  622. + *merge_ptr = merge;
  623. + merge->bi_sector = orig_bio->bi_sector + LDE_lo_offs_sec;
  624. + set_bit(0, &merge->bi_flags);
  625. + merge->bi_idx = orig_bio->bi_idx;
  626. + atomic_set(&merge->bi_cnt, orig_bio->bi_vcnt - orig_bio->bi_idx);
  627. + merge->bi_private = orig_bio;
  628. + }
  629. +
  630. + if (!bio)
  631. + return NULL;
  632. +
  633. + /*
  634. + * initialize one page "buffer-bio"
  635. + */
  636. + bio->bi_sector = merge->bi_sector;
  637. + bio->bi_next = NULL;
  638. + bio->bi_bdev = lo->lo_device;
  639. + bio->bi_flags = 0;
  640. + bio->bi_rw = orig_bio->bi_rw & ~(1 << BIO_RW_BARRIER);
  641. + if (bio_barrier(orig_bio) && ((merge->bi_idx == orig_bio->bi_idx) || (merge->bi_idx == (orig_bio->bi_vcnt - 1))))
  642. + bio->bi_rw |= (1 << BIO_RW_BARRIER);
  643. + bio->bi_vcnt = 1;
  644. + bio->bi_idx = 0;
  645. + bio->bi_phys_segments = 0;
  646. + bio->bi_hw_segments = 0;
  647. + bio->bi_size = len = orig_bio->bi_io_vec[merge->bi_idx].bv_len;
  648. + /* bio->bi_max_vecs not touched */
  649. + bio->bi_io_vec[0].bv_len = len;
  650. + bio->bi_io_vec[0].bv_offset = 0;
  651. + bio->bi_end_io = loop_end_io_transfer;
  652. + /* bio->bi_cnt not touched */
  653. + /* bio->bi_private not touched */
  654. + /* bio->bi_destructor not touched */
  655. +
  656. + /*
  657. + * initialize "buffer-bio" extension. This extension is
  658. + * permanently glued to above "buffer-bio" via bio->bi_private
  659. + */
  660. + extension = bio->bi_private;
  661. + extension->bioext_merge = merge;
  662. + extension->bioext_loop = lo;
  663. + extension->bioext_iv = merge->bi_sector - LDE_lo_iv_remove;
  664. + extension->bioext_index = merge->bi_idx;
  665. + extension->bioext_size = len;
  666. +
  667. + /*
  668. + * prepare "merge-bio" for next vec
  669. + */
  670. + merge->bi_sector += len >> 9;
  671. + merge->bi_idx++;
  672. +
  673. + return bio;
  674. }
  675. -/*
  676. - * Grab first pending buffer
  677. - */
  678. -static struct bio *loop_get_bio(struct loop_device *lo)
  679. +static int figure_loop_size(struct loop_device *lo, struct block_device *bdev)
  680. {
  681. - struct bio *bio;
  682. + loff_t size, offs;
  683. + sector_t x;
  684. + int err = 0;
  685. - spin_lock_irq(&lo->lo_lock);
  686. - if ((bio = lo->lo_bio)) {
  687. - if (bio == lo->lo_biotail)
  688. - lo->lo_biotail = NULL;
  689. - lo->lo_bio = bio->bi_next;
  690. - bio->bi_next = NULL;
  691. + size = i_size_read(lo->lo_backing_file->f_dentry->d_inode->i_mapping->host);
  692. + offs = lo->lo_offset;
  693. + if (!(lo->lo_flags & LO_FLAGS_DO_BMAP))
  694. + offs &= ~((loff_t)511);
  695. + if ((offs > 0) && (offs < size)) {
  696. + size -= offs;
  697. + } else {
  698. + if (offs)
  699. + err = -EINVAL;
  700. + lo->lo_offset = 0;
  701. + LDE_lo_offs_sec = LDE_lo_iv_remove = 0;
  702. }
  703. - spin_unlock_irq(&lo->lo_lock);
  704. + if ((lo->lo_sizelimit > 0) && (lo->lo_sizelimit <= size)) {
  705. + size = lo->lo_sizelimit;
  706. + } else {
  707. + if (lo->lo_sizelimit)
  708. + err = -EINVAL;
  709. + lo->lo_sizelimit = 0;
  710. + }
  711. + size >>= 9;
  712. - return bio;
  713. + /*
  714. + * Unfortunately, if we want to do I/O on the device,
  715. + * the number of 512-byte sectors has to fit into a sector_t.
  716. + */
  717. + x = (sector_t)size;
  718. + if ((loff_t)x != size) {
  719. + err = -EFBIG;
  720. + size = 0;
  721. + }
  722. +
  723. + bdev->bd_inode->i_size = size << 9; /* byte units */
  724. + set_capacity(disks[lo->lo_number], size); /* 512 byte units */
  725. + return err;
  726. +}
  727. +
  728. +static inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf,
  729. + char *lbuf, int size, sector_t rblock)
  730. +{
  731. + if (!lo->transfer)
  732. + return 0;
  733. +
  734. + /* this ugly cast is needed to work around (possible) kmap damage in function prototype */
  735. + /* should be: return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock); */
  736. + return ((int (*)(struct loop_device *, int, char *, char *, int, sector_t))lo->transfer)(lo, cmd, rbuf, lbuf, size, rblock);
  737. +}
  738. +
  739. +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w)
  740. +{
  741. + mm_segment_t fs;
  742. + int x, y, z;
  743. +
  744. + y = 0;
  745. + do {
  746. + z = size - y;
  747. + fs = get_fs();
  748. + set_fs(get_ds());
  749. + if (w) {
  750. + x = file->f_op->write(file, buf + y, z, ppos);
  751. + set_fs(fs);
  752. + } else {
  753. + x = file->f_op->read(file, buf + y, z, ppos);
  754. + set_fs(fs);
  755. + if (!x)
  756. + return 1;
  757. + }
  758. + if (x < 0) {
  759. + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) {
  760. + blk_run_queues();
  761. + set_current_state(TASK_INTERRUPTIBLE);
  762. + schedule_timeout(HZ / 2);
  763. + continue;
  764. + }
  765. + return 1;
  766. + }
  767. + y += x;
  768. + } while (y < size);
  769. + return 0;
  770. +}
  771. +
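loop_file_io() above drives file->f_op->read/write from kernel context (hence the set_fs(get_ds()) dance), accumulates short transfers, and backs off on transient errors. ERESTART and the blk_run_queues() kick are kernel-internal; a user-space analogue of the same retry loop using plain POSIX read(2):

    /* Accumulate short reads; retry transient errnos; return 0 on
     * success, 1 on EOF or hard error, matching the convention above. */
    #include <errno.h>
    #include <unistd.h>

    static int read_full(int fd, char *buf, int size)
    {
        int y = 0;
        while (y < size) {
            int x = read(fd, buf + y, size - y);
            if (x == 0)
                return 1;               /* EOF */
            if (x < 0) {
                if (errno == EAGAIN || errno == EINTR)
                    continue;           /* transient: try again */
                return 1;               /* hard error */
            }
            y += x;                     /* short transfer is progress */
        }
        return 0;
    }

    int main(void)
    {
        char buf[16];
        return read_full(0, buf, sizeof(buf));  /* 16 bytes from stdin */
    }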
  772. +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  773. +{
  774. + loff_t pos;
  775. + struct file *file = lo->lo_backing_file;
  776. + char *data, *buf;
  777. + unsigned int size, len;
  778. + sector_t IV;
  779. + struct page *pg;
  780. +
  781. + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
  782. + buf = page_address(LDE_lo_bio_free0->bi_io_vec[0].bv_page);
  783. + IV = bio->bi_sector;
  784. + if (!LDE_lo_iv_remove)
  785. + IV += LDE_lo_offs_sec;
  786. + do {
  787. + pg = bio->bi_io_vec[bio->bi_idx].bv_page;
  788. + len = bio->bi_io_vec[bio->bi_idx].bv_len;
  789. + data = kmap(pg) + bio->bi_io_vec[bio->bi_idx].bv_offset;
  790. + while (len > 0) {
  791. + if (!lo->lo_encryption) {
  792. + /* this code relies that NONE transfer is a no-op */
  793. + buf = data;
  794. + }
  795. + size = PAGE_SIZE;
  796. + if (size > len)
  797. + size = len;
  798. + if (bio_rw(bio) == WRITE) {
  799. + if (lo_do_transfer(lo, WRITE, buf, data, size, IV)) {
  800. + printk(KERN_ERR "loop%d: write transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  801. + goto kunmap_and_out;
  802. + }
  803. + if (loop_file_io(file, buf, size, &pos, 1)) {
  804. + printk(KERN_ERR "loop%d: write i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  805. + goto kunmap_and_out;
  806. + }
  807. + } else {
  808. + if (loop_file_io(file, buf, size, &pos, 0)) {
  809. + printk(KERN_ERR "loop%d: read i/o error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  810. + goto kunmap_and_out;
  811. + }
  812. + if (lo_do_transfer(lo, READ, buf, data, size, IV)) {
  813. + printk(KERN_ERR "loop%d: read transfer error, sector %llu\n", lo->lo_number, (unsigned long long)IV);
  814. + goto kunmap_and_out;
  815. + }
  816. + }
  817. + data += size;
  818. + len -= size;
  819. + IV += size >> 9;
  820. + }
  821. + kunmap(pg);
  822. + } while (++bio->bi_idx < bio->bi_vcnt);
  823. + return 0;
  824. +
  825. +kunmap_and_out:
  826. + kunmap(pg);
  827. + return -EIO;
  828. +}
  829. +
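Throughout do_bio_filebacked() above, the IV is simply the 512-byte sector number of the data being transformed: it starts at bi_sector, gains the device offset unless lo_iv_remove is set, and advances by size >> 9 after each chunk. A small sketch of that bookkeeping with illustrative numbers:

    /* IV arithmetic: sector number of the data, with the offset
     * included or removed, stepping one per 512 bytes. */
    #include <stdio.h>

    typedef unsigned long long sector_t;

    int main(void)
    {
        sector_t bi_sector = 100;      /* request start, in sectors */
        sector_t offs_sec  = 8;        /* lo_offset >> 9 */
        int iv_remove      = 0;        /* nonzero: negative-offset mode */
        unsigned int size  = 4096;     /* one page processed */
        sector_t IV        = bi_sector;

        if (!iv_remove)
            IV += offs_sec;            /* include offset in IV */
        printf("first IV: %llu\n", IV);   /* 108 */
        IV += size >> 9;               /* advance by sectors consumed */
        printf("next  IV: %llu\n", IV);   /* 116 */
        return 0;
    }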
  830. +static int loop_make_request_err(request_queue_t *q, struct bio *old_bio)
  831. +{
  832. + old_bio->bi_next = NULL;
  833. + bio_io_error(old_bio, old_bio->bi_size);
  834. + return 0;
  835. }
  836. -static int loop_make_request(request_queue_t *q, struct bio *old_bio)
  837. +static int loop_make_request_real(request_queue_t *q, struct bio *old_bio)
  838. {
  839. + struct bio *new_bio, *merge;
  840. struct loop_device *lo = q->queuedata;
  841. - int rw = bio_rw(old_bio);
  842. + struct loop_bio_extension *extension;
  843. + int rw = bio_rw(old_bio), y;
  844. + char *md;
  845. + set_current_state(TASK_RUNNING);
  846. if (!lo)
  847. goto out;
  848. -
  849. - spin_lock_irq(&lo->lo_lock);
  850. - if (lo->lo_state != Lo_bound)
  851. - goto inactive;
  852. + if ((rw == WRITE) && (lo->lo_flags & LO_FLAGS_READ_ONLY))
  853. + goto out;
  854. atomic_inc(&lo->lo_pending);
  855. - spin_unlock_irq(&lo->lo_lock);
  856. + /*
  857. + * file backed, queue for loop_thread to handle
  858. + */
  859. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  860. + loop_add_queue_last(lo, old_bio, &LDE_lo_bio_que0);
  861. + return 0;
  862. + }
  863. +
  864. + /*
  865. + * device backed, just remap bdev & sector for NONE transfer
  866. + */
  867. + if (!lo->lo_encryption) {
  868. + old_bio->bi_sector += LDE_lo_offs_sec;
  869. + old_bio->bi_bdev = lo->lo_device;
  870. + generic_make_request(old_bio);
  871. + if (atomic_dec_and_test(&lo->lo_pending))
  872. + wake_up_interruptible(&LDE_lo_bio_wait);
  873. + return 0;
  874. + }
  875. +
  876. + /*
  877. + * device backed, start reads and writes now if buffer available
  878. + */
  879. + merge = NULL;
  880. + if (bio_barrier(old_bio))
  881. + atomic_inc(&LDE_lo_bio_barr);
  882. + try_next_old_bio_vec:
  883. + new_bio = loop_get_buffer(lo, old_bio, 0, &merge);
  884. + if (!new_bio) {
  885. + /* just queue request and let thread handle allocs later */
  886. + if (merge)
  887. + loop_add_queue_last(lo, merge, &LDE_lo_bio_que1);
  888. + else
  889. + loop_add_queue_last(lo, old_bio, &LDE_lo_bio_que2);
  890. + return 0;
  891. + }
  892. if (rw == WRITE) {
  893. - if (lo->lo_flags & LO_FLAGS_READ_ONLY)
  894. - goto err;
  895. - } else if (rw == READA) {
  896. - rw = READ;
  897. - } else if (rw != READ) {
  898. - printk(KERN_ERR "loop: unknown command (%x)\n", rw);
  899. - goto err;
  900. + extension = new_bio->bi_private;
  901. + y = extension->bioext_index;
  902. + md = kmap(old_bio->bi_io_vec[y].bv_page) + old_bio->bi_io_vec[y].bv_offset;
  903. + if (lo_do_transfer(lo, WRITE, page_address(new_bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  904. + clear_bit(0, &merge->bi_flags);
  905. + }
  906. + kunmap(old_bio->bi_io_vec[y].bv_page);
  907. }
  908. - loop_add_bio(lo, old_bio);
  909. +
  910. + /* merge & old_bio may vanish during generic_make_request() */
  911. + /* if last vec gets processed before function returns */
  912. + y = (merge->bi_idx < old_bio->bi_vcnt) ? 1 : 0;
  913. + generic_make_request(new_bio);
  914. +
  915. + /* other vecs may need processing too */
  916. + if (y)
  917. + goto try_next_old_bio_vec;
  918. return 0;
  919. -err:
  920. - if (atomic_dec_and_test(&lo->lo_pending))
  921. - up(&lo->lo_bh_mutex);
  922. +
  923. out:
  924. + old_bio->bi_next = NULL;
  925. bio_io_error(old_bio, old_bio->bi_size);
  926. return 0;
  927. -inactive:
  928. - spin_unlock_irq(&lo->lo_lock);
  929. - goto out;
  930. -}
  931. -
  932. -static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
  933. -{
  934. - int ret;
  935. -
  936. - ret = do_bio_filebacked(lo, bio);
  937. - bio_endio(bio, bio->bi_size, ret);
  938. }
  939. /*
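loop_make_request_real() above picks one of three paths: file-backed requests are queued for loop_thread, unencrypted device-backed requests are simply remapped and resubmitted, and encrypted device-backed requests get buffer-bios allocated up front. A sketch of just that dispatch; the flag and encryption tests mirror the real ones, the handlers are reduced to labels:

    /* Request routing in miniature; struct and values are
     * simplified stand-ins. */
    #include <stdio.h>

    #define LO_FLAGS_DO_BMAP 0x80000

    struct lo { int flags; void *encryption; };

    static const char *route(struct lo *lo)
    {
        if (lo->flags & LO_FLAGS_DO_BMAP)
            return "queue for loop_thread (file backed)";
        if (!lo->encryption)
            return "remap + generic_make_request (device, no crypt)";
        return "allocate buffer bios, encrypt, submit (device, crypt)";
    }

    int main(void)
    {
        struct lo file_lo = { LO_FLAGS_DO_BMAP, 0 };
        struct lo plain_lo = { 0, 0 };
        int key = 1;
        struct lo crypt_lo = { 0, &key };

        printf("%s\n%s\n%s\n", route(&file_lo), route(&plain_lo),
               route(&crypt_lo));
        return 0;
    }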
  940. @@ -446,8 +751,15 @@ static inline void loop_handle_bio(struc
  941. static int loop_thread(void *data)
  942. {
  943. struct loop_device *lo = data;
  944. - struct bio *bio;
  945. + struct bio *bio, *xbio, *merge;
  946. + struct loop_bio_extension *extension;
  947. + int x, y, flushcnt = 0;
  948. + wait_queue_t waitq;
  949. + char *md;
  950. + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS;
  951. + init_waitqueue_entry(&waitq, current);
  952. + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim));
  953. daemonize("loop%d", lo->lo_number);
  954. /*
  955. @@ -455,11 +767,19 @@ static int loop_thread(void *data)
  956. * hence, it mustn't be stopped at all
  957. * because it could be indirectly used during suspension
  958. */
  959. +#if defined(PF_NOFREEZE)
  960. + current->flags |= PF_NOFREEZE;
  961. +#elif defined(PF_IOTHREAD)
  962. current->flags |= PF_IOTHREAD;
  963. +#endif
  964. + current->flags |= PF_LESS_THROTTLE;
  965. - set_user_nice(current, -20);
  966. + if (lo_nice > 0)
  967. + lo_nice = 0;
  968. + if (lo_nice < -20)
  969. + lo_nice = -20;
  970. + set_user_nice(current, lo_nice);
  971. - lo->lo_state = Lo_bound;
  972. atomic_inc(&lo->lo_pending);
  973. /*
  974. @@ -468,23 +788,138 @@ static int loop_thread(void *data)
  975. up(&lo->lo_sem);
  976. for (;;) {
  977. - down_interruptible(&lo->lo_bh_mutex);
  978. + add_wait_queue(&LDE_lo_bio_wait, &waitq);
  979. + for (;;) {
  980. + set_current_state(TASK_INTERRUPTIBLE);
  981. + if (!atomic_read(&lo->lo_pending))
  982. + break;
  983. +
  984. + x = 0;
  985. + spin_lock_irq(&lo->lo_lock);
  986. + if (LDE_lo_bio_que0) {
  987. + /* don't sleep if device backed READ needs processing */
  988. + /* don't sleep if file backed READ/WRITE needs processing */
  989. + x = 1;
  990. + } else if (LDE_lo_bio_que1) {
  991. + /* don't sleep if a buffer-bio is available */
  992. + /* don't sleep if need-buffer-bio request is not set */
  993. + if (LDE_lo_bio_free0 || !(LDE_lo_bio_need & 1))
  994. + x = 1;
  995. + } else if (LDE_lo_bio_que2) {
  996. + /* don't sleep if a merge-bio is available */
  997. + /* don't sleep if need-merge-bio request is not set */
  998. + if (LDE_lo_bio_free1 || !(LDE_lo_bio_need & 2))
  999. + x = 1;
  1000. + }
  1001. + spin_unlock_irq(&lo->lo_lock);
  1002. + if (x)
  1003. + break;
  1004. +
  1005. + schedule();
  1006. + }
  1007. + set_current_state(TASK_RUNNING);
  1008. + remove_wait_queue(&LDE_lo_bio_wait, &waitq);
  1009. +
  1010. /*
  1011. - * could be upped because of tear-down, not because of
  1012. + * could be woken because of tear-down, not because of
  1013. * pending work
  1014. */
  1015. if (!atomic_read(&lo->lo_pending))
  1016. break;
  1017. - bio = loop_get_bio(lo);
  1018. - if (!bio) {
  1019. - printk("loop: missing bio\n");
  1020. + bio = loop_get_bio(lo, &x);
  1021. + if (!bio)
  1022. continue;
  1023. +
  1024. + /*
  1025. + * x list tag usage(has-buffer,has-merge)
  1026. + * --- --------------- ---------------------------
  1027. + * 0 LDE_lo_bio_que0 dev-r(y,y) / file-rw
  1028. + * 1 LDE_lo_bio_que1 dev-rw(n,y)
  1029. + * 2 LDE_lo_bio_que2 dev-rw(n,n)
  1030. + */
  1031. + if (x >= 1) {
  1032. + /* loop_make_request_real didn't allocate a buffer, do that now */
  1033. + if (x == 1) {
  1034. + merge = bio;
  1035. + bio = merge->bi_private;
  1036. + } else {
  1037. + merge = NULL;
  1038. + }
  1039. + try_next_bio_vec:
  1040. + xbio = loop_get_buffer(lo, bio, 1, &merge);
  1041. + if (!xbio) {
  1042. + blk_run_queues();
  1043. + flushcnt = 0;
  1044. + if (merge)
  1045. + loop_add_queue_first(lo, merge, &LDE_lo_bio_que1);
  1046. + else
  1047. + loop_add_queue_first(lo, bio, &LDE_lo_bio_que2);
  1048. + /* LDE_lo_bio_need should be non-zero now, go back to sleep */
  1049. + continue;
  1050. + }
  1051. + if (bio_rw(bio) == WRITE) {
  1052. + extension = xbio->bi_private;
  1053. + y = extension->bioext_index;
  1054. + md = kmap(bio->bi_io_vec[y].bv_page) + bio->bi_io_vec[y].bv_offset;
  1055. + if (lo_do_transfer(lo, WRITE, page_address(xbio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1056. + clear_bit(0, &merge->bi_flags);
  1057. + }
  1058. + kunmap(bio->bi_io_vec[y].bv_page);
  1059. + }
  1060. +
  1061. + /* merge & bio may vanish during generic_make_request() */
  1062. + /* if last vec gets processed before function returns */
  1063. + y = (merge->bi_idx < bio->bi_vcnt) ? 1 : 0;
  1064. + generic_make_request(xbio);
  1065. +
  1066. + /* start I/O if there are no more requests lacking buffers */
  1067. + x = 0;
  1068. + spin_lock_irq(&lo->lo_lock);
  1069. + if (!y && !LDE_lo_bio_que1 && !LDE_lo_bio_que2)
  1070. + x = 1;
  1071. + spin_unlock_irq(&lo->lo_lock);
  1072. + if (x || (++flushcnt >= LDE_lo_bio_flsh)) {
  1073. + blk_run_queues();
  1074. + flushcnt = 0;
  1075. + }
  1076. +
  1077. + /* other vecs may need processing too */
  1078. + if (y)
  1079. + goto try_next_bio_vec;
  1080. +
  1081. + /* request not completely processed yet */
  1082. + continue;
  1083. + }
  1084. +
  1085. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1086. + /* request is for file backed device */
  1087. + y = do_bio_filebacked(lo, bio);
  1088. + bio->bi_next = NULL;
  1089. + bio_endio(bio, bio->bi_size, y);
  1090. + } else {
  1091. + /* device backed read has completed, do decrypt now */
  1092. + extension = bio->bi_private;
  1093. + merge = extension->bioext_merge;
  1094. + y = extension->bioext_index;
  1095. + xbio = merge->bi_private;
  1096. + md = kmap(xbio->bi_io_vec[y].bv_page) + xbio->bi_io_vec[y].bv_offset;
  1097. + if (lo_do_transfer(lo, READ, page_address(bio->bi_io_vec[0].bv_page), md, extension->bioext_size, extension->bioext_iv)) {
  1098. + clear_bit(0, &merge->bi_flags);
  1099. + }
  1100. + kunmap(xbio->bi_io_vec[y].bv_page);
  1101. + loop_put_buffer(lo, bio, 0);
  1102. + if (!atomic_dec_and_test(&merge->bi_cnt))
  1103. + continue;
  1104. + if (bio_barrier(xbio))
  1105. + atomic_dec(&LDE_lo_bio_barr);
  1106. + xbio->bi_next = NULL;
  1107. + bio_endio(xbio, xbio->bi_size, test_bit(0, &merge->bi_flags) ? 0 : -EIO);
  1108. + loop_put_buffer(lo, merge, 1);
  1109. }
  1110. - loop_handle_bio(lo, bio);
  1111. /*
  1112. - * upped both for pending work and tear-down, lo_pending
  1113. + * woken both for pending work and tear-down, lo_pending
  1114. * will hit zero then
  1115. */
  1116. if (atomic_dec_and_test(&lo->lo_pending))
  1117. @@ -495,125 +930,200 @@ static int loop_thread(void *data)
  1118. return 0;
  1119. }
  1120. +static void loop_set_softblksz(struct loop_device *lo, struct block_device *bdev)
  1121. +{
  1122. + int bs, x;
  1123. +
  1124. + if (lo->lo_device)
  1125. + bs = block_size(lo->lo_device);
  1126. + else
  1127. + bs = PAGE_SIZE;
  1128. + if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
  1129. + x = (int) bdev->bd_inode->i_size;
  1130. + if ((bs == 8192) && (x & 0x1E00))
  1131. + bs = 4096;
  1132. + if ((bs == 4096) && (x & 0x0E00))
  1133. + bs = 2048;
  1134. + if ((bs == 2048) && (x & 0x0600))
  1135. + bs = 1024;
  1136. + if ((bs == 1024) && (x & 0x0200))
  1137. + bs = 512;
  1138. + }
  1139. + set_blocksize(bdev, bs);
  1140. +}
  1141. +
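The bit tests in loop_set_softblksz() above (0x1E00, 0x0E00, 0x0600, 0x0200) halve the soft block size until it divides the device size, ignoring the sub-sector bits, which are always zero here because the size was stored as whole sectors. Assuming that alignment, the cascade is equivalent to this loop:

    /* Equivalent form of the cascade above, assuming a 512-byte
     * aligned size; bottoms out at 512.  Sizes are illustrative. */
    #include <stdio.h>

    static int soft_blocksize(int bs, long long size)
    {
        while (bs > 512 && (size & (bs - 1)))
            bs >>= 1;       /* size not a multiple of bs: halve it */
        return bs;
    }

    int main(void)
    {
        /* a 10.5 KiB file forces 512, a 12 KiB file allows 4096 */
        printf("%d %d\n",
               soft_blocksize(8192, 10752),
               soft_blocksize(8192, 12288));  /* prints 512 4096 */
        return 0;
    }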
  1142. static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
  1143. struct block_device *bdev, unsigned int arg)
  1144. {
  1145. struct file *file;
  1146. struct inode *inode;
  1147. struct block_device *lo_device = NULL;
  1148. - struct address_space *mapping;
  1149. - unsigned lo_blocksize;
  1150. int lo_flags = 0;
  1151. int error;
  1152. - /* This is safe, since we have a reference from open(). */
  1153. - __module_get(THIS_MODULE);
  1154. -
  1155. - error = -EBUSY;
  1156. - if (lo->lo_state != Lo_unbound)
  1157. - goto out;
  1158. -
  1159. error = -EBADF;
  1160. file = fget(arg);
  1161. if (!file)
  1162. goto out;
  1163. - mapping = file->f_mapping;
  1164. - inode = mapping->host;
  1165. + error = -EINVAL;
  1166. + inode = file->f_dentry->d_inode;
  1167. if (!(file->f_mode & FMODE_WRITE))
  1168. lo_flags |= LO_FLAGS_READ_ONLY;
  1169. - error = -EINVAL;
  1170. - if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
  1171. - struct address_space_operations *aops = mapping->a_ops;
  1172. + init_MUTEX_LOCKED(&lo->lo_sem);
  1173. + spin_lock_init(&lo->lo_lock);
  1174. + init_waitqueue_head(&LDE_lo_bio_wait);
  1175. + atomic_set(&lo->lo_pending, 0);
  1176. + atomic_set(&LDE_lo_bio_barr, 0);
  1177. + lo->lo_offset = lo->lo_sizelimit = 0;
  1178. + LDE_lo_offs_sec = LDE_lo_iv_remove = 0;
  1179. + lo->lo_encryption = NULL;
  1180. + lo->lo_encrypt_key_size = 0;
  1181. + lo->transfer = NULL;
  1182. + lo->lo_crypt_name[0] = 0;
  1183. + lo->lo_file_name[0] = 0;
  1184. + lo->lo_init[1] = lo->lo_init[0] = 0;
  1185. + lo->lo_key_owner = 0;
  1186. + lo->ioctl = NULL;
  1187. + lo->key_data = NULL;
  1188. + LDE_lo_bio_que2 = LDE_lo_bio_que1 = LDE_lo_bio_que0 = NULL;
  1189. + LDE_lo_bio_free1 = LDE_lo_bio_free0 = NULL;
  1190. + LDE_lo_bio_flsh = LDE_lo_bio_need = 0;
  1191. +
  1192. + if (S_ISBLK(inode->i_mode)) {
  1193. + lo_device = inode->i_bdev;
  1194. + if (lo_device == bdev) {
  1195. + error = -EBUSY;
  1196. + goto out_putf;
  1197. + }
  1198. + if (loop_prealloc_init(lo, 0)) {
  1199. + error = -ENOMEM;
  1200. + goto out_putf;
  1201. + }
  1202. + if (bdev_read_only(lo_device))
  1203. + lo_flags |= LO_FLAGS_READ_ONLY;
  1204. + else
  1205. + filemap_fdatawrite(inode->i_mapping);
  1206. + } else if (S_ISREG(inode->i_mode)) {
  1207. /*
  1208. * If we can't read - sorry. If we only can't write - well,
  1209. * it's going to be read-only.
  1210. */
  1211. - if (!lo_file->f_op->sendfile)
  1212. + if (!file->f_op || !file->f_op->read)
  1213. goto out_putf;
  1214. - if (!aops->prepare_write || !aops->commit_write)
  1215. + if (!file->f_op->write)
  1216. lo_flags |= LO_FLAGS_READ_ONLY;
  1217. - lo_blocksize = inode->i_blksize;
  1218. - error = 0;
  1219. - } else {
  1220. + lo_flags |= LO_FLAGS_DO_BMAP;
  1221. + if (loop_prealloc_init(lo, 1)) {
  1222. + error = -ENOMEM;
  1223. + goto out_putf;
  1224. + }
  1225. + } else
  1226. goto out_putf;
  1227. - }
  1228. +
  1229. + get_file(file);
  1230. if (!(lo_file->f_mode & FMODE_WRITE))
  1231. lo_flags |= LO_FLAGS_READ_ONLY;
  1232. set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
  1233. - lo->lo_blocksize = lo_blocksize;
  1234. lo->lo_device = lo_device;
  1235. lo->lo_flags = lo_flags;
  1236. + if(lo_flags & LO_FLAGS_READ_ONLY)
  1237. + lo->lo_flags |= 0x200000; /* export to user space */
  1238. lo->lo_backing_file = file;
  1239. - lo->transfer = NULL;
  1240. - lo->ioctl = NULL;
  1241. - lo->lo_sizelimit = 0;
  1242. - if (figure_loop_size(lo)) {
  1243. + if (figure_loop_size(lo, bdev)) {
  1244. error = -EFBIG;
  1245. - goto out_putf;
  1246. + goto out_cleanup;
  1247. }
  1248. - lo->old_gfp_mask = mapping_gfp_mask(mapping);
  1249. - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
  1250. -
  1251. - lo->lo_bio = lo->lo_biotail = NULL;
  1252. /*
  1253. * set queue make_request_fn, and add limits based on lower level
  1254. * device
  1255. */
  1256. - blk_queue_make_request(lo->lo_queue, loop_make_request);
  1257. - lo->lo_queue->queuedata = lo;
  1258. + blk_queue_make_request(lo->lo_queue, loop_make_request_err);
  1259. + blk_queue_bounce_limit(lo->lo_queue, BLK_BOUNCE_ANY);
  1260. + blk_queue_max_segment_size(lo->lo_queue, MAX_SEGMENT_SIZE);
  1261. - set_blocksize(bdev, lo_blocksize);
  1262. + /*
  1263. + * we remap to a block device, make sure we correctly stack limits
  1264. + */
  1265. + if (S_ISBLK(inode->i_mode) && lo_device) {
  1266. + request_queue_t *q = bdev_get_queue(lo_device);
  1267. - kernel_thread(loop_thread, lo, CLONE_KERNEL);
  1268. + blk_queue_max_sectors(lo->lo_queue, q->max_sectors);
  1269. + blk_queue_max_phys_segments(lo->lo_queue,q->max_phys_segments);
  1270. + blk_queue_max_hw_segments(lo->lo_queue, q->max_hw_segments);
  1271. + blk_queue_max_segment_size(lo->lo_queue, q->max_segment_size);
  1272. + blk_queue_segment_boundary(lo->lo_queue, q->seg_boundary_mask);
  1273. + blk_queue_merge_bvec(lo->lo_queue, q->merge_bvec_fn);
  1274. + blk_queue_hardsect_size(lo->lo_queue, q->hardsect_size);
  1275. + }
  1276. +
  1277. + if (lo_flags & LO_FLAGS_DO_BMAP) {
  1278. + lo->old_gfp_mask = mapping_gfp_mask(inode->i_mapping);
  1279. + mapping_set_gfp_mask(inode->i_mapping, (lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)) | __GFP_HIGH);
  1280. + } else {
  1281. + lo->old_gfp_mask = -1;
  1282. + }
  1283. +
  1284. + loop_set_softblksz(lo, bdev);
  1285. +
  1286. + error = kernel_thread(loop_thread, lo, CLONE_KERNEL);
  1287. + if(error < 0)
  1288. + goto out_mapping;
  1289. down(&lo->lo_sem);
  1290. + fput(file);
  1291. + lo->lo_queue->queuedata = lo;
  1292. + __module_get(THIS_MODULE);
  1293. return 0;
  1294. + out_mapping:
  1295. + if(lo->old_gfp_mask != -1)
  1296. + mapping_set_gfp_mask(inode->i_mapping, lo->old_gfp_mask);
  1297. + out_cleanup:
  1298. + loop_prealloc_cleanup(lo);
  1299. + fput(file);
  1300. out_putf:
  1301. fput(file);
  1302. out:
  1303. - /* This is safe: open() is still holding a reference. */
  1304. - module_put(THIS_MODULE);
  1305. return error;
  1306. }
  1307. -static int
  1308. -loop_release_xfer(struct loop_device *lo)
  1309. +static int loop_release_xfer(struct loop_device *lo)
  1310. {
  1311. int err = 0;
  1312. - struct loop_func_table *xfer = lo->lo_encryption;
  1313. + struct loop_func_table *xfer = lo->lo_encryption;
  1314. if (xfer) {
  1315. + lo->transfer = NULL;
  1316. if (xfer->release)
  1317. - err = xfer->release(lo);
  1318. - lo->transfer = NULL;
  1319. + err = xfer->release(lo);
  1320. lo->lo_encryption = NULL;
  1321. module_put(xfer->owner);
  1322. }
  1323. return err;
  1324. }
  1325. -static int
  1326. -loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
  1327. - const struct loop_info64 *i)
  1328. +static int loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, struct loop_info64 *i)
  1329. {
  1330. - int err = 0;
  1331. + int err = 0;
  1332. if (xfer) {
  1333. struct module *owner = xfer->owner;
  1334. - if (!try_module_get(owner))
  1335. + if(!try_module_get(owner))
  1336. return -EINVAL;
  1337. - if (xfer->init)
  1338. - err = xfer->init(lo, i);
  1339. + if (xfer->init) {
  1340. + /* this ugly cast is needed to work around 'const' damage in function prototype */
  1341. + /* should be: err = xfer->init(lo, i); */
  1342. + err = ((int (*)(struct loop_device *, struct loop_info64 *))xfer->init)(lo, i);
  1343. + }
  1344. if (err)
  1345. module_put(owner);
  1346. else
  1347. @@ -627,58 +1137,51 @@ static int loop_clr_fd(struct loop_devic
  1348. struct file *filp = lo->lo_backing_file;
  1349. int gfp = lo->old_gfp_mask;
  1350. - if (lo->lo_state != Lo_bound)
  1351. - return -ENXIO;
  1352. -
  1353. - if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
  1354. + if (bdev->bd_openers != 1) /* one for this fd being open */
  1355. return -EBUSY;
  1356. -
  1357. - if (filp == NULL)
  1358. + if (filp==NULL)
  1359. return -EINVAL;
  1360. - spin_lock_irq(&lo->lo_lock);
  1361. - lo->lo_state = Lo_rundown;
  1362. + lo->lo_queue->queuedata = NULL;
  1363. + lo->lo_queue->make_request_fn = loop_make_request_err;
  1364. if (atomic_dec_and_test(&lo->lo_pending))
  1365. - up(&lo->lo_bh_mutex);
  1366. - spin_unlock_irq(&lo->lo_lock);
  1367. -
  1368. + wake_up_interruptible(&LDE_lo_bio_wait);
  1369. down(&lo->lo_sem);
  1370. + loop_prealloc_cleanup(lo);
  1371. lo->lo_backing_file = NULL;
  1372. -
  1373. loop_release_xfer(lo);
  1374. lo->transfer = NULL;
  1375. lo->ioctl = NULL;
  1376. lo->lo_device = NULL;
  1377. lo->lo_encryption = NULL;
  1378. - lo->lo_offset = 0;
  1379. - lo->lo_sizelimit = 0;
  1380. + lo->lo_offset = lo->lo_sizelimit = 0;
  1381. + LDE_lo_offs_sec = LDE_lo_iv_remove = 0;
  1382. lo->lo_encrypt_key_size = 0;
  1383. lo->lo_flags = 0;
  1384. + lo->lo_init[1] = lo->lo_init[0] = 0;
  1385. + lo->lo_key_owner = 0;
  1386. + lo->key_data = NULL;
  1387. memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
  1388. memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
  1389. memset(lo->lo_file_name, 0, LO_NAME_SIZE);
  1390. invalidate_bdev(bdev, 0);
  1391. set_capacity(disks[lo->lo_number], 0);
  1392. - mapping_set_gfp_mask(filp->f_mapping, gfp);
  1393. - lo->lo_state = Lo_unbound;
  1394. + if (gfp != -1)
  1395. + mapping_set_gfp_mask(filp->f_dentry->d_inode->i_mapping, gfp);
  1396. fput(filp);
  1397. - /* This is safe: open() is still holding a reference. */
  1398. module_put(THIS_MODULE);
  1399. return 0;
  1400. }
  1401. -static int
  1402. -loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
  1403. +static int loop_set_status(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *info)
  1404. {
  1405. int err;
  1406. - struct loop_func_table *xfer;
  1407. + struct loop_func_table *xfer = NULL;
  1408. if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
  1409. !capable(CAP_SYS_ADMIN))
  1410. return -EPERM;
  1411. - if (lo->lo_state != Lo_bound)
  1412. - return -ENXIO;
  1413. if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
  1414. return -EINVAL;
  1415. @@ -686,6 +1189,22 @@ loop_set_status(struct loop_device *lo,
  1416. if (err)
  1417. return err;
  1418. + if ((loff_t)info->lo_offset < 0) {
  1419. + /* negative offset == remove offset from IV computations */
  1420. + lo->lo_offset = -(info->lo_offset);
  1421. + LDE_lo_iv_remove = lo->lo_offset >> 9;
  1422. + } else {
  1423. + /* positive offset == include offset in IV computations */
  1424. + lo->lo_offset = info->lo_offset;
  1425. + LDE_lo_iv_remove = 0;
  1426. + }
  1427. + LDE_lo_offs_sec = lo->lo_offset >> 9;
  1428. + lo->lo_sizelimit = info->lo_sizelimit;
  1429. + err = figure_loop_size(lo, bdev);
  1430. + if (err)
  1431. + return err;
  1432. + loop_set_softblksz(lo, bdev);
  1433. +
  1434. if (info->lo_encrypt_type) {
  1435. unsigned int type = info->lo_encrypt_type;
  1436. @@ -694,31 +1213,20 @@ loop_set_status(struct loop_device *lo,
  1437. xfer = xfer_funcs[type];
  1438. if (xfer == NULL)
  1439. return -EINVAL;
  1440. - } else
  1441. - xfer = NULL;
  1442. -
  1443. + }
  1444. err = loop_init_xfer(lo, xfer, info);
  1445. if (err)
  1446. return err;
  1447. - if (lo->lo_offset != info->lo_offset ||
  1448. - lo->lo_sizelimit != info->lo_sizelimit) {
  1449. - lo->lo_offset = info->lo_offset;
  1450. - lo->lo_sizelimit = info->lo_sizelimit;
  1451. - if (figure_loop_size(lo))
  1452. - return -EFBIG;
  1453. - }
  1454. -
  1455. - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  1456. - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  1457. - lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  1458. - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  1459. -
  1460. if (!xfer)
  1461. xfer = &none_funcs;
  1462. lo->transfer = xfer->transfer;
  1463. lo->ioctl = xfer->ioctl;
  1464. -
  1465. +
  1466. + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
  1467. + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
  1468. + lo->lo_file_name[LO_NAME_SIZE-1] = 0;
  1469. + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
  1470. lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
  1471. lo->lo_init[0] = info->lo_init[0];
  1472. lo->lo_init[1] = info->lo_init[1];
  1473. @@ -728,18 +1236,16 @@ loop_set_status(struct loop_device *lo,
  1474. lo->lo_key_owner = current->uid;
  1475. }
  1476. + lo->lo_queue->make_request_fn = loop_make_request_real;
  1477. return 0;
  1478. }
  1479. -static int
  1480. -loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  1481. +static int loop_get_status(struct loop_device *lo, struct loop_info64 *info)
  1482. {
  1483. struct file *file = lo->lo_backing_file;
  1484. struct kstat stat;
  1485. int error;
  1486. - if (lo->lo_state != Lo_bound)
  1487. - return -ENXIO;
  1488. error = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
  1489. if (error)
  1490. return error;
  1491. @@ -748,17 +1254,18 @@ loop_get_status(struct loop_device *lo,
  1492. info->lo_device = huge_encode_dev(stat.dev);
  1493. info->lo_inode = stat.ino;
  1494. info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
  1495. - info->lo_offset = lo->lo_offset;
  1496. + info->lo_offset = LDE_lo_iv_remove ? -(lo->lo_offset) : lo->lo_offset;
  1497. info->lo_sizelimit = lo->lo_sizelimit;
  1498. info->lo_flags = lo->lo_flags;
  1499. memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
  1500. memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
  1501. - info->lo_encrypt_type =
  1502. - lo->lo_encryption ? lo->lo_encryption->number : 0;
  1503. + info->lo_encrypt_type = lo->lo_encryption ? lo->lo_encryption->number : 0;
  1504. if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
  1505. info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
  1506. memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
  1507. lo->lo_encrypt_key_size);
  1508. + info->lo_init[0] = lo->lo_init[0];
  1509. + info->lo_init[1] = lo->lo_init[1];
  1510. }
  1511. return 0;
  1512. }
  1513. @@ -772,7 +1279,6 @@ loop_info64_from_old(const struct loop_i
  1514. info64->lo_inode = info->lo_inode;
  1515. info64->lo_rdevice = info->lo_rdevice;
  1516. info64->lo_offset = info->lo_offset;
  1517. - info64->lo_sizelimit = 0;
  1518. info64->lo_encrypt_type = info->lo_encrypt_type;
  1519. info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
  1520. info64->lo_flags = info->lo_flags;
  1521. @@ -786,7 +1292,7 @@ loop_info64_from_old(const struct loop_i
  1522. }
  1523. static int
  1524. -loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
  1525. +loop_info64_to_old(struct loop_info64 *info64, struct loop_info *info)
  1526. {
  1527. memset(info, 0, sizeof(*info));
  1528. info->lo_number = info64->lo_number;
  1529. @@ -809,14 +1315,15 @@ loop_info64_to_old(const struct loop_inf
  1530. if (info->lo_device != info64->lo_device ||
  1531. info->lo_rdevice != info64->lo_rdevice ||
  1532. info->lo_inode != info64->lo_inode ||
  1533. - info->lo_offset != info64->lo_offset)
  1534. + info->lo_offset != info64->lo_offset ||
  1535. + info64->lo_sizelimit)
  1536. return -EOVERFLOW;
  1537. return 0;
  1538. }
  1539. static int
  1540. -loop_set_status_old(struct loop_device *lo, const struct loop_info *arg)
  1541. +loop_set_status_old(struct loop_device *lo, struct block_device *bdev, const struct loop_info *arg)
  1542. {
  1543. struct loop_info info;
  1544. struct loop_info64 info64;
  1545. @@ -824,17 +1331,18 @@ loop_set_status_old(struct loop_device *
  1546. if (copy_from_user(&info, arg, sizeof (struct loop_info)))
  1547. return -EFAULT;
  1548. loop_info64_from_old(&info, &info64);
  1549. - return loop_set_status(lo, &info64);
  1550. + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key));
  1551. + return loop_set_status(lo, bdev, &info64);
  1552. }
static int
-loop_set_status64(struct loop_device *lo, const struct loop_info64 *arg)
+loop_set_status64(struct loop_device *lo, struct block_device *bdev, struct loop_info64 *arg)
{
	struct loop_info64 info64;
	if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
		return -EFAULT;
-	return loop_set_status(lo, &info64);
+	return loop_set_status(lo, bdev, &info64);
}
static int
@@ -870,28 +1378,50 @@ loop_get_status64(struct loop_device *lo
	return err;
}
-static int lo_ioctl(struct inode * inode, struct file * file,
-	unsigned int cmd, unsigned long arg)
+#if !defined(NEW_BLOCK_DRIVER_INTERFACE)
+static int lo_ioctl(struct inode *inode, struct file * file, unsigned int cmd, unsigned long arg)
{
-	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
+	struct block_device *bdev = inode->i_bdev;
+#else
+static int lo_ioctl(struct block_device *bdev, struct file * file, unsigned int cmd, unsigned long arg)
+{
+#endif
+	struct loop_device *lo = bdev->bd_disk->private_data;
	int err;
-	down(&lo->lo_ctl_mutex);
+	down(&bdev->bd_sem);
+
+	/*
+	 * LOOP_SET_FD can only be called when no device is attached.
+	 * All other ioctls can only be called when a device is attached.
+	 */
+	if (bdev->bd_disk->queue->queuedata != NULL) {
+		if (cmd == LOOP_SET_FD) {
+			err = -EBUSY;
+			goto out_err;
+		}
+	} else {
+		if (cmd != LOOP_SET_FD) {
+			err = -ENXIO;
+			goto out_err;
+		}
+	}
+
	switch (cmd) {
	case LOOP_SET_FD:
-		err = loop_set_fd(lo, file, inode->i_bdev, arg);
+		err = loop_set_fd(lo, file, bdev, arg);
		break;
	case LOOP_CLR_FD:
-		err = loop_clr_fd(lo, inode->i_bdev);
+		err = loop_clr_fd(lo, bdev);
		break;
	case LOOP_SET_STATUS:
-		err = loop_set_status_old(lo, (struct loop_info *) arg);
+		err = loop_set_status_old(lo, bdev, (struct loop_info *) arg);
		break;
	case LOOP_GET_STATUS:
		err = loop_get_status_old(lo, (struct loop_info *) arg);
		break;
	case LOOP_SET_STATUS64:
-		err = loop_set_status64(lo, (struct loop_info64 *) arg);
+		err = loop_set_status64(lo, bdev, (struct loop_info64 *) arg);
		break;
	case LOOP_GET_STATUS64:
		err = loop_get_status64(lo, (struct loop_info64 *) arg);
@@ -899,29 +1429,28 @@ static int lo_ioctl(struct inode * inode
	default:
		err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
	}
-	up(&lo->lo_ctl_mutex);
+out_err:
+	up(&bdev->bd_sem);
	return err;
}
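The reworked ioctl handler replaces the per-device lo_ctl_mutex with the block device's bd_sem, and uses the request queue's queuedata pointer as the attachment flag: non-NULL means a backing file is bound. LOOP_SET_FD is only legal while detached (-EBUSY otherwise), and every other command requires a bound device (-ENXIO otherwise). A minimal sketch of that gating, with illustrative command constants:

#include <errno.h>
#include <stddef.h>

enum { CMD_SET_FD = 1, CMD_CLR_FD, CMD_SET_STATUS };

/* Returns 0 if the command may proceed, or a negative errno. */
static int gate_ioctl(void *queuedata, int cmd)
{
	int attached = (queuedata != NULL);

	if (attached && cmd == CMD_SET_FD)
		return -EBUSY;	/* a backing file is already bound */
	if (!attached && cmd != CMD_SET_FD)
		return -ENXIO;	/* nothing bound yet */
	return 0;
}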
+#if !defined(NEW_BLOCK_DRIVER_INTERFACE)
static int lo_open(struct inode *inode, struct file *file)
+#else
+static int lo_open(struct block_device *bdev, struct file *file)
+#endif
{
-	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
-
-	down(&lo->lo_ctl_mutex);
-	lo->lo_refcnt++;
-	up(&lo->lo_ctl_mutex);
-
	return 0;
}
+#if !defined(NEW_BLOCK_DRIVER_INTERFACE)
static int lo_release(struct inode *inode, struct file *file)
{
-	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
-
-	down(&lo->lo_ctl_mutex);
-	--lo->lo_refcnt;
-	up(&lo->lo_ctl_mutex);
-
+	sync_blockdev(inode->i_bdev);
+#else
+static int lo_release(struct gendisk *disk)
+{
+#endif
	return 0;
}
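With the driver's own reference counting gone, open becomes a no-op and release only flushes the device's dirty page-cache pages via sync_blockdev() before the last user goes away. A loose userspace analogue of that flush-before-close discipline, purely for illustration:

#include <unistd.h>

static int close_loop_fd(int fd)
{
	fsync(fd);		/* flush cached writes, like sync_blockdev() */
	return close(fd);
}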
@@ -938,7 +1467,12 @@ static struct block_device_operations lo
MODULE_PARM(max_loop, "i");
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)");
MODULE_LICENSE("GPL");
+
+#if !defined(OLD_REQUEST_MODULE_INTERFACE)
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
+#else
+MODULE_ALIAS("block-major-7");
+#endif
int loop_register_transfer(struct loop_func_table *funcs)
{
@@ -953,34 +1487,31 @@ int loop_register_transfer(struct loop_f
int loop_unregister_transfer(int number)
{
	unsigned int n = number;
	struct loop_device *lo;
	struct loop_func_table *xfer;
+	int x;
	if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
		return -EINVAL;
	xfer_funcs[n] = NULL;
-	for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
-		down(&lo->lo_ctl_mutex);
-
+	for (x = 0; x < max_loop; x++) {
+		lo = loop_dev_ptr_arr[x];
+		if (!lo)
+			continue;
		if (lo->lo_encryption == xfer)
			loop_release_xfer(lo);
-
-		up(&lo->lo_ctl_mutex);
	}
	return 0;
}
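Because the devices now live behind an array of pointers (loop_dev_ptr_arr) rather than one contiguous array, the unregister path walks the slots, skips any that were never allocated, and releases the transfer module from devices still bound to it. A self-contained sketch of that NULL-skipping walk, with stand-in types:

#include <stddef.h>

struct xfer;
struct dev { struct xfer *encryption; };

static void drop_xfer(struct dev **devs, int n, struct xfer *xfer)
{
	for (int i = 0; i < n; i++) {
		if (!devs[i])
			continue;	/* slot never allocated */
		if (devs[i]->encryption == xfer)
			devs[i]->encryption = NULL;	/* release the binding */
	}
}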
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
int __init loop_init(void)
{
	int i;
-	if (max_loop < 1 || max_loop > 256) {
+	if ((max_loop < 1) || (max_loop > 256)) {
		printk(KERN_WARNING "loop: invalid max_loop (must be between"
				    " 1 and 256), using default (8)\n");
		max_loop = 8;
@@ -989,62 +1520,78 @@ int __init loop_init(void)
	if (register_blkdev(LOOP_MAJOR, "loop"))
		return -EIO;
-	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
-	if (!loop_dev)
+	loop_dev_ptr_arr = kmalloc(max_loop * sizeof(struct loop_device *), GFP_KERNEL);
+	if (!loop_dev_ptr_arr)
		goto out_mem1;
-	memset(loop_dev, 0, max_loop * sizeof(struct loop_device));
	disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
	if (!disks)
		goto out_mem2;
	for (i = 0; i < max_loop; i++) {
+		loop_dev_ptr_arr[i] = kmalloc(sizeof(LoDevExt), GFP_KERNEL);
+		if (!loop_dev_ptr_arr[i])
+			goto out_mem3;
+	}
+
+	for (i = 0; i < max_loop; i++) {
		disks[i] = alloc_disk(1);
		if (!disks[i])
-			goto out_mem3;
+			goto out_mem4;
+	}
+
+	for (i = 0; i < max_loop; i++) {
+		disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
+		if (!disks[i]->queue)
+			goto out_mem5;
+		disks[i]->queue->queuedata = NULL;
+		blk_queue_make_request(disks[i]->queue, loop_make_request_err);
+	}
+
+	{ extern int init_module_aes(void); init_module_aes(); }
+	for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+		if (!lo_prealloc[i])
+			continue;
+		if (lo_prealloc[i] < LO_PREALLOC_MIN)
+			lo_prealloc[i] = LO_PREALLOC_MIN;
+		if (lo_prealloc[i] > LO_PREALLOC_MAX)
+			lo_prealloc[i] = LO_PREALLOC_MAX;
	}
	devfs_mk_dir("loop");
	for (i = 0; i < max_loop; i++) {
-		struct loop_device *lo = &loop_dev[i];
+		struct loop_device *lo = loop_dev_ptr_arr[i];
		struct gendisk *disk = disks[i];
-
-		memset(lo, 0, sizeof(*lo));
-		lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-		if (!lo->lo_queue)
-			goto out_mem4;
-		init_MUTEX(&lo->lo_ctl_mutex);
-		init_MUTEX_LOCKED(&lo->lo_sem);
-		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
+		memset(lo, 0, sizeof(LoDevExt));
		lo->lo_number = i;
-		spin_lock_init(&lo->lo_lock);
+		lo->lo_queue = disk->queue;
		disk->major = LOOP_MAJOR;
		disk->first_minor = i;
		disk->fops = &lo_fops;
		sprintf(disk->disk_name, "loop%d", i);
		sprintf(disk->devfs_name, "loop/%d", i);
		disk->private_data = lo;
-		disk->queue = lo->lo_queue;
+		add_disk(disk);
	}
-	/* We cannot fail after we call this, so another loop!*/
-	for (i = 0; i < max_loop; i++)
-		add_disk(disks[i]);
	printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
	return 0;
+out_mem5:
+	while (i--)
+		blk_put_queue(disks[i]->queue);
+	i = max_loop;
out_mem4:
	while (i--)
-		blk_put_queue(loop_dev[i].lo_queue);
-	devfs_remove("loop");
+		put_disk(disks[i]);
	i = max_loop;
out_mem3:
	while (i--)
-		put_disk(disks[i]);
+		kfree(loop_dev_ptr_arr[i]);
	kfree(disks);
out_mem2:
-	kfree(loop_dev);
+	kfree(loop_dev_ptr_arr);
out_mem1:
	unregister_blkdev(LOOP_MAJOR, "loop");
	printk(KERN_ERR "loop: ran out of memory\n");
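The rewritten loop_init() allocates in stages (device structures, then gendisks, then queues), completing each stage for all devices before starting the next, and its failure labels unwind exactly the stages finished so far: "while (i--)" undoes the partial stage, then "i = max_loop" re-arms the loop to undo the whole previous one. A standalone sketch of that goto-unwind pattern, with malloc/free standing in for the kernel allocators:

#include <stdlib.h>

static int init_stages(int n)
{
	void **devs, **disks;
	int i;

	devs = malloc(n * sizeof(*devs));	/* stage 1 */
	if (!devs)
		goto out1;
	disks = malloc(n * sizeof(*disks));	/* stage 2 */
	if (!disks)
		goto out2;
	for (i = 0; i < n; i++) {		/* stage 3, per device */
		devs[i] = malloc(64);
		if (!devs[i])
			goto out3;
	}
	return 0;				/* success: resources kept */
out3:
	while (i--)
		free(devs[i]);			/* undo the partial stage */
	free(disks);
out2:
	free(devs);
out1:
	return -1;
}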
@@ -1055,17 +1602,17 @@ void loop_exit(void)
{
	int i;
+	{ extern void cleanup_module_aes(void); cleanup_module_aes(); }
	for (i = 0; i < max_loop; i++) {
		del_gendisk(disks[i]);
		put_disk(disks[i]);
+		blk_put_queue(loop_dev_ptr_arr[i]->lo_queue);
+		kfree(loop_dev_ptr_arr[i]);
	}
	devfs_remove("loop");
-	if (unregister_blkdev(LOOP_MAJOR, "loop"))
-		printk(KERN_WARNING "loop: cannot unregister blkdev\n");
-
+	unregister_blkdev(LOOP_MAJOR, "loop");
	kfree(disks);
-	kfree(loop_dev);
+	kfree(loop_dev_ptr_arr);
}
module_init(loop_init);
@@ -1080,3 +1627,10 @@ static int __init max_loop_setup(char *s
__setup("max_loop=", max_loop_setup);
#endif
+
+extern void loop_compute_sector_iv(sector_t, u_int32_t *);
+EXPORT_SYMBOL(loop_compute_sector_iv);
+extern void loop_compute_md5_iv(sector_t, u_int32_t *, u_int32_t *);
+EXPORT_SYMBOL(loop_compute_md5_iv);
+extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *);
+EXPORT_SYMBOL_NOVERS(md5_transform_CPUbyteorder);
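The trailing exports expose the IV helpers to external encryption modules: loop_compute_sector_iv derives an IV from the 512-byte sector number (the "IV as sector number" change noted in the header), and loop_compute_md5_iv additionally mixes it through MD5. Their implementations live outside this file; as a rough, hedged analogue, a plain sector-number IV amounts to spreading the sector index across the IV words so every sector gets a unique CBC IV:

#include <stdint.h>

/* Illustrative stand-in, not the patch's loop_compute_sector_iv(). */
static void sector_iv(uint64_t sector, uint32_t iv[4])
{
	iv[0] = (uint32_t)(sector & 0xffffffffu);	/* low 32 bits */
	iv[1] = (uint32_t)(sector >> 32);		/* high 32 bits */
	iv[2] = 0;
	iv[3] = 0;
}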