--- linux-2.4.25/drivers/block/loop.c 2003-08-25 13:44:41.000000000 +0200 +++ linux-2.4.25/drivers/block/loop.c 2004-02-08 16:51:25.000000000 +0100 @@ -39,21 +39,29 @@ * Support up to 256 loop devices * Heinz Mauelshagen , Feb 2002 * + * IV is now passed as (512 byte) sector number. + * Jari Ruusu, May 18 2001 + * + * External encryption module locking bug fixed. + * Ingo Rohloff , June 21 2001 + * + * Make device backed loop work with swap (pre-allocated buffers + queue rewrite). + * Jari Ruusu, September 2 2001 + * + * File backed code now uses file->f_op->read/write. Based on Andrew Morton's idea. + * Jari Ruusu, May 23 2002 + * + * Backported struct loop_info64 ioctls from 2.6 kernels (64 bit offsets and + * 64 bit sizelimits). Added support for removing offset from IV computations. + * Jari Ruusu, September 21 2003 + * + * * Still To Fix: * - Advisory locking is ignored here. * - Should use an own CAP_* category instead of CAP_SYS_ADMIN - * - * WARNING/FIXME: - * - The block number as IV passing to low level transfer functions is broken: - * it passes the underlying device's block number instead of the - * offset. This makes it change for a given block when the file is - * moved/restored/copied and also doesn't work over NFS. - * AV, Feb 12, 2000: we pass the logical block number now. It fixes the - * problem above. Encryption modules that used to rely on the old scheme - * should just call ->i_mapping->bmap() to calculate the physical block - * number. 
*/ +#include #include #include @@ -71,6 +79,9 @@ #include #include #include +#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_SOFTWARE_SUSPEND2) +#include +#endif #include @@ -79,24 +90,44 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; static int *loop_sizes; static int *loop_blksizes; +static int *loop_hardsizes; static devfs_handle_t devfs_handle; /* For the directory */ +struct loopinfo64 { + __u64 lo_device; /* ioctl r/o */ + __u64 lo_inode; /* ioctl r/o */ + __u64 lo_rdevice; /* ioctl r/o */ + __u64 lo_offset; + __u64 lo_sizelimit;/* bytes, 0 == max available */ + __u32 lo_number; /* ioctl r/o */ + __u32 lo_encrypt_type; + __u32 lo_encrypt_key_size; /* ioctl w/o */ + __u32 lo_flags; /* ioctl r/o */ + __u8 lo_file_name[LO_NAME_SIZE]; + __u8 lo_crypt_name[LO_NAME_SIZE]; + __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ + __u64 lo_init[2]; +}; +#if !defined(LOOP_SET_STATUS64) +# define LOOP_SET_STATUS64 0x4C04 +#endif +#if !defined(LOOP_GET_STATUS64) +# define LOOP_GET_STATUS64 0x4C05 +#endif + /* * Transfer functions */ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, char *loop_buf, int size, int real_block) { - if (raw_buf != loop_buf) { - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); - } + /* this code is only called from file backed loop */ + /* and that code expects this function to be no-op */ + if (current->need_resched) + {set_current_state(TASK_RUNNING);schedule();} return 0; } @@ -118,12 +149,13 @@ static int transfer_xor(struct loop_devi keysize = lo->lo_encrypt_key_size; for (i = 0; i < size; i++) *out++ = *in++ ^ key[(i & 511) % keysize]; + if (current->need_resched) + {set_current_state(TASK_RUNNING);schedule();} return 0; } static int none_status(struct loop_device *lo, struct loop_info *info) { - lo->lo_flags |= LO_FLAGS_BH_REMAP; return 0; } @@ -136,13 +168,13 @@ static int xor_status(struct loop_device struct 
loop_func_table none_funcs = { number: LO_CRYPT_NONE, - transfer: transfer_none, + transfer: (void *)transfer_none, init: none_status, }; struct loop_func_table xor_funcs = { number: LO_CRYPT_XOR, - transfer: transfer_xor, + transfer: (void *)transfer_xor, init: xor_status }; @@ -152,325 +184,420 @@ struct loop_func_table *xfer_funcs[MAX_L &xor_funcs }; -#define MAX_DISK_SIZE 1024*1024*1024 +/* + * First number of 'lo_prealloc' is the default number of RAM pages + * to pre-allocate for each device backed loop. Every (configured) + * device backed loop pre-allocates this amount of RAM pages unless + * later 'lo_prealloc' numbers provide an override. 'lo_prealloc' + * overrides are defined in pairs: loop_index,number_of_pages + */ +static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 }; +#define LO_PREALLOC_MIN 4 /* minimum user defined pre-allocated RAM pages */ +#define LO_PREALLOC_MAX 512 /* maximum user defined pre-allocated RAM pages */ -static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev) -{ - if (S_ISREG(lo_dentry->d_inode->i_mode)) - return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS; - if (blk_size[MAJOR(lodev)]) - return blk_size[MAJOR(lodev)][MINOR(lodev)] - - (lo->lo_offset >> BLOCK_SIZE_BITS); - return MAX_DISK_SIZE; -} +MODULE_PARM(lo_prealloc, "1-9i"); +MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]..."); -static void figure_loop_size(struct loop_device *lo) -{ - loop_sizes[lo->lo_number] = compute_loop_size(lo, - lo->lo_backing_file->f_dentry, - lo->lo_device); -} +/* + * This is loop helper thread nice value in range + * from 0 (low priority) to -20 (high priority). 
+ */ +#if defined(DEF_NICE) && defined(DEF_COUNTER) +static int lo_nice = -20; /* old scheduler default */ +#else +static int lo_nice = -1; /* O(1) scheduler default */ +#endif -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) -{ - struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ - struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct address_space_operations *aops = mapping->a_ops; - struct page *page; - char *kaddr, *data; - unsigned long index; - unsigned size, offset; - int len; +MODULE_PARM(lo_nice, "1i"); +MODULE_PARM_DESC(lo_nice, "Loop thread scheduler nice (0 ... -20)"); - down(&mapping->host->i_sem); - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; - while (len > 0) { - int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; - int transfer_result; +typedef struct { + struct loop_device lo_orig; + struct buffer_head *lo_bh_que0; + struct buffer_head *lo_bh_que1; + struct buffer_head *lo_bh_que2; + struct buffer_head *lo_bh_free; + int lo_bh_flsh; + int lo_bh_need; + wait_queue_head_t lo_bh_wait; + loff_t lo_offset; + loff_t lo_sizelimit; + unsigned long lo_offs_sec; + unsigned long lo_iv_remove; + unsigned char lo_crypt_name[LO_NAME_SIZE]; +} LoDevExt; +static LoDevExt *loop_dev; + +#define LDE_lo_bh_que0 (((LoDevExt *)lo)->lo_bh_que0) +#define LDE_lo_bh_que1 (((LoDevExt *)lo)->lo_bh_que1) +#define LDE_lo_bh_que2 (((LoDevExt *)lo)->lo_bh_que2) +#define LDE_lo_bh_free (((LoDevExt *)lo)->lo_bh_free) +#define LDE_lo_bh_flsh (((LoDevExt *)lo)->lo_bh_flsh) +#define LDE_lo_bh_need (((LoDevExt *)lo)->lo_bh_need) +#define LDE_lo_bh_wait (((LoDevExt *)lo)->lo_bh_wait) +#define LDE_lo_offset (((LoDevExt *)lo)->lo_offset) +#define LDE_lo_sizelimit (((LoDevExt *)lo)->lo_sizelimit) +#define LDE_lo_offs_sec (((LoDevExt *)lo)->lo_offs_sec) +#define LDE_lo_iv_remove (((LoDevExt *)lo)->lo_iv_remove) +#define 
LDE_lo_crypt_name (((LoDevExt *)lo)->lo_crypt_name) + +typedef struct { + struct buffer_head **q0; + struct buffer_head **q1; + struct buffer_head **q2; + int x0; + int x1; + int x2; +} que_look_up_table; - size = PAGE_CACHE_SIZE - offset; - if (size > len) - size = len; +static void loop_prealloc_cleanup(struct loop_device *lo) +{ + struct buffer_head *bh; - page = grab_cache_page(mapping, index); - if (!page) - goto fail; - kaddr = kmap(page); - if (aops->prepare_write(file, page, offset, offset+size)) - goto unlock; - flush_dcache_page(page); - transfer_result = lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV); - if (transfer_result) { - /* - * The transfer failed, but we still write the data to - * keep prepare/commit calls balanced. - */ - printk(KERN_ERR "loop: transfer error block %ld\n", index); - memset(kaddr + offset, 0, size); - } - if (aops->commit_write(file, page, offset, offset+size)) - goto unlock; - if (transfer_result) - goto unlock; - kunmap(page); - data += size; - len -= size; - offset = 0; - index++; - pos += size; - UnlockPage(page); - page_cache_release(page); + while ((bh = LDE_lo_bh_free)) { + __free_page(bh->b_page); + LDE_lo_bh_free = bh->b_reqnext; + bh->b_reqnext = NULL; + kmem_cache_free(bh_cachep, bh); } - up(&mapping->host->i_sem); - return 0; - -unlock: - kunmap(page); - UnlockPage(page); - page_cache_release(page); -fail: - up(&mapping->host->i_sem); - return -1; } -struct lo_read_data { - struct loop_device *lo; - char *data; - int bsize; -}; - -static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) +static int loop_prealloc_init(struct loop_device *lo, int y) { - char *kaddr; - unsigned long count = desc->count; - struct lo_read_data *p = (struct lo_read_data*)desc->buf; - struct loop_device *lo = p->lo; - int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize; + struct buffer_head *bh; + int x; - if (size > count) - size = count; + if(!y) { + y = 
lo_prealloc[0]; + for (x = 1; x < (sizeof(lo_prealloc) / sizeof(int)); x += 2) { + if (lo_prealloc[x + 1] && (lo->lo_number == lo_prealloc[x])) { + y = lo_prealloc[x + 1]; + break; + } + } + } + LDE_lo_bh_flsh = (y * 3) / 4; - kaddr = kmap(page); - if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) { - size = 0; - printk(KERN_ERR "loop: transfer error block %ld\n",page->index); - desc->error = -EINVAL; + for (x = 0; x < y; x++) { + bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL); + if (!bh) { + loop_prealloc_cleanup(lo); + return 1; + } + bh->b_page = alloc_page(GFP_KERNEL); + if (!bh->b_page) { + bh->b_reqnext = NULL; + kmem_cache_free(bh_cachep, bh); + loop_prealloc_cleanup(lo); + return 1; + } + bh->b_reqnext = LDE_lo_bh_free; + LDE_lo_bh_free = bh; } - kunmap(page); - - desc->count = count - size; - desc->written += size; - p->data += size; - return size; -} - -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) -{ - struct lo_read_data cookie; - read_descriptor_t desc; - struct file *file; - - cookie.lo = lo; - cookie.data = bh->b_data; - cookie.bsize = bsize; - desc.written = 0; - desc.count = bh->b_size; - desc.buf = (char*)&cookie; - desc.error = 0; - spin_lock_irq(&lo->lo_lock); - file = lo->lo_backing_file; - spin_unlock_irq(&lo->lo_lock); - do_generic_file_read(file, &pos, &desc, lo_read_actor); - return desc.error; + return 0; } -static inline int loop_get_bs(struct loop_device *lo) +static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) { - int bs = 0; + unsigned long flags; - if (blksize_size[MAJOR(lo->lo_device)]) - bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; - if (!bs) - bs = BLOCK_SIZE; + spin_lock_irqsave(&lo->lo_lock, flags); + if (*q) { + bh->b_reqnext = (*q)->b_reqnext; + (*q)->b_reqnext = bh; + } else { + bh->b_reqnext = bh; + } + *q = bh; + spin_unlock_irqrestore(&lo->lo_lock, flags); - return bs; + if 
(waitqueue_active(&LDE_lo_bh_wait)) + wake_up_interruptible(&LDE_lo_bh_wait); } -static inline unsigned long loop_get_iv(struct loop_device *lo, - unsigned long sector) +static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q) { - int bs = loop_get_bs(lo); - unsigned long offset, IV; - - IV = sector / (bs >> 9) + lo->lo_offset / bs; - offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs; - if (offset >= bs) - IV++; - - return IV; + spin_lock_irq(&lo->lo_lock); + if (*q) { + bh->b_reqnext = (*q)->b_reqnext; + (*q)->b_reqnext = bh; + } else { + bh->b_reqnext = bh; + *q = bh; + } + spin_unlock_irq(&lo->lo_lock); } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static struct buffer_head *loop_get_bh(struct loop_device *lo, int *list_nr, + que_look_up_table *qt) { - loff_t pos; - int ret; + struct buffer_head *bh = NULL, *last; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; - - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); - else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); - - return ret; -} - -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate); -static void loop_put_buffer(struct buffer_head *bh) -{ - /* - * check b_end_io, may just be a remapped bh and not an allocated one - */ - if (bh && bh->b_end_io == loop_end_io_transfer) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + spin_lock_irq(&lo->lo_lock); + if ((last = *qt->q0)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q0 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + *list_nr = qt->x0; + } else if ((last = *qt->q1)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q1 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + *list_nr = qt->x1; + } else if ((last = *qt->q2)) { + bh = last->b_reqnext; + if (bh == last) + *qt->q2 = NULL; + else + last->b_reqnext = bh->b_reqnext; + bh->b_reqnext = NULL; + 
*list_nr = qt->x2; } + spin_unlock_irq(&lo->lo_lock); + return bh; } -/* - * Add buffer_head to back of pending list - */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b) { unsigned long flags; + int wk; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; - } else - lo->lo_bh = lo->lo_bhtail = bh; + b->b_reqnext = LDE_lo_bh_free; + LDE_lo_bh_free = b; + wk = LDE_lo_bh_need; spin_unlock_irqrestore(&lo->lo_lock, flags); - up(&lo->lo_bh_mutex); + if (wk && waitqueue_active(&LDE_lo_bh_wait)) + wake_up_interruptible(&LDE_lo_bh_wait); } -/* - * Grab first pending buffer - */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate) { - struct buffer_head *bh; - - spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - } - spin_unlock_irq(&lo->lo_lock); + struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]); + struct buffer_head *rbh = bh->b_private; - return bh; + rbh->b_reqnext = NULL; + rbh->b_end_io(rbh, uptodate); + loop_put_buffer(lo, bh); + if (atomic_dec_and_test(&lo->lo_pending)) + wake_up_interruptible(&LDE_lo_bh_wait); } -/* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. 
if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs - * disabled) - */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; - - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]); - rbh->b_end_io(rbh, uptodate); - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); - loop_put_buffer(bh); - } else - loop_add_bh(lo, bh); + if (!uptodate) + loop_end_io_transfer_wr(bh, uptodate); + else + loop_add_queue_last(lo, bh, &LDE_lo_bh_que0); } static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) + struct buffer_head *rbh, int from_thread, int rw) { struct buffer_head *bh; + struct page *p; + unsigned long flags; - /* - * for xfer_funcs that can operate on the same bh, do that - */ - if (lo->lo_flags & LO_FLAGS_BH_REMAP) { - bh = rbh; - goto out_bh; + spin_lock_irqsave(&lo->lo_lock, flags); + bh = LDE_lo_bh_free; + if (bh) { + LDE_lo_bh_free = bh->b_reqnext; + if (from_thread) + LDE_lo_bh_need = 0; + } else { + if (from_thread) + LDE_lo_bh_need = 1; } + spin_unlock_irqrestore(&lo->lo_lock, flags); + if (!bh) + return (struct buffer_head *)0; - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; - - run_task_queue(&tq_disk); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); + p = bh->b_page; + memset(bh, 0, sizeof(struct buffer_head)); + bh->b_page = p; + bh->b_private = rbh; bh->b_size = rbh->b_size; bh->b_dev = rbh->b_rdev; + bh->b_rdev = lo->lo_device; bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bh->b_data = page_address(bh->b_page); + 
bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd; + bh->b_rsector = rbh->b_rsector + LDE_lo_offs_sec; + init_waitqueue_head(&bh->b_wait); + + return bh; +} + +static int figure_loop_size(struct loop_device *lo) +{ + loff_t size, offs; + unsigned int x; + int err = 0; + kdev_t lodev = lo->lo_device; + + offs = LDE_lo_offset; + if (S_ISREG(lo->lo_backing_file->f_dentry->d_inode->i_mode)) { + size = lo->lo_backing_file->f_dentry->d_inode->i_size; + } else { + offs &= ~((loff_t)511); + if (blk_size[MAJOR(lodev)]) + size = (loff_t)(blk_size[MAJOR(lodev)][MINOR(lodev)]) << BLOCK_SIZE_BITS; + else + size = 1024*1024*1024; /* unknown size */ + } + if ((offs > 0) && (offs < size)) { + size -= offs; + } else { + if (offs) + err = -EINVAL; + LDE_lo_offset = 0; + LDE_lo_offs_sec = LDE_lo_iv_remove = 0; + } + if ((LDE_lo_sizelimit > 0) && (LDE_lo_sizelimit <= size)) { + size = LDE_lo_sizelimit; + } else { + if (LDE_lo_sizelimit) + err = -EINVAL; + LDE_lo_sizelimit = 0; + } + size >>= BLOCK_SIZE_BITS; /* - * easy way out, although it does waste some memory for < PAGE_SIZE - * blocks... 
if highmem bounce buffering can get away with it, - * so can we :-) + * Unfortunately, if we want to do I/O on the device, + * the number of 1024-byte blocks has to fit into unsigned int */ - do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) - break; + x = (unsigned int)size; + if ((loff_t)x != size) { + err = -EFBIG; + size = 0; + } - run_task_queue(&tq_disk); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - } while (1); + loop_sizes[lo->lo_number] = size; + return err; +} - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_private = rbh; - init_waitqueue_head(&bh->b_wait); +static inline int lo_do_Transfer(struct loop_device *lo, int cmd, char *rbuf, + char *lbuf, int size, int rblock) +{ + if (!lo->transfer) + return 0; -out_bh: - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); - spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; - spin_unlock_irq(&lo->lo_lock); + /* this ugly cast is needed to work around (possible) kmap damage in function prototype */ + /* should be: return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock); */ + return ((int (*)(struct loop_device *, int, char *, char *, int, int))lo->transfer)(lo, cmd, rbuf, lbuf, size, rblock); +} - return bh; +static int loop_file_io(struct file *file, char *buf, int size, loff_t *ppos, int w) +{ + mm_segment_t fs; + int x, y, z; + + y = 0; + do { + z = size - y; + fs = get_fs(); + set_fs(get_ds()); + if (w) { + x = file->f_op->write(file, buf + y, z, ppos); + set_fs(fs); + } else { + x = file->f_op->read(file, buf + y, z, ppos); + set_fs(fs); + if (!x) + return 1; + } + if (x < 0) { + if ((x == -EAGAIN) || (x == -ENOMEM) || (x == -ERESTART) || (x == -EINTR)) { + run_task_queue(&tq_disk); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 2); + continue; + } + return 1; + } + y += x; + } while (y < size); + return 0; +} + +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +{ + loff_t 
pos; + struct file *file = lo->lo_backing_file; + char *data, *buf; + unsigned int size, len; + unsigned long IV; + + pos = ((loff_t) bh->b_rsector << 9) + LDE_lo_offset; + buf = page_address(LDE_lo_bh_free->b_page); + len = bh->b_size; + data = bh_kmap(bh); + IV = bh->b_rsector; + if (!LDE_lo_iv_remove) + IV += LDE_lo_offs_sec; + while (len > 0) { + if (lo->lo_encrypt_type == LO_CRYPT_NONE) { + /* this code relies that NONE transfer is a no-op */ + buf = data; + } + size = PAGE_SIZE; + if (size > len) + size = len; + if (rw == WRITE) { + if (lo_do_Transfer(lo, WRITE, buf, data, size, IV)) { + printk(KERN_ERR "loop%d: write transfer error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + if (loop_file_io(file, buf, size, &pos, 1)) { + printk(KERN_ERR "loop%d: write i/o error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + } else { + if (loop_file_io(file, buf, size, &pos, 0)) { + printk(KERN_ERR "loop%d: read i/o error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + if (lo_do_Transfer(lo, READ, buf, data, size, IV)) { + printk(KERN_ERR "loop%d: read transfer error, sector %lu\n", lo->lo_number, IV); + goto kunmap_and_out; + } + } + data += size; + len -= size; + IV += size >> 9; + } + bh_kunmap(bh); + return 0; + +kunmap_and_out: + bh_kunmap(bh); + return 1; } static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) { - struct buffer_head *bh = NULL; + struct buffer_head *bh; struct loop_device *lo; - unsigned long IV; + char *md; + set_current_state(TASK_RUNNING); if (!buffer_locked(rbh)) BUG(); if (MINOR(rbh->b_rdev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = (struct loop_device *)(&loop_dev[MINOR(rbh->b_rdev)]); spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -483,45 +610,55 @@ static int loop_make_request(request_que } else if (rw == READA) { rw = READ; } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + 
printk(KERN_ERR "loop%d: unknown command (%d)\n", lo->lo_number, rw); goto err; } - rbh = blk_queue_bounce(q, rw, rbh); - /* * file backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_queue_last(lo, rbh, (rw == WRITE) ? &LDE_lo_bh_que1 : &LDE_lo_bh_que0); return 0; } /* - * piggy old buffer on original, and submit for I/O + * device backed, just remap rdev & rsector for NONE transfer */ - bh = loop_get_buffer(lo, rbh); - IV = loop_get_iv(lo, rbh->b_rsector); + if (lo->lo_encrypt_type == LO_CRYPT_NONE) { + rbh->b_rsector += LDE_lo_offs_sec; + rbh->b_rdev = lo->lo_device; + generic_make_request(rw, rbh); + if (atomic_dec_and_test(&lo->lo_pending)) + wake_up_interruptible(&LDE_lo_bh_wait); + return 0; + } + + /* + * device backed, start reads and writes now if buffer available + */ + bh = loop_get_buffer(lo, rbh, 0, rw); + if (!bh) { + /* just queue request and let thread handle alloc later */ + loop_add_queue_last(lo, rbh, (rw == WRITE) ? 
&LDE_lo_bh_que1 : &LDE_lo_bh_que2); + return 0; + } if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + int trv; + md = bh_kmap(rbh); + trv = lo_do_Transfer(lo, WRITE, bh->b_data, md, bh->b_size, bh->b_rsector - LDE_lo_iv_remove); + bh_kunmap(rbh); + if (trv) { + loop_put_buffer(lo, bh); goto err; + } } - generic_make_request(rw, bh); return 0; err: if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + wake_up_interruptible(&LDE_lo_bh_wait); out: buffer_IO_error(rbh); return 0; @@ -530,30 +667,6 @@ inactive: goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) -{ - int ret; - - /* - * For block backed loop, we know this is a READ - */ - if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); - } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); - - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); - - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); - } -} - /* * worker thread that handles reads/writes to file backed loop devices, * to avoid blocking in our make_request_fn. 
it also does loop decrypting @@ -563,25 +676,71 @@ static inline void loop_handle_bh(struct static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct buffer_head *bh, *xbh; + int x, rw, qi = 0, flushcnt = 0; + wait_queue_t waitq; + que_look_up_table qt[4] = { + { &LDE_lo_bh_que0, &LDE_lo_bh_que1, &LDE_lo_bh_que2, 0, 1, 2 }, + { &LDE_lo_bh_que2, &LDE_lo_bh_que0, &LDE_lo_bh_que1, 2, 0, 1 }, + { &LDE_lo_bh_que0, &LDE_lo_bh_que2, &LDE_lo_bh_que1, 0, 2, 1 }, + { &LDE_lo_bh_que1, &LDE_lo_bh_que0, &LDE_lo_bh_que2, 1, 0, 2 } + }; + char *md; + static const struct rlimit loop_rlim_defaults[RLIM_NLIMITS] = INIT_RLIMITS; + init_waitqueue_entry(&waitq, current); + memcpy(&current->rlim[0], &loop_rlim_defaults[0], sizeof(current->rlim)); daemonize(); exit_files(current); +#if !defined(NO_REPARENT_TO_INIT) reparent_to_init(); +#endif sprintf(current->comm, "loop%d", lo->lo_number); +#if !defined(NO_TASK_STRUCT_SIGMASK_LOCK) spin_lock_irq(&current->sigmask_lock); +#elif NO_TASK_STRUCT_SIGMASK_LOCK == 1 + spin_lock_irq(&current->sighand->siglock); +#else + spin_lock_irq(&current->sig->siglock); +#endif sigfillset(&current->blocked); flush_signals(current); +#if !defined(NO_TASK_STRUCT_SIGMASK_LOCK) spin_unlock_irq(&current->sigmask_lock); +#elif NO_TASK_STRUCT_SIGMASK_LOCK == 1 + spin_unlock_irq(&current->sighand->siglock); +#else + spin_unlock_irq(&current->sig->siglock); +#endif + + if (lo_nice > 0) + lo_nice = 0; + if (lo_nice < -20) + lo_nice = -20; +#if defined(DEF_NICE) && defined(DEF_COUNTER) + /* old scheduler syntax */ + current->policy = SCHED_OTHER; + current->nice = lo_nice; +#else + /* O(1) scheduler syntax */ + set_user_nice(current, lo_nice); +#endif spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_bound; atomic_inc(&lo->lo_pending); spin_unlock_irq(&lo->lo_lock); +#if defined(PF_NOIO) current->flags |= PF_NOIO; +#endif +#if defined(PF_NOFREEZE) + current->flags |= PF_NOFREEZE; +#elif defined(PF_IOTHREAD) + current->flags |= PF_IOTHREAD; +#endif /* * up sem, we are 
running @@ -589,23 +748,110 @@ static int loop_thread(void *data) up(&lo->lo_sem); for (;;) { - down_interruptible(&lo->lo_bh_mutex); + add_wait_queue(&LDE_lo_bh_wait, &waitq); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!atomic_read(&lo->lo_pending)) + break; + + x = 0; + spin_lock_irq(&lo->lo_lock); + if (LDE_lo_bh_que0) { + x = 1; + } else if (LDE_lo_bh_que1 || LDE_lo_bh_que2) { + /* file backed works too because LDE_lo_bh_need == 0 */ + if (LDE_lo_bh_free || !LDE_lo_bh_need) + x = 1; + } + spin_unlock_irq(&lo->lo_lock); + if (x) + break; + + schedule(); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&LDE_lo_bh_wait, &waitq); + /* - * could be upped because of tear-down, not because of + * could be woken because of tear-down, not because of * pending work */ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + /* + * read queues using alternating order to prevent starvation + */ + bh = loop_get_bh(lo, &x, &qt[++qi & 3]); + if (!bh) + continue; + + /* + * x list tag usage(buffer-allocated) + * --- -------------- ----------------------- + * 0 LDE_lo_bh_que0 dev-read(y) / file-read + * 1 LDE_lo_bh_que1 dev-write(n) / file-write + * 2 LDE_lo_bh_que2 dev-read(n) + */ + rw = (x == 1) ? WRITE : READ; + if ((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) { + /* loop_make_request didn't allocate a buffer, do that now */ + xbh = loop_get_buffer(lo, bh, 1, rw); + if (!xbh) { + run_task_queue(&tq_disk); + flushcnt = 0; + loop_add_queue_first(lo, bh, (rw == WRITE) ? 
&LDE_lo_bh_que1 : &LDE_lo_bh_que2); + /* LDE_lo_bh_need should be 1 now, go back to sleep */ + continue; + } + if (rw == WRITE) { + int trv; + md = bh_kmap(bh); + trv = lo_do_Transfer(lo, WRITE, xbh->b_data, md, xbh->b_size, xbh->b_rsector - LDE_lo_iv_remove); + bh_kunmap(bh); + if (trv) { + loop_put_buffer(lo, xbh); + buffer_IO_error(bh); + atomic_dec(&lo->lo_pending); + continue; + } + } + generic_make_request(rw, xbh); + + /* start I/O if there are no more requests lacking buffers */ + x = 0; + spin_lock_irq(&lo->lo_lock); + if (!LDE_lo_bh_que1 && !LDE_lo_bh_que2) + x = 1; + spin_unlock_irq(&lo->lo_lock); + if (x || (++flushcnt >= LDE_lo_bh_flsh)) { + run_task_queue(&tq_disk); + flushcnt = 0; + } + + /* request not completely processed yet */ continue; } - loop_handle_bh(lo, bh); + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + /* request is for file backed device */ + x = do_bh_filebacked(lo, bh, rw); + bh->b_reqnext = NULL; + bh->b_end_io(bh, !x); + } else { + /* device backed read has completed, do decrypt now */ + xbh = bh->b_private; + /* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */ + /* instead, recompute IV from original request */ + md = bh_kmap(xbh); + x = lo_do_Transfer(lo, READ, bh->b_data, md, bh->b_size, xbh->b_rsector + LDE_lo_offs_sec - LDE_lo_iv_remove); + bh_kunmap(xbh); + xbh->b_reqnext = NULL; + xbh->b_end_io(xbh, !x); + loop_put_buffer(lo, bh); + } /* - * upped both for pending work and tear-down, lo_pending + * woken both for pending work and tear-down, lo_pending * will hit zero then */ if (atomic_dec_and_test(&lo->lo_pending)) @@ -616,15 +862,34 @@ static int loop_thread(void *data) return 0; } +static void loop_set_softblksz(struct loop_device *lo, kdev_t dev) +{ + int bs = 0, x; + + if (blksize_size[MAJOR(lo->lo_device)]) + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; + if (!bs) + bs = BLOCK_SIZE; + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + x = loop_sizes[lo->lo_number]; + if ((bs == 8192) && 
(x & 7)) + bs = 4096; + if ((bs == 4096) && (x & 3)) + bs = 2048; + if ((bs == 2048) && (x & 1)) + bs = 1024; + } + set_blocksize(dev, bs); +} + static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev, unsigned int arg) { struct file *file; struct inode *inode; kdev_t lo_device; - int lo_flags = 0; + int lo_flags = 0, hardsz = 512; int error; - int bs; MOD_INC_USE_COUNT; @@ -643,33 +908,46 @@ static int loop_set_fd(struct loop_devic if (!(file->f_mode & FMODE_WRITE)) lo_flags |= LO_FLAGS_READ_ONLY; + LDE_lo_offset = LDE_lo_sizelimit = 0; + LDE_lo_offs_sec = LDE_lo_iv_remove = 0; + LDE_lo_bh_free = LDE_lo_bh_que2 = LDE_lo_bh_que1 = LDE_lo_bh_que0 = NULL; + LDE_lo_bh_need = LDE_lo_bh_flsh = 0; + init_waitqueue_head(&LDE_lo_bh_wait); if (S_ISBLK(inode->i_mode)) { lo_device = inode->i_rdev; if (lo_device == dev) { error = -EBUSY; goto out_putf; } + if (loop_prealloc_init(lo, 0)) { + error = -ENOMEM; + goto out_putf; + } + hardsz = get_hardsect_size(lo_device); } else if (S_ISREG(inode->i_mode)) { - struct address_space_operations *aops = inode->i_mapping->a_ops; /* * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
*/ - if (!aops->readpage) + if (!file->f_op || !file->f_op->read) goto out_putf; - if (!aops->prepare_write || !aops->commit_write) + if (!file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; lo_device = inode->i_dev; lo_flags |= LO_FLAGS_DO_BMAP; + if (loop_prealloc_init(lo, 1)) { + error = -ENOMEM; + goto out_putf; + } error = 0; } else goto out_putf; get_file(file); - if (IS_RDONLY (inode) || is_read_only(lo_device) + if ((S_ISREG(inode->i_mode) && IS_RDONLY(inode)) || is_read_only(lo_device) || !(lo_file->f_mode & FMODE_WRITE)) lo_flags |= LO_FLAGS_READ_ONLY; @@ -677,28 +955,40 @@ static int loop_set_fd(struct loop_devic lo->lo_device = lo_device; lo->lo_flags = lo_flags; + if(lo_flags & LO_FLAGS_READ_ONLY) + lo->lo_flags |= 0x200000; /* export to user space */ lo->lo_backing_file = file; lo->transfer = NULL; lo->ioctl = NULL; - figure_loop_size(lo); - lo->old_gfp_mask = inode->i_mapping->gfp_mask; - inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS); - - bs = 0; - if (blksize_size[MAJOR(lo_device)]) - bs = blksize_size[MAJOR(lo_device)][MINOR(lo_device)]; - if (!bs) - bs = BLOCK_SIZE; + if (figure_loop_size(lo)) { + error = -EFBIG; + goto out_cleanup; + } - set_blocksize(dev, bs); + if (lo_flags & LO_FLAGS_DO_BMAP) { + lo->old_gfp_mask = inode->i_mapping->gfp_mask; + inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS); + inode->i_mapping->gfp_mask |= __GFP_HIGH; + } else { + lo->old_gfp_mask = -1; + } - lo->lo_bh = lo->lo_bhtail = NULL; - kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); - down(&lo->lo_sem); + loop_hardsizes[MINOR(dev)] = hardsz; + loop_set_softblksz(lo, dev); + error = kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + if(error < 0) + goto out_mapping; + down(&lo->lo_sem); fput(file); return 0; + out_mapping: + if(lo->old_gfp_mask != -1) + inode->i_mapping->gfp_mask = lo->old_gfp_mask; + out_cleanup: + loop_prealloc_cleanup(lo); + fput(file); out_putf: fput(file); out: @@ -711,6 +1001,7 @@ static int 
loop_release_xfer(struct loop int err = 0; if (lo->lo_encrypt_type) { struct loop_func_table *xfer= xfer_funcs[lo->lo_encrypt_type]; + lo->transfer = NULL; if (xfer && xfer->release) err = xfer->release(lo); if (xfer && xfer->unlock) @@ -736,7 +1027,11 @@ static int loop_init_xfer(struct loop_de return err; } +#if LINUX_VERSION_CODE >= 0x2040C static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) +#else +static int loop_clr_fd(struct loop_device *lo, kdev_t dev) +#endif { struct file *filp = lo->lo_backing_file; int gfp = lo->old_gfp_mask; @@ -751,11 +1046,12 @@ static int loop_clr_fd(struct loop_devic spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); + wake_up_interruptible(&LDE_lo_bh_wait); spin_unlock_irq(&lo->lo_lock); down(&lo->lo_sem); + loop_prealloc_cleanup(lo); lo->lo_backing_file = NULL; loop_release_xfer(lo); @@ -763,23 +1059,81 @@ static int loop_clr_fd(struct loop_devic lo->ioctl = NULL; lo->lo_device = 0; lo->lo_encrypt_type = 0; - lo->lo_offset = 0; + LDE_lo_offset = LDE_lo_sizelimit = 0; + LDE_lo_offs_sec = LDE_lo_iv_remove = 0; lo->lo_encrypt_key_size = 0; lo->lo_flags = 0; memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_name, 0, LO_NAME_SIZE); + memset(LDE_lo_crypt_name, 0, LO_NAME_SIZE); loop_sizes[lo->lo_number] = 0; +#if LINUX_VERSION_CODE >= 0x2040C invalidate_bdev(bdev, 0); - filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp; +#else + invalidate_buffers(dev); +#endif + if (gfp != -1) + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp; lo->lo_state = Lo_unbound; fput(filp); MOD_DEC_USE_COUNT; return 0; } -static int loop_set_status(struct loop_device *lo, struct loop_info *arg) +static void +loop_info64_from_old(const struct loop_info *info, struct loopinfo64 *info64) +{ + memset(info64, 0, sizeof(*info64)); + info64->lo_number = info->lo_number; + info64->lo_device = info->lo_device; + info64->lo_inode = info->lo_inode; + 
info64->lo_rdevice = info->lo_rdevice; + info64->lo_offset = info->lo_offset; + info64->lo_encrypt_type = info->lo_encrypt_type; + info64->lo_encrypt_key_size = info->lo_encrypt_key_size; + info64->lo_flags = info->lo_flags; + info64->lo_init[0] = info->lo_init[0]; + info64->lo_init[1] = info->lo_init[1]; + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */ + memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); + else + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); + memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); +} + +static int +loop_info64_to_old(struct loopinfo64 *info64, struct loop_info *info) +{ + memset(info, 0, sizeof(*info)); + info->lo_number = info64->lo_number; + info->lo_device = info64->lo_device; + info->lo_inode = info64->lo_inode; + info->lo_rdevice = info64->lo_rdevice; + info->lo_offset = info64->lo_offset; + info->lo_encrypt_type = info64->lo_encrypt_type; + info->lo_encrypt_key_size = info64->lo_encrypt_key_size; + info->lo_flags = info64->lo_flags; + info->lo_init[0] = info64->lo_init[0]; + info->lo_init[1] = info64->lo_init[1]; + if (info->lo_encrypt_type == 18) /* LO_CRYPT_CRYPTOAPI */ + memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); + else + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); + memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + + /* error in case values were truncated */ + if (info->lo_device != info64->lo_device || + info->lo_rdevice != info64->lo_rdevice || + info->lo_inode != info64->lo_inode || + info->lo_offset != info64->lo_offset || + info64->lo_sizelimit) + return -EOVERFLOW; + + return 0; +} + +static int loop_set_status(struct loop_device *lo, kdev_t dev, struct loopinfo64 *info, struct loop_info *oldinfo) { - struct loop_info info; int err; unsigned int type; @@ -788,62 +1142,137 @@ static int loop_set_status(struct loop_d return -EPERM; if (lo->lo_state != Lo_bound) return -ENXIO; - if (copy_from_user(&info, arg, sizeof (struct 
loop_info))) - return -EFAULT; - if ((unsigned int) info.lo_encrypt_key_size > LO_KEY_SIZE) + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; - type = info.lo_encrypt_type; + type = info->lo_encrypt_type; if (type >= MAX_LO_CRYPT || xfer_funcs[type] == NULL) return -EINVAL; - if (type == LO_CRYPT_XOR && info.lo_encrypt_key_size == 0) + if (type == LO_CRYPT_XOR && info->lo_encrypt_key_size == 0) return -EINVAL; err = loop_release_xfer(lo); - if (!err) - err = loop_init_xfer(lo, type, &info); if (err) return err; - lo->lo_offset = info.lo_offset; - strncpy(lo->lo_name, info.lo_name, LO_NAME_SIZE); + if ((loff_t)info->lo_offset < 0) { + /* negative offset == remove offset from IV computations */ + LDE_lo_offset = -(info->lo_offset); + LDE_lo_iv_remove = LDE_lo_offset >> 9; + } else { + /* positive offset == include offset in IV computations */ + LDE_lo_offset = info->lo_offset; + LDE_lo_iv_remove = 0; + } + LDE_lo_offs_sec = LDE_lo_offset >> 9; + LDE_lo_sizelimit = info->lo_sizelimit; + err = figure_loop_size(lo); + if (err) + return err; + loop_set_softblksz(lo, dev); + + /* transfer init function for 2.4 kernels takes old style struct */ + err = loop_init_xfer(lo, type, oldinfo); + /* copy key -- just in case transfer init func modified it */ + memcpy(info->lo_encrypt_key, oldinfo->lo_encrypt_key, sizeof(info->lo_encrypt_key)); + if (err) + return err; + strncpy(lo->lo_name, info->lo_file_name, LO_NAME_SIZE); + strncpy(LDE_lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->transfer = xfer_funcs[type]->transfer; lo->ioctl = xfer_funcs[type]->ioctl; - lo->lo_encrypt_key_size = info.lo_encrypt_key_size; - lo->lo_init[0] = info.lo_init[0]; - lo->lo_init[1] = info.lo_init[1]; - if (info.lo_encrypt_key_size) { - memcpy(lo->lo_encrypt_key, info.lo_encrypt_key, - info.lo_encrypt_key_size); + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; + lo->lo_init[0] = info->lo_init[0]; + lo->lo_init[1] = info->lo_init[1]; + if 
(info->lo_encrypt_key_size) { + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, + info->lo_encrypt_key_size); lo->lo_key_owner = current->uid; - } - figure_loop_size(lo); + } + return 0; } -static int loop_get_status(struct loop_device *lo, struct loop_info *arg) +static int loop_get_status(struct loop_device *lo, struct loopinfo64 *info) { - struct loop_info info; struct file *file = lo->lo_backing_file; if (lo->lo_state != Lo_bound) return -ENXIO; - if (!arg) - return -EINVAL; - memset(&info, 0, sizeof(info)); - info.lo_number = lo->lo_number; - info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev); - info.lo_inode = file->f_dentry->d_inode->i_ino; - info.lo_rdevice = kdev_t_to_nr(lo->lo_device); - info.lo_offset = lo->lo_offset; - info.lo_flags = lo->lo_flags; - strncpy(info.lo_name, lo->lo_name, LO_NAME_SIZE); - info.lo_encrypt_type = lo->lo_encrypt_type; + memset(info, 0, sizeof(*info)); + info->lo_number = lo->lo_number; + info->lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev); + info->lo_inode = file->f_dentry->d_inode->i_ino; + info->lo_rdevice = kdev_t_to_nr(lo->lo_device); + info->lo_offset = LDE_lo_iv_remove ? -(LDE_lo_offset) : LDE_lo_offset; + info->lo_sizelimit = LDE_lo_sizelimit; + info->lo_flags = lo->lo_flags; + strncpy(info->lo_file_name, lo->lo_name, LO_NAME_SIZE); + strncpy(info->lo_crypt_name, LDE_lo_crypt_name, LO_NAME_SIZE); + info->lo_encrypt_type = lo->lo_encrypt_type; if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { - info.lo_encrypt_key_size = lo->lo_encrypt_key_size; - memcpy(info.lo_encrypt_key, lo->lo_encrypt_key, + info->lo_encrypt_key_size = lo->lo_encrypt_key_size; + memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); + info->lo_init[0] = lo->lo_init[0]; + info->lo_init[1] = lo->lo_init[1]; } - return copy_to_user(arg, &info, sizeof(info)) ? 
-EFAULT : 0; + return 0; +} + +static int +loop_set_status_n(struct loop_device *lo, kdev_t dev, void *arg, int n) +{ + struct loop_info info; + struct loopinfo64 info64; + int err; + + if (n) { + if (copy_from_user(&info64, arg, sizeof (struct loopinfo64))) + return -EFAULT; + /* truncation errors can be ignored here as transfer init func only wants key bits */ + loop_info64_to_old(&info64, &info); + } else { + if (copy_from_user(&info, arg, sizeof (struct loop_info))) + return -EFAULT; + loop_info64_from_old(&info, &info64); + } + err = loop_set_status(lo, dev, &info64, &info); + memset(&info.lo_encrypt_key[0], 0, sizeof(info.lo_encrypt_key)); + memset(&info64.lo_encrypt_key[0], 0, sizeof(info64.lo_encrypt_key)); + return err; +} + +static int +loop_get_status_old(struct loop_device *lo, struct loop_info *arg) { + struct loop_info info; + struct loopinfo64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = loop_get_status(lo, &info64); + if (!err) + err = loop_info64_to_old(&info64, &info); + if (!err && copy_to_user(arg, &info, sizeof(info))) + err = -EFAULT; + + return err; +} + +static int +loop_get_status64(struct loop_device *lo, struct loopinfo64 *arg) { + struct loopinfo64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = loop_get_status(lo, &info64); + if (!err && copy_to_user(arg, &info64, sizeof(info64))) + err = -EFAULT; + + return err; } static int lo_ioctl(struct inode * inode, struct file * file, @@ -862,20 +1291,30 @@ static int lo_ioctl(struct inode * inode dev = MINOR(inode->i_rdev); if (dev >= max_loop) return -ENODEV; - lo = &loop_dev[dev]; + lo = (struct loop_device *)(&loop_dev[dev]); down(&lo->lo_ctl_mutex); switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, file, inode->i_rdev, arg); break; case LOOP_CLR_FD: +#if LINUX_VERSION_CODE >= 0x2040C err = loop_clr_fd(lo, inode->i_bdev); +#else + err = loop_clr_fd(lo, inode->i_rdev); +#endif break; case LOOP_SET_STATUS: - err = loop_set_status(lo, 
(struct loop_info *) arg); + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 0); break; case LOOP_GET_STATUS: - err = loop_get_status(lo, (struct loop_info *) arg); + err = loop_get_status_old(lo, (struct loop_info *) arg); + break; + case LOOP_SET_STATUS64: + err = loop_set_status_n(lo, inode->i_rdev, (void *) arg, 1); + break; + case LOOP_GET_STATUS64: + err = loop_get_status64(lo, (struct loopinfo64 *) arg); break; case BLKGETSIZE: if (lo->lo_state != Lo_bound) { @@ -884,6 +1323,7 @@ static int lo_ioctl(struct inode * inode } err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); break; +#if defined(BLKGETSIZE64) case BLKGETSIZE64: if (lo->lo_state != Lo_bound) { err = -ENXIO; @@ -891,9 +1331,18 @@ static int lo_ioctl(struct inode * inode } err = put_user((u64)loop_sizes[lo->lo_number] << 10, (u64*)arg); break; +#endif +#if defined(BLKBSZGET) case BLKBSZGET: +#endif +#if defined(BLKBSZSET) case BLKBSZSET: +#endif +#if defined(BLKSSZGET) case BLKSSZGET: +#endif + case BLKROGET: + case BLKROSET: err = blk_ioctl(inode->i_rdev, cmd, arg); break; default: @@ -906,7 +1355,7 @@ static int lo_ioctl(struct inode * inode static int lo_open(struct inode *inode, struct file *file) { struct loop_device *lo; - int dev, type; + int dev; if (!inode) return -EINVAL; @@ -918,13 +1367,9 @@ static int lo_open(struct inode *inode, if (dev >= max_loop) return -ENODEV; - lo = &loop_dev[dev]; + lo = (struct loop_device *)(&loop_dev[dev]); MOD_INC_USE_COUNT; down(&lo->lo_ctl_mutex); - - type = lo->lo_encrypt_type; - if (type && xfer_funcs[type] && xfer_funcs[type]->lock) - xfer_funcs[type]->lock(lo); lo->lo_refcnt++; up(&lo->lo_ctl_mutex); return 0; @@ -933,7 +1378,7 @@ static int lo_open(struct inode *inode, static int lo_release(struct inode *inode, struct file *file) { struct loop_device *lo; - int dev, type; + int dev; if (!inode) return 0; @@ -946,20 +1391,18 @@ static int lo_release(struct inode *inod if (dev >= max_loop) return 0; - lo = 
&loop_dev[dev]; + lo = (struct loop_device *)(&loop_dev[dev]); down(&lo->lo_ctl_mutex); - type = lo->lo_encrypt_type; --lo->lo_refcnt; - if (xfer_funcs[type] && xfer_funcs[type]->unlock) - xfer_funcs[type]->unlock(lo); - up(&lo->lo_ctl_mutex); MOD_DEC_USE_COUNT; return 0; } static struct block_device_operations lo_fops = { +#if !defined(NO_BLOCK_DEVICE_OPERATIONS_OWNER) owner: THIS_MODULE, +#endif open: lo_open, release: lo_release, ioctl: lo_ioctl, @@ -970,11 +1413,13 @@ static struct block_device_operations lo */ MODULE_PARM(max_loop, "i"); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-256)"); +#if defined(MODULE_LICENSE) MODULE_LICENSE("GPL"); +#endif int loop_register_transfer(struct loop_func_table *funcs) { - if ((unsigned)funcs->number > MAX_LO_CRYPT || xfer_funcs[funcs->number]) + if ((unsigned)funcs->number >= MAX_LO_CRYPT || xfer_funcs[funcs->number]) return -EINVAL; xfer_funcs[funcs->number] = funcs; return 0; @@ -983,15 +1428,15 @@ int loop_register_transfer(struct loop_f int loop_unregister_transfer(int number) { struct loop_device *lo; + int x, type; if ((unsigned)number >= MAX_LO_CRYPT) return -EINVAL; - for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { - int type = lo->lo_encrypt_type; + for (x = 0; x < max_loop; x++) { + lo = (struct loop_device *)(&loop_dev[x]); + type = lo->lo_encrypt_type; if (type == number) { - xfer_funcs[type]->release(lo); - lo->transfer = NULL; - lo->lo_encrypt_type = 0; + loop_release_xfer(lo); } } xfer_funcs[number] = NULL; @@ -1017,10 +1462,9 @@ int __init loop_init(void) return -EIO; } - - loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL); + loop_dev = kmalloc(max_loop * sizeof(LoDevExt), GFP_KERNEL); if (!loop_dev) - return -ENOMEM; + goto out_dev; loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) @@ -1030,25 +1474,40 @@ int __init loop_init(void) if (!loop_blksizes) goto out_blksizes; + loop_hardsizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + 
if (!loop_hardsizes) + goto out_hardsizes; + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); for (i = 0; i < max_loop; i++) { - struct loop_device *lo = &loop_dev[i]; - memset(lo, 0, sizeof(struct loop_device)); + struct loop_device *lo = (struct loop_device *)(&loop_dev[i]); + memset(lo, 0, sizeof(LoDevExt)); init_MUTEX(&lo->lo_ctl_mutex); init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); } memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_hardsizes, 0, max_loop * sizeof(int)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; + hardsect_size[MAJOR_NR] = loop_hardsizes; for (i = 0; i < max_loop; i++) register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0); + { extern int init_module_aes(void); init_module_aes(); } + for (i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) { + if (!lo_prealloc[i]) + continue; + if (lo_prealloc[i] < LO_PREALLOC_MIN) + lo_prealloc[i] = LO_PREALLOC_MIN; + if (lo_prealloc[i] > LO_PREALLOC_MAX) + lo_prealloc[i] = LO_PREALLOC_MAX; + } + devfs_handle = devfs_mk_dir(NULL, "loop", NULL); devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT, MAJOR_NR, 0, @@ -1058,10 +1517,13 @@ int __init loop_init(void) printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; +out_hardsizes: + kfree(loop_blksizes); out_blksizes: kfree(loop_sizes); out_sizes: kfree(loop_dev); +out_dev: if (devfs_unregister_blkdev(MAJOR_NR, "loop")) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); printk(KERN_ERR "loop: ran out of memory\n"); @@ -1070,12 +1532,18 @@ out_sizes: void loop_exit(void) { + { extern void cleanup_module_aes(void); cleanup_module_aes(); } devfs_unregister(devfs_handle); if (devfs_unregister_blkdev(MAJOR_NR, "loop")) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); + + blk_size[MAJOR_NR] = 0; + blksize_size[MAJOR_NR] = 0; + 
hardsect_size[MAJOR_NR] = 0; kfree(loop_dev); kfree(loop_sizes); kfree(loop_blksizes); + kfree(loop_hardsizes); } module_init(loop_init); @@ -1090,3 +1558,10 @@ static int __init max_loop_setup(char *s __setup("max_loop=", max_loop_setup); #endif + +extern void loop_compute_sector_iv(int, u_int32_t *); +EXPORT_SYMBOL(loop_compute_sector_iv); +extern void loop_compute_md5_iv(int, u_int32_t *, u_int32_t *); +EXPORT_SYMBOL(loop_compute_md5_iv); +extern void md5_transform_CPUbyteorder(u_int32_t *, u_int32_t const *); +EXPORT_SYMBOL_NOVERS(md5_transform_CPUbyteorder);