~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/drivers/block/nbd.c

Version: ~ [ 2.2.5 ] ~ [ 2.4.1 ] ~ [ 2.4.9 ] ~ [ 2.6.17.10 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Network block device - make block devices work over TCP
  3  *
  4  * Note that you cannot swap over this thing yet. It seems to work but
  5  * sometimes deadlocks - you cannot swap over TCP in general.
  6  * 
  7  * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
  8  * 
  9  * (part of code stolen from loop.c)
 10  *
 11  * 97-3-25 compiled 0-th version, not yet tested it 
 12  *   (it did not work, BTW) (later that day) HEY! it works!
 13  *   (bit later) hmm, not that much... 2:00am next day:
 14  *   yes, it works, but it gives something like 50kB/sec
 15  * 97-4-01 complete rewrite to make it possible for many requests at 
 16  *   once to be processed
 17  * 97-4-11 Making protocol independent of endianness etc.
 18  * 97-9-13 Cosmetic changes
 19  * 98-5-13 Attempt to make 64-bit-clean on 64-bit machines
 20  * 99-1-11 Attempt to make 64-bit-clean on 32-bit machines <ankry@mif.pg.gda.pl>
 21  *
 22  * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
 23  * why not: would need verify_area and friends, would share yet another 
 24  *          structure with userland
 25  */
 26 
 27 #undef  NBD_PLUGGABLE
 28 #define PARANOIA
 29 #include <linux/major.h>
 30 
 31 #include <linux/module.h>
 32 
 33 #include <linux/sched.h>
 34 #include <linux/fs.h>
 35 #include <linux/stat.h>
 36 #include <linux/errno.h>
 37 #include <linux/file.h>
 38 #include <linux/ioctl.h>
 39 #include <net/sock.h>
 40 
 41 #include <linux/devfs_fs_kernel.h>
 42 
 43 #include <asm/segment.h>
 44 #include <asm/uaccess.h>
 45 #include <asm/types.h>
 46 
 47 #define MAJOR_NR NBD_MAJOR
 48 #include <linux/nbd.h>
 49 
 50 #define LO_MAGIC 0x68797548
 51 
 52 static int nbd_blksizes[MAX_NBD];
 53 static int nbd_blksize_bits[MAX_NBD];
 54 static int nbd_sizes[MAX_NBD];
 55 static u64 nbd_bytesizes[MAX_NBD];
 56 
 57 static struct nbd_device nbd_dev[MAX_NBD];
 58 static devfs_handle_t devfs_handle;
 59 
 60 #define DEBUG( s )
 61 /* #define DEBUG( s ) printk( s ) 
 62  */
 63 
 64 #ifdef PARANOIA
 65 static int requests_in;
 66 static int requests_out;
 67 #endif
 68 
 69 static void nbd_plug_device(request_queue_t *q, kdev_t dev) { }
 70 
 71 static int nbd_open(struct inode *inode, struct file *file)
 72 {
 73         int dev;
 74 
 75         if (!inode)
 76                 return -EINVAL;
 77         dev = MINOR(inode->i_rdev);
 78         if (dev >= MAX_NBD)
 79                 return -ENODEV;
 80 
 81         nbd_dev[dev].refcnt++;
 82         MOD_INC_USE_COUNT;
 83         return 0;
 84 }
 85 
 86 /*
 87  *  Send or receive packet.
 88  */
 89 static int nbd_xmit(int send, struct socket *sock, char *buf, int size)
 90 {
 91         mm_segment_t oldfs;
 92         int result;
 93         struct msghdr msg;
 94         struct iovec iov;
 95         unsigned long flags;
 96         sigset_t oldset;
 97 
 98         oldfs = get_fs();
 99         set_fs(get_ds());
100 
101         spin_lock_irqsave(&current->sigmask_lock, flags);
102         oldset = current->blocked;
103         sigfillset(&current->blocked);
104         recalc_sigpending(current);
105         spin_unlock_irqrestore(&current->sigmask_lock, flags);
106 
107 
108         do {
109                 sock->sk->allocation = GFP_BUFFER;
110                 iov.iov_base = buf;
111                 iov.iov_len = size;
112                 msg.msg_name = NULL;
113                 msg.msg_namelen = 0;
114                 msg.msg_iov = &iov;
115                 msg.msg_iovlen = 1;
116                 msg.msg_control = NULL;
117                 msg.msg_controllen = 0;
118                 msg.msg_namelen = 0;
119                 msg.msg_flags = 0;
120 
121                 if (send)
122                         result = sock_sendmsg(sock, &msg, size);
123                 else
124                         result = sock_recvmsg(sock, &msg, size, 0);
125 
126                 if (result <= 0) {
127 #ifdef PARANOIA
128                         printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
129                                send ? "send" : "receive", (long) sock, (long) buf, size, result);
130 #endif
131                         break;
132                 }
133                 size -= result;
134                 buf += result;
135         } while (size > 0);
136 
137         spin_lock_irqsave(&current->sigmask_lock, flags);
138         current->blocked = oldset;
139         recalc_sigpending(current);
140         spin_unlock_irqrestore(&current->sigmask_lock, flags);
141 
142         set_fs(oldfs);
143         return result;
144 }
145 
146 #define FAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); goto error_out; }
147 
148 void nbd_send_req(struct socket *sock, struct request *req)
149 {
150         int result;
151         struct nbd_request request;
152 
153         DEBUG("NBD: sending control, ");
154         request.magic = htonl(NBD_REQUEST_MAGIC);
155         request.type = htonl(req->cmd);
156         request.from = cpu_to_be64( (u64) req->sector << 9);
157         request.len = htonl(req->current_nr_sectors << 9);
158         memcpy(request.handle, &req, sizeof(req));
159 
160         result = nbd_xmit(1, sock, (char *) &request, sizeof(request));
161         if (result <= 0)
162                 FAIL("Sendmsg failed for control.");
163 
164         if (req->cmd == WRITE) {
165                 DEBUG("data, ");
166                 result = nbd_xmit(1, sock, req->buffer, req->current_nr_sectors << 9);
167                 if (result <= 0)
168                         FAIL("Send data failed.");
169         }
170         return;
171 
172       error_out:
173         req->errors++;
174 }
175 
176 #define HARDFAIL( s ) { printk( KERN_ERR "NBD: " s "(result %d)\n", result ); lo->harderror = result; return NULL; }
177 struct request *nbd_read_stat(struct nbd_device *lo)
178                 /* NULL returned = something went wrong, inform userspace       */ 
179 {
180         int result;
181         struct nbd_reply reply;
182         struct request *xreq, *req;
183 
184         DEBUG("reading control, ");
185         reply.magic = 0;
186         result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply));
187         if (result <= 0)
188                 HARDFAIL("Recv control failed.");
189         memcpy(&xreq, reply.handle, sizeof(xreq));
190         req = blkdev_entry_prev_request(&lo->queue_head);
191 
192         if (xreq != req)
193                 FAIL("Unexpected handle received.\n");
194 
195         DEBUG("ok, ");
196         if (ntohl(reply.magic) != NBD_REPLY_MAGIC)
197                 HARDFAIL("Not enough magic.");
198         if (ntohl(reply.error))
199                 FAIL("Other side returned error.");
200         if (req->cmd == READ) {
201                 DEBUG("data, ");
202                 result = nbd_xmit(0, lo->sock, req->buffer, req->current_nr_sectors << 9);
203                 if (result <= 0)
204                         HARDFAIL("Recv data failed.");
205         }
206         DEBUG("done.\n");
207         return req;
208 
209 /* Can we get here? Yes, if other side returns error */
210       error_out:
211         req->errors++;
212         return req;
213 }
214 
215 void nbd_do_it(struct nbd_device *lo)
216 {
217         struct request *req;
218         int dequeued;
219 
220         down (&lo->queue_lock);
221         while (1) {
222                 up (&lo->queue_lock);
223                 req = nbd_read_stat(lo);
224                 down (&lo->queue_lock);
225 
226                 if (!req) {
227                         printk(KERN_ALERT "req should never be null\n" );
228                         goto out;
229                 }
230 #ifdef PARANOIA
231                 if (req != blkdev_entry_prev_request(&lo->queue_head)) {
232                         printk(KERN_ALERT "NBD: I have problem...\n");
233                 }
234                 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
235                         printk(KERN_ALERT "NBD: request corrupted!\n");
236                         continue;
237                 }
238                 if (lo->magic != LO_MAGIC) {
239                         printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n");
240                         goto out;
241                 }
242 #endif
243                 list_del(&req->queue);
244                 up (&lo->queue_lock);
245                 
246                 dequeued = nbd_end_request(req);
247 
248                 down (&lo->queue_lock);
249                 if (!dequeued)
250                         list_add(&req->queue, &lo->queue_head);
251         }
252  out:
253         up (&lo->queue_lock);
254 }
255 
256 void nbd_clear_que(struct nbd_device *lo)
257 {
258         struct request *req;
259         int dequeued;
260 
261 #ifdef PARANOIA
262         if (lo->magic != LO_MAGIC) {
263                 printk(KERN_ERR "NBD: nbd_dev[] corrupted: Not enough magic when clearing!\n");
264                 return;
265         }
266 #endif
267 
268         while (!list_empty(&lo->queue_head)) {
269                 req = blkdev_entry_prev_request(&lo->queue_head);
270 #ifdef PARANOIA
271                 if (!req) {
272                         printk( KERN_ALERT "NBD: panic, panic, panic\n" );
273                         break;
274                 }
275                 if (lo != &nbd_dev[MINOR(req->rq_dev)]) {
276                         printk(KERN_ALERT "NBD: request corrupted when clearing!\n");
277                         continue;
278                 }
279 #endif
280                 req->errors++;
281                 list_del(&req->queue);
282                 up(&lo->queue_lock);
283 
284                 dequeued = nbd_end_request(req);
285 
286                 down(&lo->queue_lock);
287                 if (!dequeued)
288                         list_add(&req->queue, &lo->queue_head);
289         }
290 }
291 
292 /*
293  * We always wait for result of write, for now. It would be nice to make it optional
294  * in future
295  * if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) 
296  *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
297  */
298 
299 #undef FAIL
300 #define FAIL( s ) { printk( KERN_ERR "NBD, minor %d: " s "\n", dev ); goto error_out; }
301 
302 static void do_nbd_request(request_queue_t * q)
303 {
304         struct request *req;
305         int dev = 0;
306         struct nbd_device *lo;
307 
308         while (!QUEUE_EMPTY) {
309                 req = CURRENT;
310 #ifdef PARANOIA
311                 if (!req)
312                         FAIL("que not empty but no request?");
313 #endif
314                 dev = MINOR(req->rq_dev);
315 #ifdef PARANOIA
316                 if (dev >= MAX_NBD)
317                         FAIL("Minor too big.");         /* Probably can not happen */
318 #endif
319                 lo = &nbd_dev[dev];
320                 if (!lo->file)
321                         FAIL("Request when not-ready.");
322                 if ((req->cmd == WRITE) && (lo->flags & NBD_READ_ONLY))
323                         FAIL("Write on read-only");
324 #ifdef PARANOIA
325                 if (lo->magic != LO_MAGIC)
326                         FAIL("nbd[] is not magical!");
327                 requests_in++;
328 #endif
329                 req->errors = 0;
330                 blkdev_dequeue_request(req);
331                 spin_unlock_irq(&io_request_lock);
332 
333                 down (&lo->queue_lock);
334                 list_add(&req->queue, &lo->queue_head);
335                 nbd_send_req(lo->sock, req);    /* Why does this block?         */
336                 up (&lo->queue_lock);
337 
338                 spin_lock_irq(&io_request_lock);
339                 continue;
340 
341               error_out:
342                 req->errors++;
343                 blkdev_dequeue_request(req);
344                 spin_unlock(&io_request_lock);
345                 nbd_end_request(req);
346                 spin_lock(&io_request_lock);
347         }
348         return;
349 }
350 
351 static int nbd_ioctl(struct inode *inode, struct file *file,
352                      unsigned int cmd, unsigned long arg)
353 {
354         struct nbd_device *lo;
355         int dev, error, temp;
356         struct request sreq ;
357 
358         /* Anyone capable of this syscall can do *real bad* things */
359 
360         if (!capable(CAP_SYS_ADMIN))
361                 return -EPERM;
362         if (!inode)
363                 return -EINVAL;
364         dev = MINOR(inode->i_rdev);
365         if (dev >= MAX_NBD)
366                 return -ENODEV;
367 
368         lo = &nbd_dev[dev];
369         switch (cmd) {
370         case NBD_DISCONNECT:
371                 printk("NBD_DISCONNECT\n") ;
372                 sreq.cmd=2 ; /* shutdown command */
373                 if (!lo->sock) return -EINVAL ;
374                 nbd_send_req(lo->sock,&sreq) ;
375                 return 0 ;
376  
377         case NBD_CLEAR_SOCK:
378                 down(&lo->queue_lock);
379                 nbd_clear_que(lo);
380                 if (!list_empty(&lo->queue_head)) {
381                         up(&lo->queue_lock);
382                         printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
383                         return -EBUSY;
384                 }
385                 up(&lo->queue_lock);
386                 file = lo->file;
387                 if (!file)
388                         return -EINVAL;
389                 lo->file = NULL;
390                 lo->sock = NULL;
391                 fput(file);
392                 return 0;
393         case NBD_SET_SOCK:
394                 if (lo->file)
395                         return -EBUSY;
396                 error = -EINVAL;
397                 file = fget(arg);
398                 if (file) {
399                         inode = file->f_dentry->d_inode;
400                         /* N.B. Should verify that it's a socket */
401                         lo->file = file;
402                         lo->sock = &inode->u.socket_i;
403                         error = 0;
404                 }
405                 return error;
406         case NBD_SET_BLKSIZE:
407                 if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
408                         return -EINVAL;
409                 nbd_blksizes[dev] = arg;
410                 temp = arg >> 9;
411                 nbd_blksize_bits[dev] = 9;
412                 while (temp > 1) {
413                         nbd_blksize_bits[dev]++;
414                         temp >>= 1;
415                 }
416                 nbd_sizes[dev] = nbd_bytesizes[dev] >> nbd_blksize_bits[dev];
417                 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
418                 return 0;
419         case NBD_SET_SIZE:
420                 nbd_sizes[dev] = arg >> nbd_blksize_bits[dev];
421                 nbd_bytesizes[dev] = nbd_sizes[dev] << nbd_blksize_bits[dev];
422                 return 0;
423         case NBD_SET_SIZE_BLOCKS:
424                 nbd_sizes[dev] = arg;
425                 nbd_bytesizes[dev] = ((u64) arg) << nbd_blksize_bits[dev];
426                 return 0;
427         case NBD_DO_IT:
428                 if (!lo->file)
429                         return -EINVAL;
430                 nbd_do_it(lo);
431                 return lo->harderror;
432         case NBD_CLEAR_QUE:
433                 nbd_clear_que(lo);
434                 return 0;
435 #ifdef PARANOIA
436         case NBD_PRINT_DEBUG:
437                 printk(KERN_INFO "NBD device %d: next = %p, prev = %p. Global: in %d, out %d\n",
438                        dev, lo->queue_head.next, lo->queue_head.prev, requests_in, requests_out);
439                 return 0;
440 #endif
441         case BLKGETSIZE:
442                 return put_user(nbd_bytesizes[dev] >> 9, (long *) arg);
443         }
444         return -EINVAL;
445 }
446 
447 static int nbd_release(struct inode *inode, struct file *file)
448 {
449         struct nbd_device *lo;
450         int dev;
451 
452         if (!inode)
453                 return -ENODEV;
454         dev = MINOR(inode->i_rdev);
455         if (dev >= MAX_NBD)
456                 return -ENODEV;
457         lo = &nbd_dev[dev];
458         if (lo->refcnt <= 0)
459                 printk(KERN_ALERT "nbd_release: refcount(%d) <= 0\n", lo->refcnt);
460         lo->refcnt--;
461         /* N.B. Doesn't lo->file need an fput?? */
462         MOD_DEC_USE_COUNT;
463         return 0;
464 }
465 
466 static struct block_device_operations nbd_fops =
467 {
468         open:           nbd_open,
469         release:        nbd_release,
470         ioctl:          nbd_ioctl,
471 };
472 
473 /*
474  * And here should be modules and kernel interface 
475  *  (Just smiley confuses emacs :-)
476  */
477 
478 #ifdef MODULE
479 #define nbd_init init_module
480 #endif
481 
482 int nbd_init(void)
483 {
484         int i;
485 
486         if (sizeof(struct nbd_request) != 28) {
487                 printk(KERN_CRIT "Sizeof nbd_request needs to be 28 in order to work!\n" );
488                 return -EIO;
489         }
490 
491         if (register_blkdev(MAJOR_NR, "nbd", &nbd_fops)) {
492                 printk("Unable to get major number %d for NBD\n",
493                        MAJOR_NR);
494                 return -EIO;
495         }
496 #ifdef MODULE
497         printk("nbd: registered device at major %d\n", MAJOR_NR);
498 #endif
499         blksize_size[MAJOR_NR] = nbd_blksizes;
500         blk_size[MAJOR_NR] = nbd_sizes;
501         blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request);
502 #ifndef NBD_PLUGGABLE
503         blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), nbd_plug_device);
504 #endif
505         blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
506         for (i = 0; i < MAX_NBD; i++) {
507                 nbd_dev[i].refcnt = 0;
508                 nbd_dev[i].file = NULL;
509                 nbd_dev[i].magic = LO_MAGIC;
510                 nbd_dev[i].flags = 0;
511                 INIT_LIST_HEAD(&nbd_dev[i].queue_head);
512                 init_MUTEX(&nbd_dev[i].queue_lock);
513                 nbd_blksizes[i] = 1024;
514                 nbd_blksize_bits[i] = 10;
515                 nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
516                 nbd_sizes[i] = nbd_bytesizes[i] >> nbd_blksize_bits[i];
517                 register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
518                                 nbd_bytesizes[i]>>9);
519         }
520         devfs_handle = devfs_mk_dir (NULL, "nbd", NULL);
521         devfs_register_series (devfs_handle, "%u", MAX_NBD,
522                                DEVFS_FL_DEFAULT, MAJOR_NR, 0,
523                                S_IFBLK | S_IRUSR | S_IWUSR,
524                                &nbd_fops, NULL);
525 
526         return 0;
527 }
528 
#ifdef MODULE
/*
 * Module exit: removes the devfs entries, tears down the request queue and
 * unregisters the block major, logging whether the unregistration worked.
 */
void cleanup_module(void)
{
        int err;

        devfs_unregister(devfs_handle);
        blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));

        err = unregister_blkdev(MAJOR_NR, "nbd");
        if (err == 0)
                printk("nbd: module cleaned up.\n");
        else
                printk("nbd: cleanup_module failed\n");
}
#endif
541 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.