From ff2c3de305e2d06ae556e1a382ed75c5dd8f9dda Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 18 Sep 2009 12:58:47 -0700 Subject: cpqarray: switch to seq_file Signed-off-by: Alexey Dobriyan Cc: Chirag Kantharia Cc: Tejun Heo Cc: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cpqarray.c | 63 +++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index b82d438e260..6422651ec36 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -177,7 +178,6 @@ static int cpqarray_register_ctlr(int ctlr, struct pci_dev *pdev); #ifdef CONFIG_PROC_FS static void ida_procinit(int i); -static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data); #else static void ida_procinit(int i) {} #endif @@ -206,6 +206,7 @@ static const struct block_device_operations ida_fops = { #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_array; +static const struct file_operations ida_proc_fops; /* * Get us a file in /proc/array that says something about each controller. @@ -218,19 +219,16 @@ static void __init ida_procinit(int i) if (!proc_array) return; } - create_proc_read_entry(hba[i]->devname, 0, proc_array, - ida_proc_get_info, hba[i]); + proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]); } /* * Report information about this controller. 
*/ -static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static int ida_proc_show(struct seq_file *m, void *v) { - off_t pos = 0; - off_t len = 0; - int size, i, ctlr; - ctlr_info_t *h = (ctlr_info_t*)data; + int i, ctlr; + ctlr_info_t *h = (ctlr_info_t*)m->private; drv_info_t *drv; #ifdef CPQ_PROC_PRINT_QUEUES cmdlist_t *c; @@ -238,7 +236,7 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt #endif ctlr = h->ctlr; - size = sprintf(buffer, "%s: Compaq %s Controller\n" + seq_printf(m, "%s: Compaq %s Controller\n" " Board ID: 0x%08lx\n" " Firmware Revision: %c%c%c%c\n" " Controller Sig: 0x%08lx\n" @@ -258,55 +256,54 @@ static int ida_proc_get_info(char *buffer, char **start, off_t offset, int lengt h->log_drives, h->phys_drives, h->Qdepth, h->maxQsinceinit); - pos += size; len += size; - - size = sprintf(buffer+len, "Logical Drive Info:\n"); - pos += size; len += size; + seq_puts(m, "Logical Drive Info:\n"); for(i=0; ilog_drives; i++) { drv = &h->drv[i]; - size = sprintf(buffer+len, "ida/c%dd%d: blksz=%d nr_blks=%d\n", + seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n", ctlr, i, drv->blk_size, drv->nr_blks); - pos += size; len += size; } #ifdef CPQ_PROC_PRINT_QUEUES spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); - size = sprintf(buffer+len, "\nCurrent Queues:\n"); - pos += size; len += size; + seq_puts(m, "\nCurrent Queues:\n"); c = h->reqQ; - size = sprintf(buffer+len, "reqQ = %p", c); pos += size; len += size; + seq_printf(m, "reqQ = %p", c); if (c) c=c->next; while(c && c != h->reqQ) { - size = sprintf(buffer+len, "->%p", c); - pos += size; len += size; + seq_printf(m, "->%p", c); c=c->next; } c = h->cmpQ; - size = sprintf(buffer+len, "\ncmpQ = %p", c); pos += size; len += size; + seq_printf(m, "\ncmpQ = %p", c); if (c) c=c->next; while(c && c != h->cmpQ) { - size = sprintf(buffer+len, "->%p", c); - pos += size; len += size; + seq_printf(m, "->%p", c); c=c->next; } - size = 
sprintf(buffer+len, "\n"); pos += size; len += size; + seq_putc(m, '\n'); spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); #endif - size = sprintf(buffer+len, "nr_allocs = %d\nnr_frees = %d\n", + seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n", h->nr_allocs, h->nr_frees); - pos += size; len += size; - - *eof = 1; - *start = buffer+offset; - len -= offset; - if (len>length) - len = length; - return len; + return 0; +} + +static int ida_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ida_proc_show, PDE(inode)->data); } + +static const struct file_operations ida_proc_fops = { + .owner = THIS_MODULE, + .open = ida_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ module_param_array(eisa, int, NULL, 0); -- cgit v1.2.3 From d5d03eec9b36f861e9c97846348fb3b5759f2d82 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 18 Sep 2009 12:58:48 -0700 Subject: dac960: switch to seq_file Signed-off-by: Alexey Dobriyan Cc: Yang Hongyang Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/DAC960.c | 156 ++++++++++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 85 deletions(-) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 6fa7b0fdbdf..eb4fa194394 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -6422,16 +6423,10 @@ static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller, return true; } - -/* - DAC960_ProcReadStatus implements reading /proc/rd/status. 
-*/ - -static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_proc_show(struct seq_file *m, void *v) { unsigned char *StatusMessage = "OK\n"; - int ControllerNumber, BytesAvailable; + int ControllerNumber; for (ControllerNumber = 0; ControllerNumber < DAC960_ControllerCount; ControllerNumber++) @@ -6444,52 +6439,49 @@ static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset, break; } } - BytesAvailable = strlen(StatusMessage) - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &StatusMessage[Offset], Count); - return Count; + seq_puts(m, StatusMessage); + return 0; } +static int dac960_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_proc_show, NULL); +} -/* - DAC960_ProcReadInitialStatus implements reading /proc/rd/cN/initial_status. -*/ +static const struct file_operations dac960_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadInitialStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_initial_status_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; - int BytesAvailable = Controller->InitialStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->CombinedStatusBuffer[Offset], Count); - return Count; + DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; + seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer); + return 0; } +static int dac960_initial_status_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, 
dac960_initial_status_proc_show, PDE(inode)->data); +} -/* - DAC960_ProcReadCurrentStatus implements reading /proc/rd/cN/current_status. -*/ +static const struct file_operations dac960_initial_status_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_initial_status_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_current_status_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; + DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private; unsigned char *StatusMessage = "No Rebuild or Consistency Check in Progress\n"; int ProgressMessageLength = strlen(StatusMessage); - int BytesAvailable; if (jiffies != Controller->LastCurrentStatusTime) { Controller->CurrentStatusLength = 0; @@ -6513,49 +6505,41 @@ static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset, } Controller->LastCurrentStatusTime = jiffies; } - BytesAvailable = Controller->CurrentStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->CurrentStatusBuffer[Offset], Count); - return Count; + seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer); + return 0; } +static int dac960_current_status_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_current_status_proc_show, PDE(inode)->data); +} -/* - DAC960_ProcReadUserCommand implements reading /proc/rd/cN/user_command. 
-*/ +static const struct file_operations dac960_current_status_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_current_status_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int DAC960_ProcReadUserCommand(char *Page, char **Start, off_t Offset, - int Count, int *EOF, void *Data) +static int dac960_user_command_proc_show(struct seq_file *m, void *v) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; - int BytesAvailable = Controller->UserStatusLength - Offset; - if (Count >= BytesAvailable) - { - Count = BytesAvailable; - *EOF = true; - } - if (Count <= 0) return 0; - *Start = Page; - memcpy(Page, &Controller->UserStatusBuffer[Offset], Count); - return Count; -} + DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private; + seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer); + return 0; +} -/* - DAC960_ProcWriteUserCommand implements writing /proc/rd/cN/user_command. -*/ +static int dac960_user_command_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, dac960_user_command_proc_show, PDE(inode)->data); +} -static int DAC960_ProcWriteUserCommand(struct file *file, +static ssize_t dac960_user_command_proc_write(struct file *file, const char __user *Buffer, - unsigned long Count, void *Data) + size_t Count, loff_t *pos) { - DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data; + DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data; unsigned char CommandBuffer[80]; int Length; if (Count > sizeof(CommandBuffer)-1) return -EINVAL; @@ -6572,6 +6556,14 @@ static int DAC960_ProcWriteUserCommand(struct file *file, ? 
Count : -EBUSY); } +static const struct file_operations dac960_user_command_proc_fops = { + .owner = THIS_MODULE, + .open = dac960_user_command_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = dac960_user_command_proc_write, +}; /* DAC960_CreateProcEntries creates the /proc/rd/... entries for the @@ -6586,23 +6578,17 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller) if (DAC960_ProcDirectoryEntry == NULL) { DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL); - StatusProcEntry = create_proc_read_entry("status", 0, + StatusProcEntry = proc_create("status", 0, DAC960_ProcDirectoryEntry, - DAC960_ProcReadStatus, NULL); + &dac960_proc_fops); } sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber); ControllerProcEntry = proc_mkdir(Controller->ControllerName, DAC960_ProcDirectoryEntry); - create_proc_read_entry("initial_status", 0, ControllerProcEntry, - DAC960_ProcReadInitialStatus, Controller); - create_proc_read_entry("current_status", 0, ControllerProcEntry, - DAC960_ProcReadCurrentStatus, Controller); - UserCommandProcEntry = - create_proc_read_entry("user_command", S_IWUSR | S_IRUSR, - ControllerProcEntry, DAC960_ProcReadUserCommand, - Controller); - UserCommandProcEntry->write_proc = DAC960_ProcWriteUserCommand; + proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller); + proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller); + UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller); Controller->ControllerProcEntry = ControllerProcEntry; } -- cgit v1.2.3 From 4d761609471f7e543c880dd47ef5e1669076081b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 18 Sep 2009 12:58:48 -0700 Subject: cciss: fix schedule_timeout() parameters Change schedule_timeout() parameter to not be specific to HZ=1000. 
Signed-off-by: Randy Dunlap Acked-by: Mike Miller Cc: Marcin Slusarz Cc: "Cameron, Steve" Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 24c3e21ab26..b890f8b3c09 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -3489,7 +3490,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) if (scratchpad == CCISS_FIRMWARE_READY) break; set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); /* wait 100ms */ + schedule_timeout(msecs_to_jiffies(100)); /* wait 100ms */ } if (scratchpad != CCISS_FIRMWARE_READY) { printk(KERN_WARNING "cciss: Board not ready. Timed out.\n"); @@ -3615,7 +3616,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) break; /* delay and try again */ set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(10); + schedule_timeout(msecs_to_jiffies(1)); } #ifdef CCISS_DEBUG -- cgit v1.2.3 From c64bebcd7f33a6260b6d4c9999f797a633a3fa1c Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 17 Sep 2009 13:46:53 -0500 Subject: cciss: Remove sysfs entries for logical drives on driver cleanup. Sysfs entries for logical drives need to be removed when a drive is deleted during driver cleanup. Signed-off-by: Andrew Patterson Signed-off-by: Stephen M. 
Cameron Acked-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b890f8b3c09..f162f96c36e 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1978,7 +1978,6 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) h->drv[i].busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return_code = deregister_disk(h, i, 1); - cciss_destroy_ld_sysfs_entry(&h->drv[i]); h->drv[i].busy_configuring = 0; } } @@ -2119,6 +2118,7 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, * indicate that this element of the drive * array is free. */ + cciss_destroy_ld_sysfs_entry(drv); if (clear_all) { /* check to see if it was the last disk */ @@ -4142,6 +4142,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) if (q) blk_cleanup_queue(q); } + if (hba[i]->drv[j].raid_level != -1) + cciss_destroy_ld_sysfs_entry(&hba[i]->drv[j]); + } #ifdef CONFIG_CISS_SCSI_TAPE -- cgit v1.2.3 From b368c9dd65984d1860b97bff77644c0e3e46df96 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 17 Sep 2009 13:46:58 -0500 Subject: cciss: Use one scan thread per controller and fix hang during rmmod Replace the use of one scan kthread per controller with one per driver. Use a queue to hold a list of controllers that need to be rescanned with routines to add and remove controllers from the queue. Fix locking and completion handling to prevent a hang during rmmod. Signed-off-by: Andrew Patterson Signed-off-by: Stephen M. 
Cameron Acked-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 156 +++++++++++++++++++++++++++++++++++++++++++------- drivers/block/cciss.h | 7 ++- 2 files changed, 141 insertions(+), 22 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index f162f96c36e..4fb63b89879 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -156,6 +157,10 @@ static struct board_type products[] = { static ctlr_info_t *hba[MAX_CTLR]; +static struct task_struct *cciss_scan_thread; +static DEFINE_MUTEX(scan_mutex); +static LIST_HEAD(scan_q); + static void do_cciss_request(struct request_queue *q); static irqreturn_t do_cciss_intr(int irq, void *dev_id); static int cciss_open(struct block_device *bdev, fmode_t mode); @@ -3233,20 +3238,121 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) return IRQ_HANDLED; } +/** + * add_to_scan_list() - add controller to rescan queue + * @h: Pointer to the controller. + * + * Adds the controller to the rescan queue if not already on the queue. + * + * returns 1 if added to the queue, 0 if skipped (could be on the + * queue already, or the controller could be initializing or shutting + * down). + **/ +static int add_to_scan_list(struct ctlr_info *h) +{ + struct ctlr_info *test_h; + int found = 0; + int ret = 0; + + if (h->busy_initializing) + return 0; + + if (!mutex_trylock(&h->busy_shutting_down)) + return 0; + + mutex_lock(&scan_mutex); + list_for_each_entry(test_h, &scan_q, scan_list) { + if (test_h == h) { + found = 1; + break; + } + } + if (!found && !h->busy_scanning) { + INIT_COMPLETION(h->scan_wait); + list_add_tail(&h->scan_list, &scan_q); + ret = 1; + } + mutex_unlock(&scan_mutex); + mutex_unlock(&h->busy_shutting_down); + + return ret; +} + +/** + * remove_from_scan_list() - remove controller from rescan queue + * @h: Pointer to the controller. + * + * Removes the controller from the rescan queue if present. 
Blocks if + * the controller is currently conducting a rescan. + **/ +static void remove_from_scan_list(struct ctlr_info *h) +{ + struct ctlr_info *test_h, *tmp_h; + int scanning = 0; + + mutex_lock(&scan_mutex); + list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) { + if (test_h == h) { + list_del(&h->scan_list); + complete_all(&h->scan_wait); + mutex_unlock(&scan_mutex); + return; + } + } + if (&h->busy_scanning) + scanning = 0; + mutex_unlock(&scan_mutex); + + if (scanning) + wait_for_completion(&h->scan_wait); +} + +/** + * scan_thread() - kernel thread used to rescan controllers + * @data: Ignored. + * + * A kernel thread used to scan for drive topology changes on + * controllers. The thread processes only one controller at a time + * using a queue. Controllers are added to the queue using + * add_to_scan_list() and removed from the queue either after done + * processing or using remove_from_scan_list(). + * + * returns 0. + **/ static int scan_thread(void *data) { - ctlr_info_t *h = data; - int rc; - DECLARE_COMPLETION_ONSTACK(wait); - h->rescan_wait = &wait; + struct ctlr_info *h; - for (;;) { - rc = wait_for_completion_interruptible(&wait); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); if (kthread_should_stop()) break; - if (!rc) - rebuild_lun_table(h, 0); + + while (1) { + mutex_lock(&scan_mutex); + if (list_empty(&scan_q)) { + mutex_unlock(&scan_mutex); + break; + } + + h = list_entry(scan_q.next, + struct ctlr_info, + scan_list); + list_del(&h->scan_list); + h->busy_scanning = 1; + mutex_unlock(&scan_mutex); + + if (h) { + rebuild_lun_table(h, 0); + complete_all(&h->scan_wait); + mutex_lock(&scan_mutex); + h->busy_scanning = 0; + mutex_unlock(&scan_mutex); + } + } } + return 0; } @@ -3269,8 +3375,8 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c) case REPORT_LUNS_CHANGED: printk(KERN_WARNING "cciss%d: report LUN data " "changed\n", h->ctlr); - if (h->rescan_wait) - complete(h->rescan_wait); + 
add_to_scan_list(h); + wake_up_process(cciss_scan_thread); return 1; break; case POWER_OR_RESET: @@ -3919,6 +4025,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->busy_initializing = 1; INIT_HLIST_HEAD(&hba[i]->cmpQ); INIT_HLIST_HEAD(&hba[i]->reqQ); + mutex_init(&hba[i]->busy_shutting_down); if (cciss_pci_init(hba[i], pdev) != 0) goto clean0; @@ -3927,6 +4034,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->ctlr = i; hba[i]->pdev = pdev; + init_completion(&hba[i]->scan_wait); + if (cciss_create_hba_sysfs_entry(hba[i])) goto clean0; @@ -4036,14 +4145,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->cciss_max_sectors = 2048; - hba[i]->busy_initializing = 0; - rebuild_lun_table(hba[i], 1); - hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i], - "cciss_scan%02d", i); - if (IS_ERR(hba[i]->cciss_scan_thread)) - return PTR_ERR(hba[i]->cciss_scan_thread); - + hba[i]->busy_initializing = 0; return 1; clean4: @@ -4126,8 +4229,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) return; } - kthread_stop(hba[i]->cciss_scan_thread); + mutex_lock(&hba[i]->busy_shutting_down); + remove_from_scan_list(hba[i]); remove_proc_entry(hba[i]->devname, proc_cciss); unregister_blkdev(hba[i]->major, hba[i]->devname); @@ -4174,6 +4278,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); cciss_destroy_hba_sysfs_entry(hba[i]); + mutex_unlock(&hba[i]->busy_shutting_down); free_hba(i); } @@ -4206,15 +4311,25 @@ static int __init cciss_init(void) if (err) return err; + /* Start the scan thread */ + cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan"); + if (IS_ERR(cciss_scan_thread)) { + err = PTR_ERR(cciss_scan_thread); + goto err_bus_unregister; + } + /* Register for our PCI devices */ err = pci_register_driver(&cciss_pci_driver); if (err) - goto err_bus_register; + goto err_thread_stop; return 0; -err_bus_register: 
+err_thread_stop: + kthread_stop(cciss_scan_thread); +err_bus_unregister: bus_unregister(&cciss_bus_type); + return err; } @@ -4231,6 +4346,7 @@ static void __exit cciss_cleanup(void) cciss_remove_one(hba[i]->pdev); } } + kthread_stop(cciss_scan_thread); remove_proc_entry("driver/cciss", NULL); bus_unregister(&cciss_bus_type); } diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 06a5db25b29..4fb3639b6cf 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -2,6 +2,7 @@ #define CCISS_H #include +#include #include "cciss_cmd.h" @@ -108,6 +109,8 @@ struct ctlr_info int nr_frees; int busy_configuring; int busy_initializing; + int busy_scanning; + struct mutex busy_shutting_down; /* This element holds the zero based queue number of the last * queue to be started. It is used for fairness. @@ -122,8 +125,8 @@ struct ctlr_info /* and saved for later processing */ #endif unsigned char alive; - struct completion *rescan_wait; - struct task_struct *cciss_scan_thread; + struct list_head scan_list; + struct completion scan_wait; struct device dev; }; -- cgit v1.2.3 From d6f4965d7d2e718eb9b223cb06db5f6a53b73507 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 17 Sep 2009 13:47:03 -0500 Subject: cciss: Allow triggering of rescan of logical drive topology via sysfs entry Added /sys/bus/pci/devices//ccissX/rescan sysfs entry used to kick off a rescan that discovers logical drive topology changes. Signed-off-by: Andrew Patterson Signed-off-by: Stephen M. 
Cameron Acked-by: Mike Miller Signed-off-by: Jens Axboe --- .../ABI/testing/sysfs-bus-pci-devices-cciss | 7 +++++ drivers/block/cciss.c | 36 ++++++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index 0a92a7c93a6..ac3429def23 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -31,3 +31,10 @@ Date: March 2009 Kernel Version: 2.6.30 Contact: iss_storagedev@hp.com Description: A symbolic link to /sys/block/cciss!cXdY + +Where: /sys/bus/pci/devices//ccissX/rescan +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Kicks off a rescan of the controller to discover logical + drive topology changes. diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4fb63b89879..a45268554e0 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -195,6 +195,7 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c, static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c); static void fail_all_cmds(unsigned long ctlr); +static int add_to_scan_list(struct ctlr_info *h); static int scan_thread(void *data); static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); @@ -460,9 +461,19 @@ static void __devinit cciss_procinit(int i) #define to_hba(n) container_of(n, struct ctlr_info, dev) #define to_drv(n) container_of(n, drive_info_struct, dev) -static struct device_type cciss_host_type = { - .name = "cciss_host", -}; +static ssize_t host_store_rescan(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ctlr_info *h = to_hba(dev); + + add_to_scan_list(h); + wake_up_process(cciss_scan_thread); + wait_for_completion_interruptible(&h->scan_wait); + + return count; +} +DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan); static 
ssize_t dev_show_unique_id(struct device *dev, struct device_attribute *attr, @@ -566,6 +577,25 @@ static ssize_t dev_show_rev(struct device *dev, } DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); +static struct attribute *cciss_host_attrs[] = { + &dev_attr_rescan.attr, + NULL +}; + +static struct attribute_group cciss_host_attr_group = { + .attrs = cciss_host_attrs, +}; + +static struct attribute_group *cciss_host_attr_groups[] = { + &cciss_host_attr_group, + NULL +}; + +static struct device_type cciss_host_type = { + .name = "cciss_host", + .groups = cciss_host_attr_groups, +}; + static struct attribute *cciss_dev_attrs[] = { &dev_attr_unique_id.attr, &dev_attr_model.attr, -- cgit v1.2.3 From 21d9db0b6231ef908fcdbfacefa392352776857f Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:08 -0500 Subject: cciss: Remove some unused code in rebuild_lun_table() Remove some unused code in rebuild_lun_table() Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index a45268554e0..e15f4acf08a 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1708,7 +1708,6 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) unsigned long flags = 0; int ret = 0; drive_info_struct *drvinfo; - int was_only_controller_node; /* Get information about the disk and modify the driver structure */ inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL); @@ -1716,13 +1715,6 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) if (inq_buff == NULL || drvinfo == NULL) goto mem_msg; - /* See if we're trying to update the "controller node" - * this will happen the when the first logical drive gets - * created by ACU. 
- */ - was_only_controller_node = (drv_index == 0 && - h->drv[0].raid_level == -1); - /* testing to see if 16-byte CDBs are already being used */ if (h->cciss_read == CCISS_READ_16) { cciss_read_capacity_16(h->ctlr, drv_index, 1, -- cgit v1.2.3 From 617e1344229d22ea9ecb6538e50808541618ed2b Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:14 -0500 Subject: cciss: Dynamically allocate struct device for each logical drive as needed. Dynamically allocate struct device for each logical drive as needed instead of allocating the maximum we would ever need at driver init time. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 98 +++++++++++++++++++++++++++++++++++++-------------- drivers/block/cciss.h | 2 +- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e15f4acf08a..30b328aefe7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -198,6 +198,8 @@ static void fail_all_cmds(unsigned long ctlr); static int add_to_scan_list(struct ctlr_info *h); static int scan_thread(void *data); static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); +static void cciss_hba_release(struct device *dev); +static void cciss_device_release(struct device *dev); #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); @@ -459,7 +461,6 @@ static void __devinit cciss_procinit(int i) #define MAX_PRODUCT_NAME_LEN 19 #define to_hba(n) container_of(n, struct ctlr_info, dev) -#define to_drv(n) container_of(n, drive_info_struct, dev) static ssize_t host_store_rescan(struct device *dev, struct device_attribute *attr, @@ -479,8 +480,8 @@ static ssize_t dev_show_unique_id(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = to_drv(dev); - struct ctlr_info *h = to_hba(drv->dev.parent); + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); __u8 sn[16]; 
unsigned long flags; int ret = 0; @@ -509,8 +510,8 @@ static ssize_t dev_show_vendor(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = to_drv(dev); - struct ctlr_info *h = to_hba(drv->dev.parent); + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); char vendor[VENDOR_LEN + 1]; unsigned long flags; int ret = 0; @@ -533,8 +534,8 @@ static ssize_t dev_show_model(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = to_drv(dev); - struct ctlr_info *h = to_hba(drv->dev.parent); + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); char model[MODEL_LEN + 1]; unsigned long flags; int ret = 0; @@ -557,8 +558,8 @@ static ssize_t dev_show_rev(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = to_drv(dev); - struct ctlr_info *h = to_hba(drv->dev.parent); + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); char rev[REV_LEN + 1]; unsigned long flags; int ret = 0; @@ -594,6 +595,7 @@ static struct attribute_group *cciss_host_attr_groups[] = { static struct device_type cciss_host_type = { .name = "cciss_host", .groups = cciss_host_attr_groups, + .release = cciss_hba_release, }; static struct attribute *cciss_dev_attrs[] = { @@ -616,12 +618,24 @@ static const struct attribute_group *cciss_dev_attr_groups[] = { static struct device_type cciss_dev_type = { .name = "cciss_device", .groups = cciss_dev_attr_groups, + .release = cciss_device_release, }; static struct bus_type cciss_bus_type = { .name = "cciss", }; +/* + * cciss_hba_release is called when the reference count + * of h->dev goes to zero. + */ +static void cciss_hba_release(struct device *dev) +{ + /* + * nothing to do, but need this to avoid a warning + * about not having a release handler from lib/kref.c. + */ +} /* * Initialize sysfs entry for each controller. 
This sets up and registers @@ -645,6 +659,15 @@ static int cciss_create_hba_sysfs_entry(struct ctlr_info *h) static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h) { device_del(&h->dev); + put_device(&h->dev); /* final put. */ +} + +/* cciss_device_release is called when the reference count + * of h->drv[x].dev goes to zero. + */ +static void cciss_device_release(struct device *dev) +{ + kfree(dev); } /* @@ -653,24 +676,33 @@ static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h) * /sys/bus/pci/devices/dev); - drv->dev.type = &cciss_dev_type; - drv->dev.bus = &cciss_bus_type; - dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index); - drv->dev.parent = &h->dev; - return device_add(&drv->dev); + struct device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + device_initialize(dev); + dev->type = &cciss_dev_type; + dev->bus = &cciss_bus_type; + dev_set_name(dev, "c%dd%d", h->ctlr, drv_index); + dev->parent = &h->dev; + h->drv[drv_index].dev = dev; + dev_set_drvdata(dev, &h->drv[drv_index]); + return device_add(dev); } /* * Remove sysfs entries for a logical drive. */ -static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv) +static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index) { - device_del(&drv->dev); + struct device *dev = h->drv[drv_index].dev; + device_del(dev); + put_device(dev); /* the "final" put. 
*/ + h->drv[drv_index].dev = NULL; } /* @@ -1651,7 +1683,10 @@ static void cciss_get_serial_no(int ctlr, int logvol, int withirq, return; } -static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, +/* + * cciss_add_disk sets up the block device queue for a logical drive + */ +static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, int drv_index) { disk->queue = blk_init_queue(do_cciss_request, &h->lock); @@ -1659,8 +1694,12 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; disk->fops = &cciss_fops; + if (h->drv[drv_index].dev == NULL) { + if (cciss_create_ld_sysfs_entry(h, drv_index)) + goto cleanup_queue; + } disk->private_data = &h->drv[drv_index]; - disk->driverfs_dev = &h->drv[drv_index].dev; + disk->driverfs_dev = h->drv[drv_index].dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); @@ -1686,6 +1725,12 @@ static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, wmb(); h->drv[drv_index].queue = disk->queue; add_disk(disk); + return 0; + +cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; + return -1; } /* This function will check the usage_count of the drive to be updated/added. @@ -1871,7 +1916,7 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) } } h->drv[drv_index].LunID = lunid; - if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index)) + if (cciss_create_ld_sysfs_entry(h, drv_index)) goto err_free_disk; /* Don't need to mark this busy because nobody */ @@ -2145,7 +2190,7 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, * indicate that this element of the drive * array is free. 
*/ - cciss_destroy_ld_sysfs_entry(drv); + cciss_destroy_ld_sysfs_entry(h, drv_index); if (clear_all) { /* check to see if it was the last disk */ @@ -4268,8 +4313,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) if (q) blk_cleanup_queue(q); } - if (hba[i]->drv[j].raid_level != -1) - cciss_destroy_ld_sysfs_entry(&hba[i]->drv[j]); + if (hba[i]->drv[j].dev != NULL && + (j == 0 || hba[i]->drv[j].raid_level != -1)) + cciss_destroy_ld_sysfs_entry(hba[i], j); } @@ -4345,7 +4391,7 @@ static int __init cciss_init(void) if (err) goto err_thread_stop; - return 0; + return err; err_thread_stop: kthread_stop(cciss_scan_thread); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 4fb3639b6cf..96793425688 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -45,7 +45,7 @@ typedef struct _drive_info_struct * to prevent it from being opened or it's * queue from being started. */ - struct device dev; + struct device *dev; __u8 serial_no[16]; /* from inquiry page 0x83, * not necc. null terminated. */ -- cgit v1.2.3 From 097d026453e7051a544722f4e05240085916499d Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:19 -0500 Subject: cciss: Rearrange logical drive sysfs code to make the "changing a disk" path work. Rearrange logical drive sysfs code to make the "changing a disk" path work. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 30b328aefe7..2810dd9805a 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1916,9 +1916,10 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) } } h->drv[drv_index].LunID = lunid; - if (cciss_create_ld_sysfs_entry(h, drv_index)) - goto err_free_disk; - + if (h->drv[drv_index].dev == NULL) { + if (cciss_create_ld_sysfs_entry(h, drv_index)) + goto err_free_disk; + } /* Don't need to mark this busy because nobody */ /* else knows about this disk yet to contend */ /* for access to it. */ @@ -2145,8 +2146,10 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, */ if (h->gendisk[0] != disk) { struct request_queue *q = disk->queue; - if (disk->flags & GENHD_FL_UP) + if (disk->flags & GENHD_FL_UP) { + cciss_destroy_ld_sysfs_entry(h, drv_index); del_gendisk(disk); + } if (q) { blk_cleanup_queue(q); /* Set drv->queue to NULL so that we do not try @@ -2190,7 +2193,6 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, * indicate that this element of the drive * array is free. */ - cciss_destroy_ld_sysfs_entry(h, drv_index); if (clear_all) { /* check to see if it was the last disk */ @@ -4308,15 +4310,13 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) if (disk) { struct request_queue *q = disk->queue; - if (disk->flags & GENHD_FL_UP) + if (disk->flags & GENHD_FL_UP) { + cciss_destroy_ld_sysfs_entry(hba[i], j); del_gendisk(disk); + } if (q) blk_cleanup_queue(q); } - if (hba[i]->drv[j].dev != NULL && - (j == 0 || hba[i]->drv[j].raid_level != -1)) - cciss_destroy_ld_sysfs_entry(hba[i], j); - } #ifdef CONFIG_CISS_SCSI_TAPE -- cgit v1.2.3 From e8074f79770953be26b64539803d06a46d1a6e58 Mon Sep 17 00:00:00 2001 From: "Stephen M. 
Cameron" Date: Thu, 17 Sep 2009 13:47:24 -0500 Subject: cciss: Handle failure of blk_init_queue gracefully in cciss_add_disk. Handle failure of blk_init_queue gracefully in cciss_add_disk. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 2810dd9805a..b1211d530da 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1690,6 +1690,8 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, int drv_index) { disk->queue = blk_init_queue(do_cciss_request, &h->lock); + if (!disk->queue) + goto init_queue_failure; sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index); disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; @@ -1730,6 +1732,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, cleanup_queue: blk_cleanup_queue(disk->queue); disk->queue = NULL; +init_queue_failure: return -1; } -- cgit v1.2.3 From 361e9b07d11cfc8b77921a0e96910019402efe79 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:29 -0500 Subject: cciss: Handle cases when cciss_add_disk fails. Handle cases when cciss_add_disk fails. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b1211d530da..ced71b006cd 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -200,6 +200,7 @@ static int scan_thread(void *data); static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); static void cciss_hba_release(struct device *dev); static void cciss_device_release(struct device *dev); +static void cciss_free_gendisk(ctlr_info_t *h, int drv_index); #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); @@ -1856,8 +1857,14 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) * (raid_leve == -1) then we want to update the * logical drive's information. */ - if (drv_index || first_time) - cciss_add_disk(h, disk, drv_index); + if (drv_index || first_time) { + if (cciss_add_disk(h, disk, drv_index) != 0) { + cciss_free_gendisk(h, drv_index); + printk(KERN_WARNING "cciss:%d could not update " + "disk %d\n", h->ctlr, drv_index); + --h->num_luns; + } + } freeret: kfree(inq_buff); @@ -1891,6 +1898,12 @@ static int cciss_find_free_drive_index(int ctlr, int controller_node) return -1; } +static void cciss_free_gendisk(ctlr_info_t *h, int drv_index) +{ + put_disk(h->gendisk[drv_index]); + h->gendisk[drv_index] = NULL; +} + /* cciss_add_gendisk finds a free hba[]->drv structure * and allocates a gendisk if needed, and sets the lunid * in the drvinfo structure. 
It returns the index into @@ -1931,8 +1944,7 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) return drv_index; err_free_disk: - put_disk(h->gendisk[drv_index]); - h->gendisk[drv_index] = NULL; + cciss_free_gendisk(h, drv_index); return -1; } @@ -1950,11 +1962,8 @@ static void cciss_add_controller_node(ctlr_info_t *h) return; drv_index = cciss_add_gendisk(h, 0, 1); - if (drv_index == -1) { - printk(KERN_WARNING "cciss%d: could not " - "add disk 0.\n", h->ctlr); - return; - } + if (drv_index == -1) + goto error; h->drv[drv_index].block_size = 512; h->drv[drv_index].nr_blocks = 0; h->drv[drv_index].heads = 0; @@ -1963,7 +1972,13 @@ static void cciss_add_controller_node(ctlr_info_t *h) h->drv[drv_index].raid_level = -1; memset(h->drv[drv_index].serial_no, 0, 16); disk = h->gendisk[drv_index]; - cciss_add_disk(h, disk, drv_index); + if (cciss_add_disk(h, disk, drv_index) == 0) + return; + cciss_free_gendisk(h, drv_index); +error: + printk(KERN_WARNING "cciss%d: could not " + "add disk 0.\n", h->ctlr); + return; } /* This function will add and remove logical drives from the Logical -- cgit v1.2.3 From 8ce51966d3b809d6c1ae4f3902058558589480b8 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:34 -0500 Subject: cciss: Handle special case for sysfs attributes of the first logical drive. For c0dx where x is not 0, we handle deletion and addition simply, but for c0d0, there is the special case that even when there's no disk, the device node exists so that the controller may be accessed. So, for c0d0, we only create the sysfs entries once, when a controller is added, and only remove them once, when a controller is being taken down. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ced71b006cd..aa95eeb3020 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -682,6 +682,10 @@ static long cciss_create_ld_sysfs_entry(struct ctlr_info *h, { struct device *dev; + /* Special case for c*d0, we only create it once. */ + if (drv_index == 0 && h->drv[drv_index].dev != NULL) + return 0; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; @@ -698,9 +702,15 @@ static long cciss_create_ld_sysfs_entry(struct ctlr_info *h, /* * Remove sysfs entries for a logical drive. */ -static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index) +static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index, + int ctlr_exiting) { struct device *dev = h->drv[drv_index].dev; + + /* special case for c*d0, we only destroy it on controller exit */ + if (drv_index == 0 && !ctlr_exiting) + return; + device_del(dev); put_device(dev); /* the "final" put. */ h->drv[drv_index].dev = NULL; @@ -1920,6 +1930,7 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) drv_index = cciss_find_free_drive_index(h->ctlr, controller_node); if (drv_index == -1) return -1; + /*Check if the gendisk needs to be allocated */ if (!h->gendisk[drv_index]) { h->gendisk[drv_index] = @@ -2165,7 +2176,7 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, if (h->gendisk[0] != disk) { struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) { - cciss_destroy_ld_sysfs_entry(h, drv_index); + cciss_destroy_ld_sysfs_entry(h, drv_index, 0); del_gendisk(disk); } if (q) { @@ -2211,7 +2222,6 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, * indicate that this element of the drive * array is free. 
*/ - if (clear_all) { /* check to see if it was the last disk */ if (drv == h->drv + h->highest_lun) { @@ -4329,7 +4339,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) struct request_queue *q = disk->queue; if (disk->flags & GENHD_FL_UP) { - cciss_destroy_ld_sysfs_entry(hba[i], j); + cciss_destroy_ld_sysfs_entry(hba[i], j, 1); del_gendisk(disk); } if (q) -- cgit v1.2.3 From 9ddb27b44ffeb3080b71cc493b2edff2224d9356 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:39 -0500 Subject: cciss: Clear all sysfs-exposed data for deleted logical drives. When removing a logical drive, clear all the information that is now exposed by sysfs (e.g. vendor, model, serial number.) Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index aa95eeb3020..09a0f7bb433 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2134,6 +2134,25 @@ mem_msg: goto freeret; } +static void cciss_clear_drive_info(drive_info_struct *drive_info) +{ + /* zero out the disk size info */ + drive_info->nr_blocks = 0; + drive_info->block_size = 0; + drive_info->heads = 0; + drive_info->sectors = 0; + drive_info->cylinders = 0; + drive_info->raid_level = -1; + memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no)); + memset(drive_info->model, 0, sizeof(drive_info->model)); + memset(drive_info->rev, 0, sizeof(drive_info->rev)); + memset(drive_info->vendor, 0, sizeof(drive_info->vendor)); + /* + * don't clear the LUNID though, we need to remember which + * one this one is. + */ +} + /* This function will deregister the disk and it's queue from the * kernel. It must be called with the controller lock held and the * drv structures busy_configuring flag set. 
It's parameters are: @@ -2212,16 +2231,8 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, } --h->num_luns; - /* zero out the disk size info */ - drv->nr_blocks = 0; - drv->block_size = 0; - drv->heads = 0; - drv->sectors = 0; - drv->cylinders = 0; - drv->raid_level = -1; /* This can be used as a flag variable to - * indicate that this element of the drive - * array is free. - */ + cciss_clear_drive_info(drv); + if (clear_all) { /* check to see if it was the last disk */ if (drv == h->drv + h->highest_lun) { -- cgit v1.2.3 From 2d11d9931f5968bddac50d9d224c4812d4be869a Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:44 -0500 Subject: cciss: Fix usage_count check in rebuild_lun_table when triggered via sysfs. When rebuild_lun_table is reached via sysfs, the usage count that is checked prior to messing with c0d0 has different constraints (must be zero) than if rebuild_lun_table is reached via ioctl (must be one.) Fix rebuild_lun_table to take that into account. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 09a0f7bb433..0a3c057c778 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -170,9 +170,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int cciss_revalidate(struct gendisk *disk); -static int rebuild_lun_table(ctlr_info_t *h, int first_time); +static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl); static int deregister_disk(ctlr_info_t *h, int drv_index, - int clear_all); + int clear_all, int via_ioctl); static void cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size, unsigned int *block_size); @@ -1211,7 +1211,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, case CCISS_DEREGDISK: case CCISS_REGNEWD: case CCISS_REVALIDVOLS: - return rebuild_lun_table(host, 0); + return rebuild_lun_table(host, 0, 1); case CCISS_GETLUNINFO:{ LogvolInfo_struct luninfo; @@ -1757,7 +1757,8 @@ init_queue_failure: * is also the controller node. Any changes to disk 0 will show up on * the next reboot. */ -static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) +static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, + int via_ioctl) { ctlr_info_t *h = hba[ctlr]; struct gendisk *disk; @@ -1835,7 +1836,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time) * which keeps the interrupt handler from starting * the queue. 
*/ - ret = deregister_disk(h, drv_index, 0); + ret = deregister_disk(h, drv_index, 0, via_ioctl); h->drv[drv_index].busy_configuring = 0; } @@ -2000,7 +2001,8 @@ error: * INPUT * h = The controller to perform the operations on */ -static int rebuild_lun_table(ctlr_info_t *h, int first_time) +static int rebuild_lun_table(ctlr_info_t *h, int first_time, + int via_ioctl) { int ctlr = h->ctlr; int num_luns; @@ -2079,7 +2081,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); h->drv[i].busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - return_code = deregister_disk(h, i, 1); + return_code = deregister_disk(h, i, 1, via_ioctl); h->drv[i].busy_configuring = 0; } } @@ -2117,7 +2119,8 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time) if (drv_index == -1) goto freeret; } - cciss_update_drive_info(ctlr, drv_index, first_time); + cciss_update_drive_info(ctlr, drv_index, first_time, + via_ioctl); } /* end for */ freeret: @@ -2167,9 +2170,15 @@ static void cciss_clear_drive_info(drive_info_struct *drive_info) * the disk in preparation for re-adding it. In this case * the highest_lun should be left unchanged and the LunID * should not be cleared. + * via_ioctl + * This indicates whether we've reached this path via ioctl. + * This affects the maximum usage count allowed for c0d0 to be messed with. + * If this path is reached via ioctl(), then the max_usage_count will + * be 1, as the process calling ioctl() has got to have the device open. + * If we get here via sysfs, then the max usage count will be zero. 
*/ static int deregister_disk(ctlr_info_t *h, int drv_index, - int clear_all) + int clear_all, int via_ioctl) { int i; struct gendisk *disk; @@ -2183,7 +2192,7 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, /* make sure logical volume is NOT is use */ if (clear_all || (h->gendisk[0] == disk)) { - if (drv->usage_count > 1) + if (drv->usage_count > via_ioctl) return -EBUSY; } else if (drv->usage_count > 0) return -EBUSY; @@ -3452,7 +3461,7 @@ static int scan_thread(void *data) mutex_unlock(&scan_mutex); if (h) { - rebuild_lun_table(h, 0); + rebuild_lun_table(h, 0, 0); complete_all(&h->scan_wait); mutex_lock(&scan_mutex); h->busy_scanning = 0; @@ -4253,7 +4262,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->cciss_max_sectors = 2048; - rebuild_lun_table(hba[i], 1); + rebuild_lun_table(hba[i], 1, 0); hba[i]->busy_initializing = 0; return 1; -- cgit v1.2.3 From 2c935593ac1871211b43a54f023dc3bc605ad346 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:50 -0500 Subject: cciss: Fix excessive gendisk freeing bug on driver unload. Fix bug that free_hba was calling put_disk for all gendisk[] pointers -- all 1024 of them -- regardless of whether they were used or not (NULL). This bug could cause rmmod to oops if logical drives had been deleted during the driver's lifetime. Signed-off-by: Stephen M.
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 0a3c057c778..3a6ca7de4c9 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3893,15 +3893,16 @@ Enomem: return -1; } -static void free_hba(int i) +static void free_hba(int n) { - ctlr_info_t *p = hba[i]; - int n; + ctlr_info_t *h = hba[n]; + int i; - hba[i] = NULL; - for (n = 0; n < CISS_MAX_LUN; n++) - put_disk(p->gendisk[n]); - kfree(p); + hba[n] = NULL; + for (i = 0; i < h->highest_lun + 1; i++) + if (h->gendisk[i] != NULL) + put_disk(h->gendisk[i]); + kfree(h); } /* Send a message CDB to the firmware. */ -- cgit v1.2.3 From 983333cb0c445c56808502461bbb34876c63eb2b Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:47:55 -0500 Subject: cciss: Silence noisy per-disk messages output by cciss_read_capacity Silence noisy per-disk messages output by cciss_read_capacity Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 3a6ca7de4c9..67c4899ce9e 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2602,8 +2602,6 @@ static void cciss_geometry_inquiry(int ctlr, int logvol, } else { /* Get geometry failed */ printk(KERN_WARNING "cciss: reading geometry failed\n"); } - printk(KERN_INFO " heads=%d, sectors=%d, cylinders=%d\n\n", - drv->heads, drv->sectors, drv->cylinders); } static void @@ -2637,9 +2635,6 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size, *total_size = 0; *block_size = BLOCK_SIZE; } - if (*total_size != 0) - printk(KERN_INFO " blocks= %llu block_size= %d\n", - (unsigned long long)*total_size+1, *block_size); kfree(buf); } -- cgit v1.2.3 From 39ccf9a645dbca7f9866317380912327570787c0 Mon Sep 17 00:00:00 2001 From: "Stephen M. 
Cameron" Date: Thu, 17 Sep 2009 13:48:00 -0500 Subject: cciss: Preserve all 8 bytes of LUN ID for logical drives. Preserve all 8 bytes of the LunID field returned by CCISS_REPORT_LOGICAL instead of only saving 4 bytes. This fixes a bug with logical volume addressing encountered on an MSA2012. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 48 ++++++++++++++++++++++++------------------------ drivers/block/cciss.h | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 67c4899ce9e..d6ea9376797 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -846,7 +846,8 @@ static int cciss_open(struct block_device *bdev, fmode_t mode) if (MINOR(bdev->bd_dev) & 0x0f) { return -ENXIO; /* if it is, make sure we have a LUN ID */ - } else if (drv->LunID == 0) { + } else if (memcmp(drv->LunID, CTLR_LUNID, + sizeof(drv->LunID))) { return -ENXIO; } } @@ -1216,7 +1217,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode, case CCISS_GETLUNINFO:{ LogvolInfo_struct luninfo; - luninfo.LunID = drv->LunID; + memcpy(&luninfo.LunID, drv->LunID, + sizeof(luninfo.LunID)); luninfo.num_opens = drv->usage_count; luninfo.num_parts = 0; if (copy_to_user(argp, &luninfo, @@ -1611,13 +1613,11 @@ static void cciss_softirq_done(struct request *rq) spin_unlock_irqrestore(&h->lock, flags); } -static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[], - uint32_t log_unit) +static inline void log_unit_to_scsi3addr(ctlr_info_t *h, + unsigned char scsi3addr[], uint32_t log_unit) { - log_unit = h->drv[log_unit].LunID & 0x03fff; - memset(&scsi3addr[4], 0, 4); - memcpy(&scsi3addr[0], &log_unit, 4); - scsi3addr[3] |= 0x40; + memcpy(scsi3addr, h->drv[log_unit].LunID, + sizeof(h->drv[log_unit].LunID)); } /* This function gets the SCSI vendor, model, and revision of a logical drive @@ -1924,7 +1924,8 @@ static void cciss_free_gendisk(ctlr_info_t *h, int drv_index) * a 
means to talk to the controller in case no logical * drives have yet been configured. */ -static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) +static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[], + int controller_node) { int drv_index; @@ -1943,7 +1944,8 @@ static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node) return -1; } } - h->drv[drv_index].LunID = lunid; + memcpy(h->drv[drv_index].LunID, lunid, + sizeof(h->drv[drv_index].LunID)); if (h->drv[drv_index].dev == NULL) { if (cciss_create_ld_sysfs_entry(h, drv_index)) goto err_free_disk; @@ -1973,7 +1975,7 @@ static void cciss_add_controller_node(ctlr_info_t *h) if (h->gendisk[0] != NULL) /* already did this? Then bail. */ return; - drv_index = cciss_add_gendisk(h, 0, 1); + drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1); if (drv_index == -1) goto error; h->drv[drv_index].block_size = 512; @@ -2012,7 +2014,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, int i; int drv_found; int drv_index = 0; - __u32 lunid = 0; + unsigned char lunid[8] = CTLR_LUNID; unsigned long flags; if (!capable(CAP_SYS_RAWIO)) @@ -2069,9 +2071,9 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, continue; for (j = 0; j < num_luns; j++) { - memcpy(&lunid, &ld_buff->LUN[j][0], 4); - lunid = le32_to_cpu(lunid); - if (h->drv[i].LunID == lunid) { + memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid)); + if (memcmp(h->drv[i].LunID, lunid, + sizeof(lunid)) == 0) { drv_found = 1; break; } @@ -2096,9 +2098,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, drv_found = 0; - memcpy(&lunid, &ld_buff->LUN[i][0], 4); - lunid = le32_to_cpu(lunid); - + memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid)); /* Find if the LUN is already in the drive array * of the driver. If so then update its info * if not in use. 
If it does not exist then find @@ -2106,7 +2106,8 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, */ for (j = 0; j <= h->highest_lun; j++) { if (h->drv[j].raid_level != -1 && - h->drv[j].LunID == lunid) { + memcmp(h->drv[j].LunID, lunid, + sizeof(h->drv[j].LunID)) == 0) { drv_index = j; drv_found = 1; break; @@ -2254,8 +2255,7 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, } h->highest_lun = newhighest; } - - drv->LunID = 0; + memset(drv->LunID, 0, sizeof(drv->LunID)); } return 0; } @@ -2686,7 +2686,8 @@ static int cciss_revalidate(struct gendisk *disk) InquiryData_struct *inq_buff = NULL; for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) { - if (h->drv[logvol].LunID == drv->LunID) { + if (memcmp(h->drv[logvol].LunID, drv->LunID, + sizeof(drv->LunID)) == 0) { FOUND = 1; break; } @@ -3171,8 +3172,7 @@ static void do_cciss_request(struct request_queue *q) /* The first 2 bits are reserved for controller error reporting. */ c->Header.Tag.lower = (c->cmdindex << 3); c->Header.Tag.lower |= 0x04; /* flag for direct lookup. */ - c->Header.LUN.LogDev.VolId = drv->LunID; - c->Header.LUN.LogDev.Mode = 1; + memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID)); c->Request.CDBLen = 10; // 12 byte commands not in FW yet; c->Request.Type.Type = TYPE_CMD; // It is a command. c->Request.Type.Attribute = ATTR_SIMPLE; diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 96793425688..5188f713d1b 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -30,7 +30,7 @@ struct access_method { }; typedef struct _drive_info_struct { - __u32 LunID; + unsigned char LunID[8]; int usage_count; struct request_queue *queue; sector_t nr_blocks; -- cgit v1.2.3 From 2e043986d584cf95656d4ee0c40fb2051e8a8460 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:05 -0500 Subject: cciss: Don't check h->busy_initializing in cciss_open(). Don't check h->busy_initializing in cciss_open(). 
Open won't be called before things are ready, but h->busy_initializing won't be unset until after the initial rebuild_lun_table is finished. But, to read the partitions, cciss_open will be called for each logical drive during rebuild_lun_table. If cciss_open checks h->busy_initializing, then the reading of the partition information during the initial rebuild_lun_table will fail, which is especially bad news if it happens to be your boot device. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d6ea9376797..79afca2e824 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -830,7 +830,7 @@ static int cciss_open(struct block_device *bdev, fmode_t mode) printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name); #endif /* CCISS_DEBUG */ - if (host->busy_initializing || drv->busy_configuring) + if (drv->busy_configuring) return -EBUSY; /* * Root is allowed to open raw volume zero even if it's not configured -- cgit v1.2.3 From ce84a8aeac4a4a2cc421b3145dd2fb7cae860e4d Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:10 -0500 Subject: cciss: Add lunid attribute to each logical drive in /sys Add lunid attribute to each logical drive at /sys/devices/<dev>/ccissX/cXdY/lunid for controller X, logical drive Y Signed-off-by: Stephen M.
Cameron Signed-off-by: Jens Axboe --- .../ABI/testing/sysfs-bus-pci-devices-cciss | 7 ++++++ drivers/block/cciss.c | 26 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index ac3429def23..5a6c8d36afc 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -38,3 +38,10 @@ Kernel Version: 2.6.31 Contact: iss_storagedev@hp.com Description: Kicks of a rescan of the controller to discover logical drive topology changes. + +Where: /sys/bus/pci/devices//ccissX/cXdY/lunid +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the 8-byte LUN ID used to address logical + drive Y of controller X. diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 79afca2e824..ae0cb1329e9 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -579,6 +579,31 @@ static ssize_t dev_show_rev(struct device *dev, } DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); +static ssize_t cciss_show_lunid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); + unsigned long flags; + unsigned char lunid[8]; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + if (!drv->heads) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -ENOTTY; + } + memcpy(lunid, drv->LunID, sizeof(lunid)); + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + lunid[0], lunid[1], lunid[2], lunid[3], + lunid[4], lunid[5], lunid[6], lunid[7]); +} +DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); + static struct attribute *cciss_host_attrs[] = { &dev_attr_rescan.attr, NULL @@ 
-604,6 +629,7 @@ static struct attribute *cciss_dev_attrs[] = { &dev_attr_model.attr, &dev_attr_vendor.attr, &dev_attr_rev.attr, + &dev_attr_lunid.attr, NULL }; -- cgit v1.2.3 From fa52bec9df974096f9eb0e42a0b890512c0a0036 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:15 -0500 Subject: cciss: fix some magic numbers in the raid-level decoding cciss: fix some magic numbers in the raid-level decoding Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ae0cb1329e9..b674f93d4be 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -255,8 +255,6 @@ static inline void removeQ(CommandList_struct *c) #include "cciss_scsi.c" /* For SCSI tape support */ -#define RAID_UNKNOWN 6 - #ifdef CONFIG_PROC_FS /* @@ -268,6 +266,7 @@ static inline void removeQ(CommandList_struct *c) static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", "UNKNOWN" }; +#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1) static struct proc_dir_entry *proc_cciss; @@ -341,7 +340,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v) vol_sz_frac *= 100; sector_div(vol_sz_frac, ENG_GIG_FACTOR); - if (drv->raid_level > 5) + if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN) drv->raid_level = RAID_UNKNOWN; seq_printf(seq, "cciss/c%dd%d:" "\t%4u.%02uGB\tRAID %s\n", -- cgit v1.2.3 From 3ff1111dc6e27524eeef267ab0ca9b5690594748 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:21 -0500 Subject: cciss: Add a "raid_level" attribute to each logical drive in /sys and get rid of some magic numbers in raid level decoding. Add raid_level attribute to each logical drive at /sys/devices//ccissX/cXdY/raid_level for controller X, logical drive Y Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- .../ABI/testing/sysfs-bus-pci-devices-cciss | 7 +++++++ drivers/block/cciss.c | 24 ++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index 5a6c8d36afc..8d026025616 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -45,3 +45,10 @@ Kernel Version: 2.6.31 Contact: iss_storagedev@hp.com Description: Displays the 8-byte LUN ID used to address logical drive Y of controller X. + +Where: /sys/bus/pci/devices//ccissX/cXdY/raid_level +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the RAID level of logical drive Y of + controller X. diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b674f93d4be..063e8b0834d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -603,6 +603,29 @@ static ssize_t cciss_show_lunid(struct device *dev, } DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); +static ssize_t cciss_show_raid_level(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); + int raid; + unsigned long flags; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + raid = drv->raid_level; + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + if (raid < 0 || raid > RAID_UNKNOWN) + raid = RAID_UNKNOWN; + + return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n", + raid_label[raid]); +} +DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); + static struct attribute *cciss_host_attrs[] = { &dev_attr_rescan.attr, NULL @@ -629,6 +652,7 @@ static struct attribute *cciss_dev_attrs[] = { &dev_attr_vendor.attr, &dev_attr_rev.attr, 
&dev_attr_lunid.attr, + &dev_attr_raid_level.attr, NULL }; -- cgit v1.2.3 From e272afecaf18912e971374df4605496975942e5c Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:26 -0500 Subject: cciss: Add usage_count attribute to each logical drive in /sys Add usage_count attribute to each logical drive at /sys/devices//ccissX/cXdY/usage_count for controller X, logical drive Y. The usage count is the number of times the device has currently been opened. Signed-off-by: Stephen M. Cameron Signed-off-by: Jens Axboe --- .../ABI/testing/sysfs-bus-pci-devices-cciss | 7 +++++++ drivers/block/cciss.c | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss index 8d026025616..4f29e5f1ebf 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss @@ -52,3 +52,10 @@ Kernel Version: 2.6.31 Contact: iss_storagedev@hp.com Description: Displays the RAID level of logical drive Y of controller X. + +Where: /sys/bus/pci/devices//ccissX/cXdY/usage_count +Date: August 2009 +Kernel Version: 2.6.31 +Contact: iss_storagedev@hp.com +Description: Displays the usage count (number of opens) of logical drive Y + of controller X. 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 063e8b0834d..b808e9287b7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -626,6 +626,25 @@ static ssize_t cciss_show_raid_level(struct device *dev, } DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); +static ssize_t cciss_show_usage_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + drive_info_struct *drv = dev_get_drvdata(dev); + struct ctlr_info *h = to_hba(drv->dev->parent); + unsigned long flags; + int count; + + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); + if (h->busy_configuring) { + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return -EBUSY; + } + count = drv->usage_count; + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); + return snprintf(buf, 20, "%d\n", count); +} +DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL); + static struct attribute *cciss_host_attrs[] = { &dev_attr_rescan.attr, NULL @@ -653,6 +672,7 @@ static struct attribute *cciss_dev_attrs[] = { &dev_attr_rev.attr, &dev_attr_lunid.attr, &dev_attr_raid_level.attr, + &dev_attr_usage_count.attr, NULL }; -- cgit v1.2.3 From 9cef0d2f4f68a5a2c6ea0495f958a074d21fbd07 Mon Sep 17 00:00:00 2001 From: "Stephen M. Cameron" Date: Thu, 17 Sep 2009 13:48:31 -0500 Subject: cciss: Dynamically allocate the drive_info_struct for each logical drive. cciss: Dynamically allocate the drive_info_struct for each logical drive. This reduces the size of the per-hba ctlr_info structure from 106936 bytes to 8132 bytes. That's on 32-bit systems. On 64-bit systems, the improvement is even bigger. Without this, the ctlr_info struct is so big that the driver won't even load on a 64 bit system if CISS_MAX_LUN was at it's current setting of 1024 logical drives. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 293 ++++++++++++++++++++++++++++---------------------- drivers/block/cciss.h | 5 +- 2 files changed, 168 insertions(+), 130 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b808e9287b7..04036ef8ea5 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -201,6 +201,7 @@ static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c); static void cciss_hba_release(struct device *dev); static void cciss_device_release(struct device *dev); static void cciss_free_gendisk(ctlr_info_t *h, int drv_index); +static void cciss_free_drive_info(ctlr_info_t *h, int drv_index); #ifdef CONFIG_PROC_FS static void cciss_procinit(int i); @@ -327,7 +328,7 @@ static int cciss_seq_show(struct seq_file *seq, void *v) ctlr_info_t *h = seq->private; unsigned ctlr = h->ctlr; loff_t *pos = v; - drive_info_struct *drv = &h->drv[*pos]; + drive_info_struct *drv = h->drv[*pos]; if (*pos > h->highest_lun) return 0; @@ -461,6 +462,7 @@ static void __devinit cciss_procinit(int i) #define MAX_PRODUCT_NAME_LEN 19 #define to_hba(n) container_of(n, struct ctlr_info, dev) +#define to_drv(n) container_of(n, drive_info_struct, dev) static ssize_t host_store_rescan(struct device *dev, struct device_attribute *attr, @@ -480,8 +482,8 @@ static ssize_t dev_show_unique_id(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); __u8 sn[16]; unsigned long flags; int ret = 0; @@ -510,8 +512,8 @@ static ssize_t dev_show_vendor(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); char vendor[VENDOR_LEN + 1]; unsigned 
long flags; int ret = 0; @@ -534,8 +536,8 @@ static ssize_t dev_show_model(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); char model[MODEL_LEN + 1]; unsigned long flags; int ret = 0; @@ -558,8 +560,8 @@ static ssize_t dev_show_rev(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); char rev[REV_LEN + 1]; unsigned long flags; int ret = 0; @@ -581,8 +583,8 @@ DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL); static ssize_t cciss_show_lunid(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); unsigned long flags; unsigned char lunid[8]; @@ -606,8 +608,8 @@ DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL); static ssize_t cciss_show_raid_level(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); int raid; unsigned long flags; @@ -629,8 +631,8 @@ DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL); static ssize_t cciss_show_usage_count(struct device *dev, struct device_attribute *attr, char *buf) { - drive_info_struct *drv = dev_get_drvdata(dev); - struct ctlr_info *h = to_hba(drv->dev->parent); + drive_info_struct *drv = to_drv(dev); + struct ctlr_info *h = to_hba(drv->dev.parent); unsigned long flags; int count; @@ -733,11 +735,12 @@ static void 
cciss_destroy_hba_sysfs_entry(struct ctlr_info *h) } /* cciss_device_release is called when the reference count - * of h->drv[x].dev goes to zero. + * of h->drv[x]dev goes to zero. */ static void cciss_device_release(struct device *dev) { - kfree(dev); + drive_info_struct *drv = to_drv(dev); + kfree(drv); } /* @@ -751,20 +754,16 @@ static long cciss_create_ld_sysfs_entry(struct ctlr_info *h, { struct device *dev; - /* Special case for c*d0, we only create it once. */ - if (drv_index == 0 && h->drv[drv_index].dev != NULL) + if (h->drv[drv_index]->device_initialized) return 0; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return -ENOMEM; + dev = &h->drv[drv_index]->dev; device_initialize(dev); dev->type = &cciss_dev_type; dev->bus = &cciss_bus_type; dev_set_name(dev, "c%dd%d", h->ctlr, drv_index); dev->parent = &h->dev; - h->drv[drv_index].dev = dev; - dev_set_drvdata(dev, &h->drv[drv_index]); + h->drv[drv_index]->device_initialized = 1; return device_add(dev); } @@ -774,7 +773,7 @@ static long cciss_create_ld_sysfs_entry(struct ctlr_info *h, static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index, int ctlr_exiting) { - struct device *dev = h->drv[drv_index].dev; + struct device *dev = &h->drv[drv_index]->dev; /* special case for c*d0, we only destroy it on controller exit */ if (drv_index == 0 && !ctlr_exiting) @@ -782,7 +781,7 @@ static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index, device_del(dev); put_device(dev); /* the "final" put. */ - h->drv[drv_index].dev = NULL; + h->drv[drv_index] = NULL; } /* @@ -1625,7 +1624,10 @@ static void cciss_check_queues(ctlr_info_t *h) /* make sure the disk has been added and the drive is real * because this can be called from the middle of init_one. 
*/ - if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads)) + if (!h->drv[curr_queue]) + continue; + if (!(h->drv[curr_queue]->queue) || + !(h->drv[curr_queue]->heads)) continue; blk_start_queue(h->gendisk[curr_queue]->queue); @@ -1685,8 +1687,8 @@ static void cciss_softirq_done(struct request *rq) static inline void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[], uint32_t log_unit) { - memcpy(scsi3addr, h->drv[log_unit].LunID, - sizeof(h->drv[log_unit].LunID)); + memcpy(scsi3addr, h->drv[log_unit]->LunID, + sizeof(h->drv[log_unit]->LunID)); } /* This function gets the SCSI vendor, model, and revision of a logical drive @@ -1776,12 +1778,10 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; disk->fops = &cciss_fops; - if (h->drv[drv_index].dev == NULL) { - if (cciss_create_ld_sysfs_entry(h, drv_index)) - goto cleanup_queue; - } - disk->private_data = &h->drv[drv_index]; - disk->driverfs_dev = h->drv[drv_index].dev; + if (cciss_create_ld_sysfs_entry(h, drv_index)) + goto cleanup_queue; + disk->private_data = h->drv[drv_index]; + disk->driverfs_dev = &h->drv[drv_index]->dev; /* Set up queue information */ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); @@ -1799,13 +1799,13 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, disk->queue->queuedata = h; blk_queue_logical_block_size(disk->queue, - h->drv[drv_index].block_size); + h->drv[drv_index]->block_size); /* Make sure all queue data is written out before */ - /* setting h->drv[drv_index].queue, as setting this */ + /* setting h->drv[drv_index]->queue, as setting this */ /* allows the interrupt handler to start the queue */ wmb(); - h->drv[drv_index].queue = disk->queue; + h->drv[drv_index]->queue = disk->queue; add_disk(disk); return 0; @@ -1840,7 +1840,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, /* Get information about the disk and modify the 
driver structure */ inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL); - drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL); + drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL); if (inq_buff == NULL || drvinfo == NULL) goto mem_msg; @@ -1876,16 +1876,19 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, drvinfo->model, drvinfo->rev); cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no, sizeof(drvinfo->serial_no)); + /* Save the lunid in case we deregister the disk, below. */ + memcpy(drvinfo->LunID, h->drv[drv_index]->LunID, + sizeof(drvinfo->LunID)); /* Is it the same disk we already know, and nothing's changed? */ - if (h->drv[drv_index].raid_level != -1 && + if (h->drv[drv_index]->raid_level != -1 && ((memcmp(drvinfo->serial_no, - h->drv[drv_index].serial_no, 16) == 0) && - drvinfo->block_size == h->drv[drv_index].block_size && - drvinfo->nr_blocks == h->drv[drv_index].nr_blocks && - drvinfo->heads == h->drv[drv_index].heads && - drvinfo->sectors == h->drv[drv_index].sectors && - drvinfo->cylinders == h->drv[drv_index].cylinders)) + h->drv[drv_index]->serial_no, 16) == 0) && + drvinfo->block_size == h->drv[drv_index]->block_size && + drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks && + drvinfo->heads == h->drv[drv_index]->heads && + drvinfo->sectors == h->drv[drv_index]->sectors && + drvinfo->cylinders == h->drv[drv_index]->cylinders)) /* The disk is unchanged, nothing to update */ goto freeret; @@ -1895,18 +1898,17 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, * If the disk already exists then deregister it before proceeding * (unless it's the first disk (for the controller node). 
*/ - if (h->drv[drv_index].raid_level != -1 && drv_index != 0) { + if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) { printk(KERN_WARNING "disk %d has changed.\n", drv_index); spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - h->drv[drv_index].busy_configuring = 1; + h->drv[drv_index]->busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - /* deregister_disk sets h->drv[drv_index].queue = NULL + /* deregister_disk sets h->drv[drv_index]->queue = NULL * which keeps the interrupt handler from starting * the queue. */ ret = deregister_disk(h, drv_index, 0, via_ioctl); - h->drv[drv_index].busy_configuring = 0; } /* If the disk is in use return */ @@ -1914,22 +1916,31 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, goto freeret; /* Save the new information from cciss_geometry_inquiry - * and serial number inquiry. + * and serial number inquiry. If the disk was deregistered + * above, then h->drv[drv_index] will be NULL. */ - h->drv[drv_index].block_size = drvinfo->block_size; - h->drv[drv_index].nr_blocks = drvinfo->nr_blocks; - h->drv[drv_index].heads = drvinfo->heads; - h->drv[drv_index].sectors = drvinfo->sectors; - h->drv[drv_index].cylinders = drvinfo->cylinders; - h->drv[drv_index].raid_level = drvinfo->raid_level; - memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16); - memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1); - memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1); - memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1); + if (h->drv[drv_index] == NULL) { + drvinfo->device_initialized = 0; + h->drv[drv_index] = drvinfo; + drvinfo = NULL; /* so it won't be freed below. 
*/ + } else { + /* special case for cxd0 */ + h->drv[drv_index]->block_size = drvinfo->block_size; + h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks; + h->drv[drv_index]->heads = drvinfo->heads; + h->drv[drv_index]->sectors = drvinfo->sectors; + h->drv[drv_index]->cylinders = drvinfo->cylinders; + h->drv[drv_index]->raid_level = drvinfo->raid_level; + memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16); + memcpy(h->drv[drv_index]->vendor, drvinfo->vendor, + VENDOR_LEN + 1); + memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1); + memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1); + } ++h->num_luns; disk = h->gendisk[drv_index]; - set_capacity(disk, h->drv[drv_index].nr_blocks); + set_capacity(disk, h->drv[drv_index]->nr_blocks); /* If it's not disk 0 (drv_index != 0) * or if it was disk 0, but there was previously @@ -1940,6 +1951,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time, if (drv_index || first_time) { if (cciss_add_disk(h, disk, drv_index) != 0) { cciss_free_gendisk(h, drv_index); + cciss_free_drive_info(h, drv_index); printk(KERN_WARNING "cciss:%d could not update " "disk %d\n", h->ctlr, drv_index); --h->num_luns; @@ -1956,28 +1968,64 @@ mem_msg: } /* This function will find the first index of the controllers drive array - * that has a -1 for the raid_level and will return that index. This is - * where new drives will be added. If the index to be returned is greater - * than the highest_lun index for the controller then highest_lun is set - * to this new index. If there are no available indexes then -1 is returned. - * "controller_node" is used to know if this is a real logical drive, or just - * the controller node, which determines if this counts towards highest_lun. + * that has a null drv pointer and allocate the drive info struct and + * will return that index This is where new drives will be added. 
+ * If the index to be returned is greater than the highest_lun index for + * the controller then highest_lun is set * to this new index. + * If there are no available indexes or if tha allocation fails, then -1 + * is returned. * "controller_node" is used to know if this is a real + * logical drive, or just the controller node, which determines if this + * counts towards highest_lun. */ -static int cciss_find_free_drive_index(int ctlr, int controller_node) +static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node) { int i; + drive_info_struct *drv; + /* Search for an empty slot for our drive info */ for (i = 0; i < CISS_MAX_LUN; i++) { - if (hba[ctlr]->drv[i].raid_level == -1) { - if (i > hba[ctlr]->highest_lun) - if (!controller_node) - hba[ctlr]->highest_lun = i; + + /* if not cxd0 case, and it's occupied, skip it. */ + if (h->drv[i] && i != 0) + continue; + /* + * If it's cxd0 case, and drv is alloc'ed already, and a + * disk is configured there, skip it. + */ + if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1) + continue; + + /* + * We've found an empty slot. Update highest_lun + * provided this isn't just the fake cxd0 controller node. + */ + if (i > h->highest_lun && !controller_node) + h->highest_lun = i; + + /* If adding a real disk at cxd0, and it's already alloc'ed */ + if (i == 0 && h->drv[i] != NULL) return i; - } + + /* + * Found an empty slot, not already alloc'ed. Allocate it. + * Mark it with raid_level == -1, so we know it's new later on. 
+ */ + drv = kzalloc(sizeof(*drv), GFP_KERNEL); + if (!drv) + return -1; + drv->raid_level = -1; /* so we know it's new */ + h->drv[i] = drv; + return i; } return -1; } +static void cciss_free_drive_info(ctlr_info_t *h, int drv_index) +{ + kfree(h->drv[drv_index]); + h->drv[drv_index] = NULL; +} + static void cciss_free_gendisk(ctlr_info_t *h, int drv_index) { put_disk(h->gendisk[drv_index]); @@ -1998,7 +2046,7 @@ static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[], { int drv_index; - drv_index = cciss_find_free_drive_index(h->ctlr, controller_node); + drv_index = cciss_alloc_drive_info(h, controller_node); if (drv_index == -1) return -1; @@ -2010,24 +2058,24 @@ static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[], printk(KERN_ERR "cciss%d: could not " "allocate a new disk %d\n", h->ctlr, drv_index); - return -1; + goto err_free_drive_info; } } - memcpy(h->drv[drv_index].LunID, lunid, - sizeof(h->drv[drv_index].LunID)); - if (h->drv[drv_index].dev == NULL) { - if (cciss_create_ld_sysfs_entry(h, drv_index)) - goto err_free_disk; - } + memcpy(h->drv[drv_index]->LunID, lunid, + sizeof(h->drv[drv_index]->LunID)); + if (cciss_create_ld_sysfs_entry(h, drv_index)) + goto err_free_disk; /* Don't need to mark this busy because nobody */ /* else knows about this disk yet to contend */ /* for access to it. 
*/ - h->drv[drv_index].busy_configuring = 0; + h->drv[drv_index]->busy_configuring = 0; wmb(); return drv_index; err_free_disk: cciss_free_gendisk(h, drv_index); +err_free_drive_info: + cciss_free_drive_info(h, drv_index); return -1; } @@ -2047,17 +2095,18 @@ static void cciss_add_controller_node(ctlr_info_t *h) drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1); if (drv_index == -1) goto error; - h->drv[drv_index].block_size = 512; - h->drv[drv_index].nr_blocks = 0; - h->drv[drv_index].heads = 0; - h->drv[drv_index].sectors = 0; - h->drv[drv_index].cylinders = 0; - h->drv[drv_index].raid_level = -1; - memset(h->drv[drv_index].serial_no, 0, 16); + h->drv[drv_index]->block_size = 512; + h->drv[drv_index]->nr_blocks = 0; + h->drv[drv_index]->heads = 0; + h->drv[drv_index]->sectors = 0; + h->drv[drv_index]->cylinders = 0; + h->drv[drv_index]->raid_level = -1; + memset(h->drv[drv_index]->serial_no, 0, 16); disk = h->gendisk[drv_index]; if (cciss_add_disk(h, disk, drv_index) == 0) return; cciss_free_gendisk(h, drv_index); + cciss_free_drive_info(h, drv_index); error: printk(KERN_WARNING "cciss%d: could not " "add disk 0.\n", h->ctlr); @@ -2136,12 +2185,12 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, drv_found = 0; /* skip holes in the array from already deleted drives */ - if (h->drv[i].raid_level == -1) + if (h->drv[i] == NULL) continue; for (j = 0; j < num_luns; j++) { memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid)); - if (memcmp(h->drv[i].LunID, lunid, + if (memcmp(h->drv[i]->LunID, lunid, sizeof(lunid)) == 0) { drv_found = 1; break; @@ -2150,10 +2199,11 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, if (!drv_found) { /* Deregister it from the OS, it's gone. 
*/ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - h->drv[i].busy_configuring = 1; + h->drv[i]->busy_configuring = 1; spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return_code = deregister_disk(h, i, 1, via_ioctl); - h->drv[i].busy_configuring = 0; + if (h->drv[i] != NULL) + h->drv[i]->busy_configuring = 0; } } @@ -2174,9 +2224,9 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time, * the first free index and add it. */ for (j = 0; j <= h->highest_lun; j++) { - if (h->drv[j].raid_level != -1 && - memcmp(h->drv[j].LunID, lunid, - sizeof(h->drv[j].LunID)) == 0) { + if (h->drv[j] != NULL && + memcmp(h->drv[j]->LunID, lunid, + sizeof(h->drv[j]->LunID)) == 0) { drv_index = j; drv_found = 1; break; @@ -2253,11 +2303,12 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, int i; struct gendisk *disk; drive_info_struct *drv; + int recalculate_highest_lun; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - drv = &h->drv[drv_index]; + drv = h->drv[drv_index]; disk = h->gendisk[drv_index]; /* make sure logical volume is NOT is use */ @@ -2267,6 +2318,8 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, } else if (drv->usage_count > 0) return -EBUSY; + recalculate_highest_lun = (drv == h->drv[h->highest_lun]); + /* invalidate the devices and deregister the disk. If it is disk * zero do not deregister it but just zero out it's values. This * allows us to delete disk zero but keep the controller registered. @@ -2277,14 +2330,8 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, cciss_destroy_ld_sysfs_entry(h, drv_index, 0); del_gendisk(disk); } - if (q) { + if (q) blk_cleanup_queue(q); - /* Set drv->queue to NULL so that we do not try - * to call blk_start_queue on this queue in the - * interrupt handler - */ - drv->queue = NULL; - } /* If clear_all is set then we are deleting the logical * drive, not just refreshing its info. For drives * other than disk 0 we will call put_disk. 
We do not @@ -2307,24 +2354,20 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, } } else { set_capacity(disk, 0); + cciss_clear_drive_info(drv); } --h->num_luns; - cciss_clear_drive_info(drv); - - if (clear_all) { - /* check to see if it was the last disk */ - if (drv == h->drv + h->highest_lun) { - /* if so, find the new hightest lun */ - int i, newhighest = -1; - for (i = 0; i <= h->highest_lun; i++) { - /* if the disk has size > 0, it is available */ - if (h->drv[i].heads) - newhighest = i; - } - h->highest_lun = newhighest; + + /* if it was the last disk, find the new hightest lun */ + if (clear_all && recalculate_highest_lun) { + int i, newhighest = -1; + for (i = 0; i <= h->highest_lun; i++) { + /* if the disk has size > 0, it is available */ + if (h->drv[i] && h->drv[i]->heads) + newhighest = i; } - memset(drv->LunID, 0, sizeof(drv->LunID)); + h->highest_lun = newhighest; } return 0; } @@ -2755,7 +2798,7 @@ static int cciss_revalidate(struct gendisk *disk) InquiryData_struct *inq_buff = NULL; for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) { - if (memcmp(h->drv[logvol].LunID, drv->LunID, + if (memcmp(h->drv[logvol]->LunID, drv->LunID, sizeof(drv->LunID)) == 0) { FOUND = 1; break; @@ -4293,8 +4336,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->num_luns = 0; hba[i]->highest_lun = -1; for (j = 0; j < CISS_MAX_LUN; j++) { - hba[i]->drv[j].raid_level = -1; - hba[i]->drv[j].queue = NULL; + hba[i]->drv[j] = NULL; hba[i]->gendisk[j] = NULL; } @@ -4349,12 +4391,7 @@ clean1: cciss_destroy_hba_sysfs_entry(hba[i]); clean0: hba[i]->busy_initializing = 0; - /* cleanup any queues that may have been initialized */ - for (j=0; j <= hba[i]->highest_lun; j++){ - drive_info_struct *drv = &(hba[i]->drv[j]); - if (drv->queue) - blk_cleanup_queue(drv->queue); - } + /* * Deliberately omit pci_disable_device(): it does something nasty to * Smart Array controllers that pci_enable_device does not undo diff --git a/drivers/block/cciss.h 
b/drivers/block/cciss.h index 5188f713d1b..31524cf42c7 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -45,13 +45,14 @@ typedef struct _drive_info_struct * to prevent it from being opened or it's * queue from being started. */ - struct device *dev; + struct device dev; __u8 serial_no[16]; /* from inquiry page 0x83, * not necc. null terminated. */ char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */ char model[MODEL_LEN + 1]; /* SCSI model string */ char rev[REV_LEN + 1]; /* SCSI revision string */ + char device_initialized; /* indicates whether dev is initialized */ } drive_info_struct; struct ctlr_info @@ -87,7 +88,7 @@ struct ctlr_info BYTE cciss_read_capacity; // information about each logical volume - drive_info_struct drv[CISS_MAX_LUN]; + drive_info_struct *drv[CISS_MAX_LUN]; struct access_method access; -- cgit v1.2.3 From 9f792d9f58496161b1b201e2ca440a6b6e116c39 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Sep 2009 22:24:21 +0200 Subject: cciss: cciss_host_attr_groups should be const Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 04036ef8ea5..4d879b79225 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -656,7 +656,7 @@ static struct attribute_group cciss_host_attr_group = { .attrs = cciss_host_attrs, }; -static struct attribute_group *cciss_host_attr_groups[] = { +static const struct attribute_group *cciss_host_attr_groups[] = { &cciss_host_attr_group, NULL }; -- cgit v1.2.3 From 80ddf247c84fbd7f4371dd15bbbff0adb44a8708 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 18 Sep 2009 22:54:37 +0200 Subject: block: Set max_sectors correctly for stacking devices The topology changes unintentionally caused SAFE_MAX_SECTORS to be set for stacking devices. 
Set the default limit to BLK_DEF_MAX_SECTORS and provide SAFE_MAX_SECTORS in blk_queue_make_request() for legacy hw drivers that depend on the old behavior. Acked-by: Mike Snitzer Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 83413ff8373..cd9b7302dfc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,7 +111,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_hw_segments = MAX_HW_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS; + lim->max_sectors = lim->max_hw_sectors = BLK_DEF_MAX_SECTORS; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -164,6 +164,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->unplug_timer.data = (unsigned long)q; blk_set_default_limits(&q->limits); + blk_queue_max_sectors(q, SAFE_MAX_SECTORS); /* * If the caller didn't supply a lock, fall back to our embedded -- cgit v1.2.3 From 5dee2477df5368368b7dba810a17a3c411a1d0f0 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 21 Sep 2009 21:46:05 +0200 Subject: block: Do not clamp max_hw_sectors for stacking devices Stacking devices do not have an inherent max_hw_sector limit. Set the default to INT_MAX so we are bounded only by capabilities of the underlying storage. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index cd9b7302dfc..eaf122ff5f1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,7 +111,8 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_hw_segments = MAX_HW_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_hw_sectors = BLK_DEF_MAX_SECTORS; + lim->max_sectors = BLK_DEF_MAX_SECTORS; + lim->max_hw_sectors = INT_MAX; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; -- cgit v1.2.3 From 1e6f2dc11984b81c6438ff6cd45cdf15a02e3dfd Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Thu, 24 Sep 2009 16:15:38 +0200 Subject: cciss: fix build when !PROC_FS Fix these build errors when CONFIG_PROC_FS is not set: drivers/block/cciss.c: In function 'cciss_show_raid_level': drivers/block/cciss.c:623: error: 'RAID_UNKNOWN' undeclared (first use in this function) drivers/block/cciss.c:626: error: 'raid_label' undeclared (first use in this function) drivers/block/cciss.c: In function 'cciss_geometry_inquiry': drivers/block/cciss.c:2696: error: 'RAID_UNKNOWN' undeclared (first use in this function) Signed-off-by: Alexander Beregalov Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 4d879b79225..78852b10319 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -256,6 +256,11 @@ static inline void removeQ(CommandList_struct *c) #include "cciss_scsi.c" /* For SCSI tape support */ +static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", + "UNKNOWN" +}; +#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1) + #ifdef 
CONFIG_PROC_FS /* @@ -264,10 +269,6 @@ static inline void removeQ(CommandList_struct *c) #define ENG_GIG 1000000000 #define ENG_GIG_FACTOR (ENG_GIG/512) #define ENGAGE_SCSI "engage scsi" -static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG", - "UNKNOWN" -}; -#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1) static struct proc_dir_entry *proc_cciss; -- cgit v1.2.3 From 48c0d4d4c04dd520c55e0fd756fa4e7c83de3d13 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Fri, 25 Sep 2009 06:19:26 +0200 Subject: Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs introduced in commit 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82. Release kobject also in case the request_fn is NULL. Problem was noticed via kmemleak backtrace when some sysfs entries were note properly destroyed during device removal: unreferenced object 0xffff88001aa76640 (size 80): comm "lvcreate", pid 2120, jiffies 4294885144 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 f0 65 a7 1a 00 88 ff ff .........e...... 90 66 a7 1a 00 88 ff ff 86 1d 53 81 ff ff ff ff .f........S..... 
backtrace: [] kmemleak_alloc+0x26/0x60 [] kmem_cache_alloc+0x133/0x1c0 [] sysfs_new_dirent+0x41/0x120 [] sysfs_add_file_mode+0x3c/0xb0 [] internal_create_group+0xc1/0x1a0 [] sysfs_create_group+0x13/0x20 [] blk_trace_init_sysfs+0x14/0x20 [] blk_register_queue+0x3c/0xf0 [] add_disk+0x94/0x160 [] dm_create+0x598/0x6e0 [dm_mod] [] dev_create+0x51/0x350 [dm_mod] [] ctl_ioctl+0x1a3/0x240 [dm_mod] [] dm_compat_ctl_ioctl+0x12/0x20 [dm_mod] [] compat_sys_ioctl+0xcd/0x4f0 [] sysenter_dispatch+0x7/0x2c [] 0xffffffffffffffff Signed-off-by: Zdenek Kabelac Reviewed-by: Li Zefan Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 11 ++++++----- include/linux/blktrace_api.h | 2 ++ kernel/trace/blktrace.c | 5 +++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b78c9c3e267..8a6d81afb28 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk) if (ret) { kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); + blk_trace_remove_sysfs(disk_to_dev(disk)); return ret; } @@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk) if (WARN_ON(!q)) return; - if (q->request_fn) { + if (q->request_fn) elv_unregister_queue(q); - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); - kobject_put(&disk_to_dev(disk)->kobj); - } + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + blk_trace_remove_sysfs(disk_to_dev(disk)); + kobject_put(&disk_to_dev(disk)->kobj); } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7e4350ece0f..622939a2329 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern void blk_trace_remove_sysfs(struct device *dev); extern 
int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -211,6 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 3eb159c277c..60b5c5a3d4b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1657,6 +1657,11 @@ int blk_trace_init_sysfs(struct device *dev) return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); } +void blk_trace_remove_sysfs(struct device *dev) +{ + sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING -- cgit v1.2.3 From a112a71d45b5e40c3cf07371d20a4a5079a72610 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Sat, 26 Sep 2009 16:19:21 +0200 Subject: fs/bio.c: move EXPORT* macros to line after function As mentioned in Documentation/CodingStyle, move EXPORT* macro's to the line immediately after the closing function brace line. 
Signed-off-by: H Hartley Sweeten Signed-off-by: Jens Axboe --- fs/bio.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/fs/bio.c b/fs/bio.c index 76738005c8e..402cb84a92a 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -249,6 +249,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) mempool_free(p, bs->bio_pool); } +EXPORT_SYMBOL(bio_free); void bio_init(struct bio *bio) { @@ -257,6 +258,7 @@ void bio_init(struct bio *bio) bio->bi_comp_cpu = -1; atomic_set(&bio->bi_cnt, 1); } +EXPORT_SYMBOL(bio_init); /** * bio_alloc_bioset - allocate a bio for I/O @@ -311,6 +313,7 @@ err_free: mempool_free(p, bs->bio_pool); return NULL; } +EXPORT_SYMBOL(bio_alloc_bioset); static void bio_fs_destructor(struct bio *bio) { @@ -337,6 +340,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +EXPORT_SYMBOL(bio_alloc); static void bio_kmalloc_destructor(struct bio *bio) { @@ -380,6 +384,7 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +EXPORT_SYMBOL(bio_kmalloc); void zero_fill_bio(struct bio *bio) { @@ -416,6 +421,7 @@ void bio_put(struct bio *bio) bio->bi_destructor(bio); } } +EXPORT_SYMBOL(bio_put); inline int bio_phys_segments(struct request_queue *q, struct bio *bio) { @@ -424,6 +430,7 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) return bio->bi_phys_segments; } +EXPORT_SYMBOL(bio_phys_segments); /** * __bio_clone - clone a bio @@ -451,6 +458,7 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) bio->bi_size = bio_src->bi_size; bio->bi_idx = bio_src->bi_idx; } +EXPORT_SYMBOL(__bio_clone); /** * bio_clone - clone a bio @@ -482,6 +490,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) return b; } +EXPORT_SYMBOL(bio_clone); /** * bio_get_nr_vecs - return approx number of vecs @@ -505,6 +514,7 @@ int bio_get_nr_vecs(struct block_device *bdev) return nr_pages; } +EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct 
request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, @@ -635,6 +645,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, return __bio_add_page(q, bio, page, len, offset, queue_max_hw_sectors(q)); } +EXPORT_SYMBOL(bio_add_pc_page); /** * bio_add_page - attempt to add page to bio @@ -655,6 +666,7 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct request_queue *q = bdev_get_queue(bio->bi_bdev); return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q)); } +EXPORT_SYMBOL(bio_add_page); struct bio_map_data { struct bio_vec *iovecs; @@ -776,6 +788,7 @@ int bio_uncopy_user(struct bio *bio) bio_put(bio); return ret; } +EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio @@ -920,6 +933,7 @@ struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); } +EXPORT_SYMBOL(bio_copy_user); static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, @@ -1050,6 +1064,7 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); } +EXPORT_SYMBOL(bio_map_user); /** * bio_map_user_iov - map user sg_iovec table into bio @@ -1117,13 +1132,13 @@ void bio_unmap_user(struct bio *bio) __bio_unmap_user(bio); bio_put(bio); } +EXPORT_SYMBOL(bio_unmap_user); static void bio_map_kern_endio(struct bio *bio, int err) { bio_put(bio); } - static struct bio *__bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) { @@ -1189,6 +1204,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, bio_put(bio); return ERR_PTR(-EINVAL); } +EXPORT_SYMBOL(bio_map_kern); static void bio_copy_kern_endio(struct bio *bio, int err) { @@ -1250,6 +1266,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, 
unsigned int len, return bio; } +EXPORT_SYMBOL(bio_copy_kern); /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions @@ -1400,6 +1417,7 @@ void bio_endio(struct bio *bio, int error) if (bio->bi_end_io) bio->bi_end_io(bio, error); } +EXPORT_SYMBOL(bio_endio); void bio_pair_release(struct bio_pair *bp) { @@ -1410,6 +1428,7 @@ void bio_pair_release(struct bio_pair *bp) mempool_free(bp, bp->bio2.bi_private); } } +EXPORT_SYMBOL(bio_pair_release); static void bio_pair_end_1(struct bio *bi, int err) { @@ -1477,6 +1496,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) return bp; } +EXPORT_SYMBOL(bio_split); /** * bio_sector_offset - Find hardware sector offset in bio @@ -1547,6 +1567,7 @@ void bioset_free(struct bio_set *bs) kfree(bs); } +EXPORT_SYMBOL(bioset_free); /** * bioset_create - Create a bio_set @@ -1592,6 +1613,7 @@ bad: bioset_free(bs); return NULL; } +EXPORT_SYMBOL(bioset_create); static void __init biovec_init_slabs(void) { @@ -1636,29 +1658,4 @@ static int __init init_bio(void) return 0; } - subsys_initcall(init_bio); - -EXPORT_SYMBOL(bio_alloc); -EXPORT_SYMBOL(bio_kmalloc); -EXPORT_SYMBOL(bio_put); -EXPORT_SYMBOL(bio_free); -EXPORT_SYMBOL(bio_endio); -EXPORT_SYMBOL(bio_init); -EXPORT_SYMBOL(__bio_clone); -EXPORT_SYMBOL(bio_clone); -EXPORT_SYMBOL(bio_phys_segments); -EXPORT_SYMBOL(bio_add_page); -EXPORT_SYMBOL(bio_add_pc_page); -EXPORT_SYMBOL(bio_get_nr_vecs); -EXPORT_SYMBOL(bio_map_user); -EXPORT_SYMBOL(bio_unmap_user); -EXPORT_SYMBOL(bio_map_kern); -EXPORT_SYMBOL(bio_copy_kern); -EXPORT_SYMBOL(bio_pair_release); -EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_copy_user); -EXPORT_SYMBOL(bio_uncopy_user); -EXPORT_SYMBOL(bioset_create); -EXPORT_SYMBOL(bioset_free); -EXPORT_SYMBOL(bio_alloc_bioset); -- cgit v1.2.3 From 3bd0f0c763e497c8674b28e3df2732f48683dabd Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 30 Sep 2009 10:53:48 +0200 Subject: swapfile: avoid NULL pointer dereference in swapon when s_bdev is NULL 
While testing Swap over NFS patchset, I noticed an oops that was triggered during swapon. Investigating further, the NULL pointer dereference is due to the SSD device check/optimization in the swapon code that assumes s_bdev could never be NULL. inode->i_sb->s_bdev could be NULL in a few cases. For example, one such case is loopback NFS mount, there could be others as well. Fix this by ensuring s_bdev is not NULL before we try to dereference s_bdev. Signed-off-by: Suresh Jayaraman Signed-off-by: Jens Axboe --- mm/swapfile.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 4de7f02f820..a1bc6b9af9a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1974,12 +1974,14 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap; } - if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { - p->flags |= SWP_SOLIDSTATE; - p->cluster_next = 1 + (random32() % p->highest_bit); + if (p->bdev) { + if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { + p->flags |= SWP_SOLIDSTATE; + p->cluster_next = 1 + (random32() % p->highest_bit); + } + if (discard_swap(p) == 0) + p->flags |= SWP_DISCARDABLE; } - if (discard_swap(p) == 0) - p->flags |= SWP_DISCARDABLE; mutex_lock(&swapon_mutex); spin_lock(&swap_lock); -- cgit v1.2.3 From c15227de132f1295f3db6b7df9079956b1020fd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:52:12 +0200 Subject: block: use normal I/O path for discard requests prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally adding a payload there makes the ownership of the bio backing unclear as it's now allocated by the device driver and not the submitter as usual. It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether the queue supports discard operations or not.
blkdev_issue_discard now allocates a one-page, sector-length payload which is the right thing for the common ATA and SCSI implementations. The mtd implementation of prepare_discard_fn() is replaced with simply checking for the request being a discard. Largely based on a previous patch from Matthew Wilcox which did the prepare_discard_fn but not the different payload allocation yet. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 ++++++++++++++++++++++++++++++----- block/blk-core.c | 3 +-- block/blk-settings.c | 17 ----------------- drivers/mtd/mtd_blkdevs.c | 19 +++++-------------- drivers/staging/dst/dcore.c | 2 +- include/linux/blkdev.h | 6 ++---- 6 files changed, 39 insertions(+), 43 deletions(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6593ab39cfe..21f5025c394 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) if (bio->bi_private) complete(bio->bi_private); + __free_page(bio_page(bio)); bio_put(bio); } @@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = flags & DISCARD_FL_BARRIER ? DISCARD_BARRIER : DISCARD_NOBARRIER; + struct bio *bio; + struct page *page; int ret = 0; if (!q) return -ENXIO; - if (!q->prepare_discard_fn) + if (!blk_queue_discard(q)) return -EOPNOTSUPP; while (nr_sects && !ret) { - struct bio *bio = bio_alloc(gfp_mask, 0); - if (!bio) - return -ENOMEM; + unsigned int sector_size = q->limits.logical_block_size; + bio = bio_alloc(gfp_mask, 1); + if (!bio) + goto out; + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; if (flags & DISCARD_FL_WAIT) bio->bi_private = &wait; - bio->bi_sector = sector; + /* + * Add a zeroed one-sector payload as that's what + * our current implementations need. If we'll ever need + * more the interface will need revisiting. 
+ */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_free_bio; + if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) + goto out_free_page; + /* + * And override the bio size - the way discard works we + * touch many more blocks on disk than the actual payload + * length. + */ if (nr_sects > queue_max_hw_sectors(q)) { bio->bi_size = queue_max_hw_sectors(q) << 9; nr_sects -= queue_max_hw_sectors(q); @@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_put(bio); } return ret; +out_free_page: + __free_page(page); +out_free_bio: + bio_put(bio); +out: + return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index 8135228e4b2..80a020dd158 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_DISCARD; if (bio_rw_flagged(bio, BIO_RW_BARRIER)) req->cmd_flags |= REQ_SOFTBARRIER; - req->q->prepare_discard_fn(req->q, req); } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) req->cmd_flags |= REQ_HARDBARRIER; @@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (bio_rw_flagged(bio, BIO_RW_DISCARD) && - !q->prepare_discard_fn) { + !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } diff --git a/block/blk-settings.c b/block/blk-settings.c index eaf122ff5f1..d29498ef1eb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -33,23 +33,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) } EXPORT_SYMBOL(blk_queue_prep_rq); -/** - * blk_queue_set_discard - set a discard_sectors function for queue - * @q: queue - * @dfn: prepare_discard function - * - * It's possible for a queue to register a discard callback which is used - * to transform a discard request into the appropriate type for the - * hardware. 
If none is registered, then discard requests are failed - * with %EOPNOTSUPP. - * - */ -void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) -{ - q->prepare_discard_fn = dfn; -} -EXPORT_SYMBOL(blk_queue_set_discard); - /** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0acbf4f5be5..8ca17a3e96e 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,14 +32,6 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; -static int blktrans_discard_request(struct request_queue *q, - struct request *req) -{ - req->cmd_type = REQ_TYPE_LINUX_BLOCK; - req->cmd[0] = REQ_LB_OP_DISCARD; - return 0; -} - static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; - if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && - req->cmd[0] == REQ_LB_OP_DISCARD) - return tr->discard(dev, block, nsect); - if (!blk_fs_request(req)) return -EIO; @@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, get_capacity(req->rq_disk)) return -EIO; + if (blk_discard_rq(req)) + return tr->discard(dev, block, nsect); + switch(rq_data_dir(req)) { case READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) @@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); if (tr->discard) - blk_queue_set_discard(tr->blkcore_priv->rq, - blktrans_discard_request); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + tr->blkcore_priv->rq); tr->blkshift = ffs(tr->blksize) - 1; diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index ac8577358ba..5e8db067758 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -102,7 +102,7 @@ static int 
dst_request(struct request_queue *q, struct bio *bio) struct dst_node *n = q->queuedata; int err = -EIO; - if (bio_empty_barrier(bio) && !q->prepare_discard_fn) { + if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { /* * This is a dirty^Wnice hack, but if we complete this * operation with -EOPNOTSUPP like intended, XFS diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5a..f62d45e8761 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -82,7 +82,6 @@ enum rq_cmd_type_bits { enum { REQ_LB_OP_EJECT = 0x40, /* eject request */ REQ_LB_OP_FLUSH = 0x41, /* flush request */ - REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* @@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); -typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -340,7 +338,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; - prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; @@ -460,6 +457,7 @@ struct request_queue #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ +#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -591,6 +589,7 @@ enum { #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) +#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) 
((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.3 From 67efc9258010da35b27b3854d0880c7e193004ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:54:20 +0200 Subject: block: allow large discard requests Currently we set the bio size to the byte equivalent of the blocks to be trimmed when submitting the initial DISCARD ioctl. That means it is subject to the max_hw_sectors limitation of the HBA which is much lower than the size of a DISCARD request we can support. Add a separate max_discard_sectors tunable to limit the size for discard requests. We limit the max discard request size in bytes to 32bit as that is the limit for bio->bi_size. This could be much larger if we had a way to pass that information through the block layer. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 10 ++++++---- block/blk-core.c | 3 ++- block/blk-settings.c | 13 +++++++++++++ include/linux/blkdev.h | 3 +++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 21f5025c394..8873b9b439f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -385,6 +385,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, while (nr_sects && !ret) { unsigned int sector_size = q->limits.logical_block_size; + unsigned int max_discard_sectors = + min(q->limits.max_discard_sectors, UINT_MAX >> 9); bio = bio_alloc(gfp_mask, 1); if (!bio) @@ -411,10 +413,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * touch many more blocks on disk than the actual payload * length. */ - if (nr_sects > queue_max_hw_sectors(q)) { - bio->bi_size = queue_max_hw_sectors(q) << 9; - nr_sects -= queue_max_hw_sectors(q); - sector += queue_max_hw_sectors(q); + if (nr_sects > max_discard_sectors) { + bio->bi_size = max_discard_sectors << 9; + nr_sects -= max_discard_sectors; + sector += max_discard_sectors; } else { bio->bi_size = nr_sects << 9; nr_sects = 0; diff --git a/block/blk-core.c b/block/blk-core.c index 80a020dd158..34504f30972 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1436,7 +1436,8 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { + if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), diff --git a/block/blk-settings.c b/block/blk-settings.c index d29498ef1eb..e0695bca702 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->max_segment_size = MAX_SEGMENT_SIZE; lim->max_sectors = 
BLK_DEF_MAX_SECTORS; lim->max_hw_sectors = INT_MAX; + lim->max_discard_sectors = SAFE_MAX_SECTORS; lim->logical_block_size = lim->physical_block_size = lim->io_min = 512; lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT); lim->alignment_offset = 0; @@ -238,6 +239,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) } EXPORT_SYMBOL(blk_queue_max_hw_sectors); +/** + * blk_queue_max_discard_sectors - set max sectors for a single discard + * @q: the request queue for the device + * @max_discard: maximum number of sectors to discard + **/ +void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors) +{ + q->limits.max_discard_sectors = max_discard_sectors; +} +EXPORT_SYMBOL(blk_queue_max_discard_sectors); + /** * blk_queue_max_phys_segments - set max phys segments for a request for this queue * @q: the request queue for the device diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f62d45e8761..1a03b715dfa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,7 @@ struct queue_limits { unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; + unsigned int max_discard_sectors; unsigned short logical_block_size; unsigned short max_hw_segments; @@ -928,6 +929,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); +extern void blk_queue_max_discard_sectors(struct request_queue *q, + unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, -- cgit v1.2.3 From 
b0da3f0dada78832c9da03ad2152ae76bd9a2496 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 1 Oct 2009 21:16:13 +0200 Subject: Add a tracepoint for block request remapping Since 2.6.31 now has request-based device-mapper, it's useful to have a tracepoint for request-remapping as well as bio-remapping. This patch adds a tracepoint for request-remapping, trace_block_rq_remap(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Cc: Li Zefan Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + include/linux/blktrace_api.h | 2 +- include/trace/events/block.h | 33 +++++++++++++++++++++++++++++++++ kernel/trace/blktrace.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 34504f30972..ddaaea4fdff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -34,6 +34,7 @@ #include "blk.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 622939a2329..3b73b9992b2 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -212,7 +212,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) 
do { } while (0) -# define blk_trace_remove_sysfs(struct device *dev) do { } while (0) +# define blk_trace_remove_sysfs(dev) do { } while (0) static inline int blk_trace_init_sysfs(struct device *dev) { return 0; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d86af94691c..00405b5f624 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -488,6 +488,39 @@ TRACE_EVENT(block_remap, (unsigned long long)__entry->old_sector) ); +TRACE_EVENT(block_rq_remap, + + TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, + sector_t from), + + TP_ARGS(q, rq, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = disk_devt(rq->rq_disk); + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = blk_rq_sectors(rq); + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs_rq(__entry->rwbs, rq); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + #endif /* _TRACE_BLOCK_H */ /* This part must be outside protection */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 60b5c5a3d4b..d9d6206e0b1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -855,6 +855,37 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, sizeof(r), &r); } +/** + * blk_add_trace_rq_remap - Add a trace for a request-remap operation + * @q: queue the io is for + * @rq: the source request + * @dev: target device + * @from: source sector + * + * Description: + * Device mapper remaps request to other devices. + * Add a trace for that action. 
+ * + **/ +static void blk_add_trace_rq_remap(struct request_queue *q, + struct request *rq, dev_t dev, + sector_t from) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device_from = cpu_to_be32(dev); + r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); + r.sector_from = cpu_to_be64(from); + + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors, + sizeof(r), &r); +} + /** * blk_add_driver_data - Add binary message with driver-specific data * @q: queue the io is for @@ -922,10 +953,13 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_remap(blk_add_trace_remap); WARN_ON(ret); + ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); + WARN_ON(ret); } static void blk_unregister_tracepoints(void) { + unregister_trace_block_rq_remap(blk_add_trace_rq_remap); unregister_trace_block_remap(blk_add_trace_remap); unregister_trace_block_split(blk_add_trace_split); unregister_trace_block_unplug_io(blk_add_trace_unplug_io); -- cgit v1.2.3 From 1d2235152dc745c6d94bedb550fea84cffdbf768 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 2 Oct 2009 19:27:04 +0200 Subject: cfq-iosched: add a knob for desktop interactiveness This is basically identical to what Vivek Goyal posted, but combined into one and labelled 'desktop' instead of 'fairness'. The goal is to continue to improve on the latency side of things as it relates to interactiveness, keeping the questionable bits under this sysfs tunable so it would be easy for throughput-only people to turn off. Apart from adding the interactive sysfs knob, it also adds the behavioural change of allowing slice idling even if the hardware does tagged command queuing. 
Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1ca813b16e7..8917f2b3a78 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -173,6 +173,7 @@ struct cfq_data { unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; + unsigned int cfq_desktop; struct list_head cic_list; @@ -1951,7 +1952,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (cfqd->hw_tag && CIC_SEEKY(cic))) + (!cfqd->cfq_desktop && cfqd->hw_tag && CIC_SEEKY(cic))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { if (cic->ttime_mean > cfqd->cfq_slice_idle) @@ -2480,6 +2481,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->cfq_desktop = 1; cfqd->hw_tag = 1; return cfqd; @@ -2549,6 +2551,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); +SHOW_FUNCTION(cfq_desktop_show, cfqd->cfq_desktop, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -2580,6 +2583,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); +STORE_FUNCTION(cfq_desktop_store, &cfqd->cfq_desktop, 0, 1, 0); #undef STORE_FUNCTION #define CFQ_ATTR(name) \ @@ -2595,6 +2599,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_async), CFQ_ATTR(slice_async_rq), 
CFQ_ATTR(slice_idle), + CFQ_ATTR(desktop), __ATTR_NULL }; -- cgit v1.2.3 From 365722bb917b08b7323b5a4a0a3386cc7d00397d Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 3 Oct 2009 15:21:27 +0200 Subject: cfq-iosched: delay async IO dispatch, if sync IO was just done o Do not allow more than max_dispatch requests from an async queue, if some sync request has finished recently. This is in the hope that sync activity is still going on in the system and we might receive a sync request soon. Most likely from a sync queue which finished a request and we did not enable idling on it. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8917f2b3a78..70b48ea0e3e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -181,6 +181,8 @@ struct cfq_data { * Fallback dummy cfqq for extreme OOM conditions */ struct cfq_queue oom_cfqq; + + unsigned long last_end_sync_rq; }; enum cfqq_state_flags { @@ -1314,6 +1316,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * Does this cfqq already have too much IO in flight? */ if (cfqq->dispatched >= max_dispatch) { + unsigned long load_at = cfqd->last_end_sync_rq + cfq_slice_sync; + /* * idle queue must always only have a single IO in flight */ @@ -1326,6 +1330,14 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) if (cfqd->busy_queues > 1) return 0; + /* + * If a sync request has completed recently, don't overload + * the dispatch queue yet with async requests. 
+ */ + if (cfqd->cfq_desktop && !cfq_cfqq_sync(cfqq) + && time_before(jiffies, load_at)) + return 0; + /* * we are the only queue, allow up to 4 times of 'quantum' */ @@ -2158,8 +2170,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) if (cfq_cfqq_sync(cfqq)) cfqd->sync_flight--; - if (sync) + if (sync) { RQ_CIC(rq)->last_end_request = now; + cfqd->last_end_sync_rq = now; + } /* * If this is the active queue, check if it needs to be expired, @@ -2483,7 +2497,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice_idle = cfq_slice_idle; cfqd->cfq_desktop = 1; cfqd->hw_tag = 1; - + cfqd->last_end_sync_rq = jiffies; return cfqd; } -- cgit v1.2.3 From 8e2967555571659d2c8a70dd120710110ed7bba4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2009 16:26:03 +0200 Subject: cfq-iosched: implement slower async initiate and queue ramp up This slowly ramps up the async queue depth based on the time passed since the sync IO, and doesn't allow async at all until a sync slice period has passed. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++++++ block/cfq-iosched.c | 56 ++++++++++++++++++++++++++++++++------------------ include/linux/blkdev.h | 4 ++++ 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index ddaaea4fdff..a8c7fbe52e2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2492,6 +2492,14 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay) +{ + return queue_delayed_work(kblockd_workqueue, work, delay); +} +EXPORT_SYMBOL(kblockd_schedule_delayed_work); + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 70b48ea0e3e..fce8a749f4b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -150,7 +150,7 @@ struct cfq_data { * idle window management */ struct timer_list idle_slice_timer; - struct work_struct unplug_work; + struct delayed_work unplug_work; struct cfq_queue *active_queue; struct cfq_io_context *active_cic; @@ -268,11 +268,13 @@ static inline int cfq_bio_sync(struct bio *bio) * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ -static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) +static inline void cfq_schedule_dispatch(struct cfq_data *cfqd, + unsigned long delay) { if (cfqd->busy_queues) { cfq_log(cfqd, "schedule dispatch"); - kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); + kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work, + delay); } } @@ -1316,8 +1318,6 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * Does this cfqq already have too much IO in flight? 
*/ if (cfqq->dispatched >= max_dispatch) { - unsigned long load_at = cfqd->last_end_sync_rq + cfq_slice_sync; - /* * idle queue must always only have a single IO in flight */ @@ -1331,20 +1331,36 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) return 0; /* - * If a sync request has completed recently, don't overload - * the dispatch queue yet with async requests. + * Sole queue user, allow bigger slice */ - if (cfqd->cfq_desktop && !cfq_cfqq_sync(cfqq) - && time_before(jiffies, load_at)) - return 0; + max_dispatch *= 4; + } + + /* + * Async queues must wait a bit before being allowed dispatch. + * We also ramp up the dispatch depth gradually for async IO, + * based on the last sync IO we serviced + */ + if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_desktop) { + unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; + unsigned int depth; /* - * we are the only queue, allow up to 4 times of 'quantum' + * must wait a bit longer */ - if (cfqq->dispatched >= 4 * max_dispatch) + if (last_sync < cfq_slice_sync) { + cfq_schedule_dispatch(cfqd, cfq_slice_sync - last_sync); return 0; + } + + depth = last_sync / cfq_slice_sync; + if (depth < max_dispatch) + max_dispatch = depth; } + if (cfqq->dispatched >= max_dispatch) + return 0; + /* * Dispatch a request from this cfqq */ @@ -1389,7 +1405,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) if (unlikely(cfqd->active_queue == cfqq)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); } kmem_cache_free(cfq_pool, cfqq); @@ -1484,7 +1500,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (unlikely(cfqq == cfqd->active_queue)) { __cfq_slice_expired(cfqd, cfqq, 0); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); } cfq_put_queue(cfqq); @@ -2201,7 +2217,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } if (!rq_in_driver(cfqd)) - cfq_schedule_dispatch(cfqd); + 
cfq_schedule_dispatch(cfqd, 0); } /* @@ -2331,7 +2347,7 @@ queue_fail: if (cic) put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); return 1; @@ -2340,7 +2356,7 @@ queue_fail: static void cfq_kick_queue(struct work_struct *work) { struct cfq_data *cfqd = - container_of(work, struct cfq_data, unplug_work); + container_of(work, struct cfq_data, unplug_work.work); struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); @@ -2394,7 +2410,7 @@ static void cfq_idle_slice_timer(unsigned long data) expire: cfq_slice_expired(cfqd, timed_out); out_kick: - cfq_schedule_dispatch(cfqd); + cfq_schedule_dispatch(cfqd, 0); out_cont: spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); } @@ -2402,7 +2418,7 @@ out_cont: static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) { del_timer_sync(&cfqd->idle_slice_timer); - cancel_work_sync(&cfqd->unplug_work); + cancel_delayed_work_sync(&cfqd->unplug_work); } static void cfq_put_async_queues(struct cfq_data *cfqd) @@ -2484,7 +2500,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->idle_slice_timer.function = cfq_idle_slice_timer; cfqd->idle_slice_timer.data = (unsigned long) cfqd; - INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); + INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue); cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a03b715dfa..a7323930d2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1147,7 +1147,11 @@ static inline void put_dev_sector(Sector p) } struct work_struct; +struct delayed_work; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *work, + unsigned long delay); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" 
__stringify(major) "-" __stringify(minor)) -- cgit v1.2.3 From 963b72fc6664be12ea52f35a6addea14ec373433 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2009 19:42:18 +0200 Subject: cfq-iosched: rename 'desktop' sysfs entry to 'low_latency' Don't think that's necessarily a perfect description of what this option fiddles with, but it's probably better than 'desktop'. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index fce8a749f4b..3010e2ec20c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -173,7 +173,7 @@ struct cfq_data { unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; - unsigned int cfq_desktop; + unsigned int cfq_latency; struct list_head cic_list; @@ -1341,7 +1341,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) * We also ramp up the dispatch depth gradually for async IO, * based on the last sync IO we serviced */ - if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_desktop) { + if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; unsigned int depth; @@ -1980,7 +1980,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, enable_idle = old_idle = cfq_cfqq_idle_window(cfqq); if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || - (!cfqd->cfq_desktop && cfqd->hw_tag && CIC_SEEKY(cic))) + (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { if (cic->ttime_mean > cfqd->cfq_slice_idle) @@ -2511,7 +2511,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; - cfqd->cfq_desktop = 1; + cfqd->cfq_latency = 1; cfqd->hw_tag = 1; cfqd->last_end_sync_rq = jiffies; return cfqd; @@ -2581,7 +2581,7 @@ 
SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); -SHOW_FUNCTION(cfq_desktop_show, cfqd->cfq_desktop, 0); +SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ @@ -2613,7 +2613,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); -STORE_FUNCTION(cfq_desktop_store, &cfqd->cfq_desktop, 0, 1, 0); +STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); #undef STORE_FUNCTION #define CFQ_ATTR(name) \ @@ -2629,7 +2629,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_async), CFQ_ATTR(slice_async_rq), CFQ_ATTR(slice_idle), - CFQ_ATTR(desktop), + CFQ_ATTR(low_latency), __ATTR_NULL }; -- cgit v1.2.3 From 61f0c1dcaaac71faabac6ef7c839b29f20204bea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 3 Oct 2009 19:46:03 +0200 Subject: cfq-iosched: use assigned slice sync value, not default We should use the sysfs modified slice sync value, in case it differs from the default. 
Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3010e2ec20c..ebab60c6be9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1348,12 +1348,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) /* * must wait a bit longer */ - if (last_sync < cfq_slice_sync) { - cfq_schedule_dispatch(cfqd, cfq_slice_sync - last_sync); + if (last_sync < cfqd->cfq_slice[1]) { + cfq_schedule_dispatch(cfqd, + cfqd->cfq_slice[1] - last_sync); return 0; } - depth = last_sync / cfq_slice_sync; + depth = last_sync / cfqd->cfq_slice[1]; if (depth < max_dispatch) max_dispatch = depth; } -- cgit v1.2.3 From ac481c20ef8f6c6f2be75d581863f40c43874ef7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Sat, 3 Oct 2009 20:52:01 +0200 Subject: block: Topology ioctls Not all users of the topology information want to use libblkid. Provide the topology information through bdev ioctls. Also clarify sector size comments for existing BLK ioctls. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 13 +++++++++++++ block/ioctl.c | 17 +++++++++++++++-- include/linux/blkdev.h | 35 ++++++++++++++++++++++++++++++----- include/linux/fs.h | 4 ++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 7865a34e0fa..9bd086c1a4d 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val) return put_user(val, (compat_int_t __user *)compat_ptr(arg)); } +static int compat_put_uint(unsigned long arg, unsigned int val) +{ + return put_user(val, (compat_uint_t __user *)compat_ptr(arg)); +} + static int compat_put_long(unsigned long arg, long val) { return put_user(val, (compat_long_t __user *)compat_ptr(arg)); @@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) switch (cmd) { case HDIO_GETGEO: return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); + case BLKPBSZGET: + return compat_put_uint(arg, bdev_physical_block_size(bdev)); + case BLKIOMIN: + return compat_put_uint(arg, bdev_io_min(bdev)); + case BLKIOOPT: + return compat_put_uint(arg, bdev_io_opt(bdev)); + case BLKALIGNOFF: + return compat_put_int(arg, bdev_alignment_offset(bdev)); case BLKFLSBUF: case BLKROSET: case BLKDISCARD: diff --git a/block/ioctl.c b/block/ioctl.c index d3e6b5827a3..1f4d1de12b0 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val) return put_user(val, (int __user *)arg); } +static int put_uint(unsigned long arg, unsigned int val) +{ + return put_user(val, (unsigned int __user *)arg); +} + static int put_long(unsigned long arg, long val) { return put_user(val, (long __user *)arg); @@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); - 
case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ + case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ return put_int(arg, block_size(bdev)); - case BLKSSZGET: /* get block device hardware sector size */ + case BLKSSZGET: /* get block device logical block size */ return put_int(arg, bdev_logical_block_size(bdev)); + case BLKPBSZGET: /* get block device physical block size */ + return put_uint(arg, bdev_physical_block_size(bdev)); + case BLKIOMIN: + return put_uint(arg, bdev_io_min(bdev)); + case BLKIOOPT: + return put_uint(arg, bdev_io_opt(bdev)); + case BLKALIGNOFF: + return put_int(arg, bdev_alignment_offset(bdev)); case BLKSECTGET: return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); case BLKRASET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a7323930d2b..25119041e03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1081,25 +1081,37 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } +static inline int bdev_physical_block_size(struct block_device *bdev) +{ + return queue_physical_block_size(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_min(struct request_queue *q) { return q->limits.io_min; } +static inline int bdev_io_min(struct block_device *bdev) +{ + return queue_io_min(bdev_get_queue(bdev)); +} + static inline unsigned int queue_io_opt(struct request_queue *q) { return q->limits.io_opt; } +static inline int bdev_io_opt(struct block_device *bdev) +{ + return queue_io_opt(bdev_get_queue(bdev)); +} + static inline int queue_alignment_offset(struct request_queue *q) { - if (q && q->limits.misaligned) + if (q->limits.misaligned) return -1; - if (q && q->limits.alignment_offset) - return q->limits.alignment_offset; - - return 0; + return q->limits.alignment_offset; } static inline int queue_sector_alignment_offset(struct request_queue *q, @@ -1109,6 +1121,19 @@ static inline int 
queue_sector_alignment_offset(struct request_queue *q, & (q->limits.io_min - 1); } +static inline int bdev_alignment_offset(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q->limits.misaligned) + return -1; + + if (bdev != bdev->bd_contains) + return bdev->bd_part->alignment_offset; + + return q->limits.alignment_offset; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2adaa2529f1..883eaacfd92 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -300,6 +300,10 @@ struct inodes_stat_t { #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) #define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From e00c54c36ac2024c3a8a37432e2e2698ff849594 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Oct 2009 20:36:19 +0200 Subject: cfq-iosched: don't delay async queue if it hasn't dispatched at all We cannot delay for the first dispatch of the async queue if it hasn't dispatched at all, since that could present a local user DoS attack vector using an app that just did slow timed sync reads while filling memory. 
Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ebab60c6be9..9c4b679908f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1345,16 +1345,9 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) unsigned long last_sync = jiffies - cfqd->last_end_sync_rq; unsigned int depth; - /* - * must wait a bit longer - */ - if (last_sync < cfqd->cfq_slice[1]) { - cfq_schedule_dispatch(cfqd, - cfqd->cfq_slice[1] - last_sync); - return 0; - } - depth = last_sync / cfqd->cfq_slice[1]; + if (!depth && !cfqq->dispatched) + depth = 1; if (depth < max_dispatch) max_dispatch = depth; } -- cgit v1.2.3 From 0f78ab9899e9d6acb09d5465def618704255963b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 Oct 2009 21:04:38 +0200 Subject: Revert "Seperate read and write statistics of in_flight requests" This reverts commit a9327cac440be4d8333bba975cbbf76045096275. 
Corrado Zoccolo reports: "with 2.6.32-rc1 I started getting the following strange output from "iostat -kx 2": Linux 2.6.31bisect (et2) 04/10/2009 _i686_ (2 CPU) avg-cpu: %user %nice %system %iowait %steal %idle 10,70 0,00 3,16 15,75 0,00 70,38 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 18,22 0,00 0,67 0,01 14,77 0,02 43,94 0,01 10,53 39043915,03 2629219,87 sdb 60,89 9,68 50,79 3,04 1724,43 50,52 65,95 0,70 13,06 488437,47 2629219,87 avg-cpu: %user %nice %system %iowait %steal %idle 2,72 0,00 0,74 0,00 0,00 96,53 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 6,68 0,00 0,99 0,00 0,00 92,33 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 avg-cpu: %user %nice %system %iowait %steal %idle 4,40 0,00 0,73 1,47 0,00 93,40 Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util sda 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 100,00 sdb 0,00 4,00 0,00 3,00 0,00 28,00 18,67 0,06 19,50 333,33 100,00 Global values for service time and utilization are garbage. For interval values, utilization is always 100%, and service time is higher than normal. I bisected it down to: [a9327cac440be4d8333bba975cbbf76045096275] Seperate read and write statistics of in_flight requests and verified that reverting just that commit indeed solves the issue on 2.6.32-rc1." So until this is debugged, revert the bad commit. 
Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++--- block/blk-merge.c | 2 +- block/genhd.c | 4 +--- drivers/md/dm.c | 16 ++++++---------- fs/partitions/check.c | 12 +----------- include/linux/genhd.h | 21 +++++++-------------- 6 files changed, 19 insertions(+), 42 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a8c7fbe52e2..81f34311659 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -70,7 +70,7 @@ static void drive_stat_acct(struct request *rq, int new_io) part_stat_inc(cpu, part, merges[rw]); else { part_round_stats(cpu, part); - part_inc_in_flight(part, rw); + part_inc_in_flight(part); } part_stat_unlock(); @@ -1032,7 +1032,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part, if (part->in_flight) { __part_stat_add(cpu, part, time_in_queue, - part_in_flight(part) * (now - part->stamp)); + part->in_flight * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; @@ -1739,7 +1739,7 @@ static void blk_account_io_done(struct request *req) part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); part_round_stats(cpu, part); - part_dec_in_flight(part, rw); + part_dec_in_flight(part); part_stat_unlock(); } diff --git a/block/blk-merge.c b/block/blk-merge.c index 99cb5cf1f44..b0de8574fdc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -351,7 +351,7 @@ static void blk_account_io_merge(struct request *req) part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_round_stats(cpu, part); - part_dec_in_flight(part, rq_data_dir(req)); + part_dec_in_flight(part); part_stat_unlock(); } diff --git a/block/genhd.c b/block/genhd.c index 517e4332cb3..5a0861da324 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -869,7 +869,6 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); static 
DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -889,7 +888,6 @@ static struct attribute *disk_attrs[] = { &dev_attr_alignment_offset.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, - &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif @@ -1055,7 +1053,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_read(hd, merges[1]), (unsigned long long)part_stat_read(hd, sectors[1]), jiffies_to_msecs(part_stat_read(hd, ticks[1])), - part_in_flight(hd), + hd->in_flight, jiffies_to_msecs(part_stat_read(hd, io_ticks)), jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 376f1ab48a2..23e76fe0d35 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -130,7 +130,7 @@ struct mapped_device { /* * A list of ios that arrived while we were suspended. */ - atomic_t pending[2]; + atomic_t pending; wait_queue_head_t wait; struct work_struct work; struct bio_list deferred; @@ -453,14 +453,13 @@ static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; int cpu; - int rw = bio_data_dir(io->bio); io->start_time = jiffies; cpu = part_stat_lock(); part_round_stats(cpu, &dm_disk(md)->part0); part_stat_unlock(); - dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static void end_io_acct(struct dm_io *io) @@ -480,9 +479,8 @@ static void end_io_acct(struct dm_io *io) * After this is decremented the bio must not be touched if it is * a barrier. 
*/ - dm_disk(md)->part0.in_flight[rw] = pending = - atomic_dec_return(&md->pending[rw]); - pending += atomic_read(&md->pending[rw^0x1]); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); /* nudge anyone waiting on suspend queue */ if (!pending) @@ -1787,8 +1785,7 @@ static struct mapped_device *alloc_dev(int minor) if (!md->disk) goto bad_disk; - atomic_set(&md->pending[0], 0); - atomic_set(&md->pending[1], 0); + atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); @@ -2091,8 +2088,7 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) break; } spin_unlock_irqrestore(q->queue_lock, flags); - } else if (!atomic_read(&md->pending[0]) && - !atomic_read(&md->pending[1])) + } else if (!atomic_read(&md->pending)) break; if (interruptible == TASK_INTERRUPTIBLE && diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7b685e10cba..f38fee0311a 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -248,19 +248,11 @@ ssize_t part_stat_show(struct device *dev, part_stat_read(p, merges[WRITE]), (unsigned long long)part_stat_read(p, sectors[WRITE]), jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), - part_in_flight(p), + p->in_flight, jiffies_to_msecs(part_stat_read(p, io_ticks)), jiffies_to_msecs(part_stat_read(p, time_in_queue))); } -ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]); -} - #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -289,7 +281,6 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, 
NULL); -static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); @@ -301,7 +292,6 @@ static struct attribute *part_attrs[] = { &dev_attr_size.attr, &dev_attr_alignment_offset.attr, &dev_attr_stat.attr, - &dev_attr_inflight.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 297df45ffd0..7beaa21b388 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight[2]; + int in_flight; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,23 +322,18 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part, int rw) +static inline void part_inc_in_flight(struct hd_struct *part) { - part->in_flight[rw]++; + part->in_flight++; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]++; + part_to_disk(part)->part0.in_flight++; } -static inline void part_dec_in_flight(struct hd_struct *part, int rw) +static inline void part_dec_in_flight(struct hd_struct *part) { - part->in_flight[rw]--; + part->in_flight--; if (part->partno) - part_to_disk(part)->part0.in_flight[rw]--; -} - -static inline int part_in_flight(struct hd_struct *part) -{ - return part->in_flight[0] + part->in_flight[1]; + part_to_disk(part)->part0.in_flight--; } /* block/blk-core.c */ @@ -551,8 +546,6 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); #ifdef 
CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3