summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorHoang-Nam Nguyen <hnguyen@linux.vnet.ibm.com>2007-02-15 17:07:30 +0100
committerRoland Dreier <rolandd@cisco.com>2007-02-16 13:57:34 -0800
commit8b16cef3df871b005f3a97e273b5b135ebfb3769 (patch)
tree36b75ea756c6338164ab436da515abd3073d21f2 /drivers/infiniband
parent78d8d5f9ef8d6179e92b94481cfdfc45d396992f (diff)
downloadlinux-3.10-8b16cef3df871b005f3a97e273b5b135ebfb3769.tar.gz
linux-3.10-8b16cef3df871b005f3a97e273b5b135ebfb3769.tar.bz2
linux-3.10-8b16cef3df871b005f3a97e273b5b135ebfb3769.zip
IB/ehca: Fix race condition/locking issues in scaling code
Fix a race condition in find_next_cpu_online() and some other locking issues in ehca scaling code. Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c68
1 files changed, 33 insertions, 35 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index b923b5d5de6..9679b072ad0 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
{
- unsigned long flags_last_cpu;
+ int cpu;
+ unsigned long flags;
+ WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
- spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
- pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (pool->last_cpu == NR_CPUS)
- pool->last_cpu = first_cpu(cpu_online_map);
- spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+ spin_lock_irqsave(&pool->last_cpu_lock, flags);
+ cpu = next_cpu(pool->last_cpu, cpu_online_map);
+ if (cpu == NR_CPUS)
+ cpu = first_cpu(cpu_online_map);
+ pool->last_cpu = cpu;
+ spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
- return pool->last_cpu;
+ return cpu;
}
static void __queue_comp_task(struct ehca_cq *__cq,
struct ehca_cpu_comp_task *cct)
{
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
- spin_lock_irqsave(&__cq->task_lock, flags_cq);
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&__cq->task_lock);
if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
else
__cq->nr_callbacks++;
- spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock(&__cq->task_lock);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
if (cct->cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
}
__queue_comp_task(__cq, cct);
-
- put_cpu();
-
- return;
}
static void run_comp_task(struct ehca_cpu_comp_task* cct)
{
struct ehca_cq *cq;
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ spin_lock_irqsave(&cct->task_lock, flags);
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ spin_lock_irqsave(&cct->task_lock, flags);
- spin_lock_irqsave(&cq->task_lock, flags_cq);
+ spin_lock(&cq->task_lock);
cq->nr_callbacks--;
if (cq->nr_callbacks == 0) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
- spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+ spin_unlock(&cq->task_lock);
}
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- return;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static int comp_task(void *__cct)
{
struct ehca_cpu_comp_task* cct = __cct;
+ int cql_empty;
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
- if (list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (cql_empty)
schedule();
else
__set_current_state(TASK_RUNNING);
remove_wait_queue(&cct->wait_queue, &wait);
- if (!list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (!cql_empty)
run_comp_task(__cct);
set_current_state(TASK_INTERRUPTIBLE);
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
if (task)
kthread_stop(task);
-
- return;
}
static void take_over_work(struct ehca_comp_pool *pool,
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
free_percpu(pool->cpu_comp_tasks);
kfree(pool);
#endif
-
- return;
}