| [2557] | 1 | From cabef9fee397081ec3dfbde2955d4db675a96a4a Mon Sep 17 00:00:00 2001 | 
|---|
|  | 2 | From: Thomas Gleixner <tglx@linutronix.de> | 
|---|
|  | 3 | Date: Mon, 12 May 2014 20:45:34 +0000 | 
|---|
|  | 4 | Subject: [PATCH 1/1] futex: Add another early deadlock detection check | 
|---|
|  | 5 |  | 
|---|
|  | 6 | commit 866293ee54227584ffcb4a42f69c1f365974ba7f upstream. | 
|---|
|  | 7 |  | 
|---|
|  | 8 | Dave Jones trinity syscall fuzzer exposed an issue in the deadlock | 
|---|
|  | 9 | detection code of rtmutex: | 
|---|
|  | 10 | http://lkml.kernel.org/r/20140429151655.GA14277@redhat.com | 
|---|
|  | 11 |  | 
|---|
|  | 12 | That underlying issue has been fixed with a patch to the rtmutex code, | 
|---|
|  | 13 | but the futex code must not call into rtmutex in that case because | 
|---|
|  | 14 | - it can detect that issue early | 
|---|
|  | 15 | - it avoids a different and more complex fixup for backing out | 
|---|
|  | 16 |  | 
|---|
|  | 17 | If the user space variable got manipulated to 0x80000000 which means | 
|---|
|  | 18 | no lock holder, but the waiters bit set and an active pi_state in the | 
|---|
|  | 19 | kernel is found we can figure out the recursive locking issue by | 
|---|
|  | 20 | looking at the pi_state owner. If that is the current task, then we | 
|---|
|  | 21 | can safely return -EDEADLK. | 
|---|
|  | 22 |  | 
|---|
|  | 23 | The check should have been added in commit 59fa62451 (futex: Handle | 
|---|
|  | 24 | futex_pi OWNER_DIED take over correctly) already, but I did not see | 
|---|
|  | 25 | the above issue caused by user space manipulation back then. | 
|---|
|  | 26 |  | 
|---|
|  | 27 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | 
|---|
|  | 28 | Cc: Dave Jones <davej@redhat.com> | 
|---|
|  | 29 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | 
|---|
|  | 30 | Cc: Peter Zijlstra <peterz@infradead.org> | 
|---|
|  | 31 | Cc: Darren Hart <darren@dvhart.com> | 
|---|
|  | 32 | Cc: Davidlohr Bueso <davidlohr@hp.com> | 
|---|
|  | 33 | Cc: Steven Rostedt <rostedt@goodmis.org> | 
|---|
|  | 34 | Cc: Clark Williams <williams@redhat.com> | 
|---|
|  | 35 | Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> | 
|---|
|  | 36 | Cc: Lai Jiangshan <laijs@cn.fujitsu.com> | 
|---|
|  | 37 | Cc: Roland McGrath <roland@hack.frob.com> | 
|---|
|  | 38 | Cc: Carlos ODonell <carlos@redhat.com> | 
|---|
|  | 39 | Cc: Jakub Jelinek <jakub@redhat.com> | 
|---|
|  | 40 | Cc: Michael Kerrisk <mtk.manpages@gmail.com> | 
|---|
|  | 41 | Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 
|---|
|  | 42 | Link: http://lkml.kernel.org/r/20140512201701.097349971@linutronix.de | 
|---|
|  | 43 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | 
|---|
|  | 44 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 
|---|
|  | 45 | --- | 
|---|
|  | 46 | kernel/futex.c |   47 ++++++++++++++++++++++++++++++++++------------- | 
|---|
|  | 47 | 1 file changed, 34 insertions(+), 13 deletions(-) | 
|---|
|  | 48 |  | 
|---|
|  | 49 | diff --git a/kernel/futex.c b/kernel/futex.c | 
|---|
|  | 50 | index 3bc18bf..66af37d 100644 | 
|---|
|  | 51 | --- a/kernel/futex.c | 
|---|
|  | 52 | +++ b/kernel/futex.c | 
|---|
|  | 53 | @@ -594,7 +594,8 @@ void exit_pi_state_list(struct task_struct *curr) | 
|---|
|  | 54 |  | 
|---|
|  | 55 | static int | 
|---|
|  | 56 | lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 57 | -               union futex_key *key, struct futex_pi_state **ps) | 
|---|
|  | 58 | +               union futex_key *key, struct futex_pi_state **ps, | 
|---|
|  | 59 | +               struct task_struct *task) | 
|---|
|  | 60 | { | 
|---|
|  | 61 | struct futex_pi_state *pi_state = NULL; | 
|---|
|  | 62 | struct futex_q *this, *next; | 
|---|
|  | 63 | @@ -638,6 +639,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | 
|---|
|  | 64 | return -EINVAL; | 
|---|
|  | 65 | } | 
|---|
|  | 66 |  | 
|---|
|  | 67 | +                       /* | 
|---|
|  | 68 | +                        * Protect against a corrupted uval. If uval | 
|---|
|  | 69 | +                        * is 0x80000000 then pid is 0 and the waiter | 
|---|
|  | 70 | +                        * bit is set. So the deadlock check in the | 
|---|
|  | 71 | +                        * calling code has failed and we did not fall | 
|---|
|  | 72 | +                        * into the check above due to !pid. | 
|---|
|  | 73 | +                        */ | 
|---|
|  | 74 | +                       if (task && pi_state->owner == task) | 
|---|
|  | 75 | +                               return -EDEADLK; | 
|---|
|  | 76 | + | 
|---|
|  | 77 | atomic_inc(&pi_state->refcount); | 
|---|
|  | 78 | *ps = pi_state; | 
|---|
|  | 79 |  | 
|---|
|  | 80 | @@ -787,7 +798,7 @@ retry: | 
|---|
|  | 81 | * We dont have the lock. Look up the PI state (or create it if | 
|---|
|  | 82 | * we are the first waiter): | 
|---|
|  | 83 | */ | 
|---|
|  | 84 | -       ret = lookup_pi_state(uval, hb, key, ps); | 
|---|
|  | 85 | +       ret = lookup_pi_state(uval, hb, key, ps, task); | 
|---|
|  | 86 |  | 
|---|
|  | 87 | if (unlikely(ret)) { | 
|---|
|  | 88 | switch (ret) { | 
|---|
|  | 89 | @@ -1197,7 +1208,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | 
|---|
|  | 90 | * | 
|---|
|  | 91 | * Return: | 
|---|
|  | 92 | *  0 - failed to acquire the lock atomically; | 
|---|
|  | 93 | - *  1 - acquired the lock; | 
|---|
|  | 94 | + * >0 - acquired the lock, return value is vpid of the top_waiter | 
|---|
|  | 95 | * <0 - error | 
|---|
|  | 96 | */ | 
|---|
|  | 97 | static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 
|---|
|  | 98 | @@ -1208,7 +1219,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 
|---|
|  | 99 | { | 
|---|
|  | 100 | struct futex_q *top_waiter = NULL; | 
|---|
|  | 101 | u32 curval; | 
|---|
|  | 102 | -       int ret; | 
|---|
|  | 103 | +       int ret, vpid; | 
|---|
|  | 104 |  | 
|---|
|  | 105 | if (get_futex_value_locked(&curval, pifutex)) | 
|---|
|  | 106 | return -EFAULT; | 
|---|
|  | 107 | @@ -1236,11 +1247,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | 
|---|
|  | 108 | * the contended case or if set_waiters is 1.  The pi_state is returned | 
|---|
|  | 109 | * in ps in contended cases. | 
|---|
|  | 110 | */ | 
|---|
|  | 111 | +       vpid = task_pid_vnr(top_waiter->task); | 
|---|
|  | 112 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, | 
|---|
|  | 113 | set_waiters); | 
|---|
|  | 114 | -       if (ret == 1) | 
|---|
|  | 115 | +       if (ret == 1) { | 
|---|
|  | 116 | requeue_pi_wake_futex(top_waiter, key2, hb2); | 
|---|
|  | 117 | - | 
|---|
|  | 118 | +               return vpid; | 
|---|
|  | 119 | +       } | 
|---|
|  | 120 | return ret; | 
|---|
|  | 121 | } | 
|---|
|  | 122 |  | 
|---|
|  | 123 | @@ -1272,7 +1285,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | 
|---|
|  | 124 | struct futex_hash_bucket *hb1, *hb2; | 
|---|
|  | 125 | struct plist_head *head1; | 
|---|
|  | 126 | struct futex_q *this, *next; | 
|---|
|  | 127 | -       u32 curval2; | 
|---|
|  | 128 |  | 
|---|
|  | 129 | if (requeue_pi) { | 
|---|
|  | 130 | /* | 
|---|
|  | 131 | @@ -1358,16 +1370,25 @@ retry_private: | 
|---|
|  | 132 | * At this point the top_waiter has either taken uaddr2 or is | 
|---|
|  | 133 | * waiting on it.  If the former, then the pi_state will not | 
|---|
|  | 134 | * exist yet, look it up one more time to ensure we have a | 
|---|
|  | 135 | -                * reference to it. | 
|---|
|  | 136 | +                * reference to it. If the lock was taken, ret contains the | 
|---|
|  | 137 | +                * vpid of the top waiter task. | 
|---|
|  | 138 | */ | 
|---|
|  | 139 | -               if (ret == 1) { | 
|---|
|  | 140 | +               if (ret > 0) { | 
|---|
|  | 141 | WARN_ON(pi_state); | 
|---|
|  | 142 | drop_count++; | 
|---|
|  | 143 | task_count++; | 
|---|
|  | 144 | -                       ret = get_futex_value_locked(&curval2, uaddr2); | 
|---|
|  | 145 | -                       if (!ret) | 
|---|
|  | 146 | -                               ret = lookup_pi_state(curval2, hb2, &key2, | 
|---|
|  | 147 | -                                                     &pi_state); | 
|---|
|  | 148 | +                       /* | 
|---|
|  | 149 | +                        * If we acquired the lock, then the user | 
|---|
|  | 150 | +                        * space value of uaddr2 should be vpid. It | 
|---|
|  | 151 | +                        * cannot be changed by the top waiter as it | 
|---|
|  | 152 | +                        * is blocked on hb2 lock if it tries to do | 
|---|
|  | 153 | +                        * so. If something fiddled with it behind our | 
|---|
|  | 154 | +                        * back the pi state lookup might unearth | 
|---|
|  | 155 | +                        * it. So we rather use the known value than | 
|---|
|  | 156 | +                        * rereading and handing potential crap to | 
|---|
|  | 157 | +                        * lookup_pi_state. | 
|---|
|  | 158 | +                        */ | 
|---|
|  | 159 | +                       ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); | 
|---|
|  | 160 | } | 
|---|
|  | 161 |  | 
|---|
|  | 162 | switch (ret) { | 
|---|
|  | 163 | -- | 
|---|
|  | 164 | 1.7.10.4 | 
|---|
|  | 165 |  | 
|---|