mutex相關的函數并不是linux kernel實現的，而是glibc實現的，源碼位于nptl目錄下。
http://ftp.gnu.org/pub/gnu/glibc/glibc-2.3.5.tar.gz
首先說數據結構:
typedef union
{
? struct
? {
??? int __lock;
??? unsigned int __count;
??? int __owner;
??? unsigned int __nusers;
??? /* KIND must stay at this position in the structure to maintain
?????? binary compatibility.? */
??? int __kind;
??? int __spins;
? } __data;
? char __size[__SIZEOF_PTHREAD_MUTEX_T];
? long int __align;
} pthread_mutex_t;
int __lock;  資源競爭引用計數
int __kind; 鎖類型，init 函數中mutexattr 參數傳遞，該參數可以為NULL，一般為 PTHREAD_MUTEX_NORMAL
結構體其他元素暫時不了解，以后更新。
/*nptl/pthread_mutex_init.c*/
int
__pthread_mutex_init (mutex, mutexattr)
???? pthread_mutex_t *mutex;
???? const pthread_mutexattr_t *mutexattr;
{
? const struct pthread_mutexattr *imutexattr;
? assert (sizeof (pthread_mutex_t) <= __SIZEOF_PTHREAD_MUTEX_T);
? imutexattr = (const struct pthread_mutexattr *) mutexattr ?: &default_attr;
? /* Clear the whole variable.? */
? memset (mutex, '\0', __SIZEOF_PTHREAD_MUTEX_T);
? /* Copy the values from the attribute.? */
? mutex->__data.__kind = imutexattr->mutexkind & ~0x80000000;
? /* Default values: mutex not used yet.? */
? // mutex->__count = 0;????????already done by memset
? // mutex->__owner = 0;????????already done by memset
? // mutex->__nusers = 0;????????already done by memset
? // mutex->__spins = 0;????????already done by memset
? return 0;
}
init函數就比較簡單了，將mutex結構體清零，設置結構體中__kind屬性。
/*nptl/pthread_mutex_lock.c*/
int
__pthread_mutex_lock (mutex)
???? pthread_mutex_t *mutex;
{
? assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
? pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
? switch (__builtin_expect (mutex->__data.__kind, PTHREAD_MUTEX_TIMED_NP))
??? {
???? …
??? default:
????? /* Correct code cannot set any other type.? */
??? case PTHREAD_MUTEX_TIMED_NP:
??? simple:
????? /* Normal mutex.? */
????? LLL_MUTEX_LOCK (mutex->__data.__lock);
????? break;
??…
??}
? /* Record the ownership.? */
? assert (mutex->__data.__owner == 0);
? mutex->__data.__owner = id;
#ifndef NO_INCR
? ++mutex->__data.__nusers;
#endif
? return 0;
}
該函數主要是調用LLL_MUTEX_LOCK， 省略部分為根據mutex結構體__kind屬性不同值做些處理。
宏定義函數LLL_MUTEX_LOCK最終調用下面的__lll_mutex_lock，將結構體mutex的__lock屬性作為參數傳遞進來
/* Take the lock word FUTEX (an int *).  Fast path: the compare-and-
   exchange swaps 0 -> 1 and reports 0, so nothing else happens.  If it
   reports non-zero, __lll_lock_wait is called on the same word.  */
#define __lll_mutex_lock(futex)                                         \
  ((void) ({                                                            \
    int *__futex = (futex);                                             \
    if (atomic_compare_and_exchange_bool_acq (__futex, 1, 0) != 0)      \
      __lll_lock_wait (__futex);                                        \
  }))
atomic_compare_and_exchange_bool_acq (__futex, 1, 0)宏定義為:
/* If *MEM equals OLDVAL, store NEWVAL into *MEM and evaluate to 0;
   otherwise leave *MEM untouched and evaluate to 1.
   MEM and NEWVAL are each evaluated once.
   NOTE(review): this expansion is a plain load, compare and store;
   nothing in it makes the sequence atomic.  Presumably a generic
   fallback that real architectures override -- confirm.  */
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
  ({ __typeof (mem) __gmemp = (mem);                              \
     __typeof (*(mem)) __gnewval = (newval);                      \
                                                                  \
     *__gmemp == (oldval) ? (*__gmemp = __gnewval, 0) : 1; })
這個宏實現的功能是:
如果mem的值等于oldval，則把newval賦值給mem，返回0，否則不做任何處理，返回1.
由此可以看出，當mutex鎖限制的資源沒有競爭時，__lock 屬性被置為1，并返回0，不會調用__lll_lock_wait (__futex); 當存在競爭時，再次調用lock函數，該宏不做任何處理，返回1，調用__lll_lock_wait (__futex);
/*
 * Slow path of the low-level lock, entered when the 0 -> 1 fast path
 * failed.
 *
 * Each iteration first tries to change the word from 1 to 2.  If the
 * word was non-zero beforehand, lll_futex_wait (futex, 2) is called
 * (presumably this blocks in the kernel while the word still holds 2 --
 * confirm against the lll_futex_wait definition).  The loop ends once
 * the word can be changed from 0 to 2, i.e. the caller then holds the
 * lock with the word set to 2.
 */
void
__lll_lock_wait (int *futex)
{
  do
    {
      int oldval = atomic_compare_and_exchange_val_acq (futex, 2, 1);
      if (oldval != 0)
        lll_futex_wait (futex, 2);
    }
  while (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0);
}
atomic_compare_and_exchange_val_acq (futex, 2, 1); 宏定義:
/* The only basic operation needed is compare and exchange.

   If *MEM equals OLDVAL, store NEWVAL into *MEM.  In every case the
   expression evaluates to the value *MEM held on entry.
   MEM and NEWVAL are each evaluated once.
   NOTE(review): this expansion is a plain load, compare and store;
   nothing in it makes the sequence atomic.  Presumably a generic
   fallback that real architectures override -- confirm.  */
#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  ({ __typeof (mem) __gmemp = (mem);                             \
     __typeof (*(mem)) __gret = *__gmemp;                        \
     __typeof (*(mem)) __gnewval = (newval);                     \
                                                                 \
     if (__gret == (oldval))                                     \
       *__gmemp = __gnewval;                                     \
     __gret; })
這個宏實現的功能是，當mem等于oldval時，將mem置為newval，始終返回mem原始值。
此時，futex等于1，futex將被置為2，并且返回1. 進而調用
lll_futex_wait (futex, 2);
/* Issue the futex system call with operation FUTEX_WAIT on FTX, passing
   VAL and TIMESPEC as the remaining two of the four arguments.
   The result is -_retval when _r10 equals -1, otherwise _retval.
   NOTE(review): _r10 and _retval are not declared here; presumably
   DO_INLINE_SYSCALL introduces them (architecture-specific) -- confirm.  */
#define lll_futex_timed_wait(ftx, val, timespec)                        \
({                                                                      \
   DO_INLINE_SYSCALL(futex, 4, (long) (ftx), FUTEX_WAIT, (int) (val),   \
                     (long) (timespec));                                \
   _r10 == -1 ? -_retval : _retval;                                     \
})
該宏對于不同的平臺架構會用不同的實現，采用匯編語言實現系統調用。不過確定的是調用了Linux kernel的futex系統調用。
futex在linux kernel的實現位于:kernel/futex.c
/*
 * futex system call entry point.
 *
 * For FUTEX_WAIT, FUTEX_LOCK_PI, FUTEX_WAIT_BITSET and
 * FUTEX_WAIT_REQUEUE_PI with a non-NULL utime, the timespec is copied
 * in from user space, validated and converted to a ktime_t; for
 * FUTEX_WAIT the current time (ktime_get()) is added to it.
 * For the requeue commands and FUTEX_WAKE_OP, utime is instead
 * reinterpreted as the integer val2.
 * Everything is then handed to do_futex().
 *
 * Returns -EFAULT if the timespec cannot be copied, -EINVAL if it is
 * not valid, otherwise whatever do_futex() returns.
 */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		struct timespec __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	struct timespec ts;
	ktime_t t, *tp = NULL;
	u32 val2 = 0;
	int cmd = op & FUTEX_CMD_MASK;

	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
		      cmd == FUTEX_WAIT_BITSET ||
		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
			return -EFAULT;
		if (!timespec_valid(&ts))
			return -EINVAL;

		t = timespec_to_ktime(ts);
		if (cmd == FUTEX_WAIT)
			t = ktime_add_safe(ktime_get(), t);
		tp = &t;
	}
	/*
	 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
	 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
	 */
	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
	    cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
		val2 = (u32) (unsigned long) utime;

	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
futex具有六個形參，pthread_mutex_lock最終只關注了前四個。futex函數對參數進行判斷和轉化之后，直接調用do_futex。
/*
 * Dispatch a futex operation (excerpt: only the FUTEX_WAIT and
 * FUTEX_WAIT_BITSET commands are shown).
 *
 * fshared is 1 unless FUTEX_PRIVATE_FLAG is set in op.
 * FUTEX_CLOCK_REALTIME is only accepted together with
 * FUTEX_WAIT_BITSET or FUTEX_WAIT_REQUEUE_PI; otherwise -ENOSYS.
 * Commands with no case end up in the default branch and also yield
 * -ENOSYS.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int clockrt, ret = -ENOSYS;
	int cmd = op & FUTEX_CMD_MASK;
	int fshared = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		fshared = 1;

	clockrt = op & FUTEX_CLOCK_REALTIME;
	if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
		return -ENOSYS;

	switch (cmd) {
	case FUTEX_WAIT:
		/* Plain wait is a bitset wait that matches any bit. */
		val3 = FUTEX_BITSET_MATCH_ANY;
		/* fall through */
	case FUTEX_WAIT_BITSET:
		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
		break;
	/* ... handlers for the other commands are omitted from this excerpt ... */
	default:
		ret = -ENOSYS;
	}
	return ret;
}
省略部分為對其他cmd的處理，pthread_mutex_lock函數最終傳入的cmd參數為FUTEX_WAIT，所以在此只關注此分支，分析futex_wait函數的實現。
/*
 * Wait on the futex at uaddr (excerpt: parameter handling and the
 * post-wakeup processing are omitted).
 *
 * The visible path calls futex_wait_setup(); a non-zero result jumps
 * to the "out" label, which lies in the omitted part.  Otherwise
 * futex_wait_queue_me() is called to queue and wait.
 */
static int futex_wait(u32 __user *uaddr, int fshared,
		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
{
	struct hrtimer_sleeper timeout, *to = NULL;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q;
	int ret;

	/* ... parameter checks and conversion omitted from this excerpt ... */

retry:
	/* Prepare to wait on uaddr. */
	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
	if (ret)
		goto out;

	/* queue_me and wait for wakeup, timeout, or a signal. */
	futex_wait_queue_me(hb, &q, to);

	/* ... other handlers (including the "out" label) omitted from this excerpt ... */
	return ret;
}
futex_wait_setup 將線程放進休眠隊列中，
futex_wait_queue_me(hb, &q, to);將本線程休眠，等待喚醒。
喚醒后，__lll_lock_wait函數中的while (atomic_compare_and_exchange_bool_acq (futex, 2, 0) != 0); 語句將被執行，由于此時futex在pthread_mutex_unlock中置為0，所以atomic_compare_and_exchange_bool_acq (futex, 2, 0)語句將futex置為2，返回0. 退出循環，訪問用戶空間的臨界資源。
/*nptl/pthread_mutex_unlock.c*/
/*
 * Unlock MUTEX (excerpt: only the PTHREAD_MUTEX_TIMED_NP,
 * PTHREAD_MUTEX_ADAPTIVE_NP and default paths are shown).
 *
 * __owner is reset to 0, __nusers is decremented when DECR is
 * non-zero, and finally the low-level lock is released.
 * Returns 0 on the path shown.
 */
int
internal_function attribute_hidden
__pthread_mutex_unlock_usercnt (mutex, decr)
     pthread_mutex_t *mutex;
     int decr;
{
  switch (__builtin_expect (mutex->__data.__kind, PTHREAD_MUTEX_TIMED_NP))
    {
      /* ... cases for other mutex kinds are omitted from this excerpt ... */

    default:
      /* Correct code cannot set any other type.  */
    case PTHREAD_MUTEX_TIMED_NP:
    case PTHREAD_MUTEX_ADAPTIVE_NP:
      /* Normal mutex.  Nothing special to do.  */
      break;
    }

  /* Always reset the owner field.  */
  mutex->__data.__owner = 0;
  if (decr)
    /* One less user.  */
    --mutex->__data.__nusers;

  /* Unlock.  */
  lll_mutex_unlock (mutex->__data.__lock);

  return 0;
}
省略部分是針對不同的__kind屬性值做的一些處理，最終調用 lll_mutex_unlock。
該宏函數最終的定義為:
/* Release the lock word FUTEX (an int *): exchange it with 0 and, if
   the previous value was greater than 1, call lll_futex_wake (.., 1).
   The __builtin_expect marks the "previous value > 1" case as the
   unlikely one; it does not change the condition itself.  */
#define __lll_mutex_unlock(futex)                       \
  ((void) ({                                            \
    int *__futex = (futex);                             \
    int __val = atomic_exchange_rel (__futex, 0);       \
                                                        \
    if (__builtin_expect (__val > 1, 0))                \
      lll_futex_wake (__futex, 1);                      \
  }))
atomic_exchange_rel (__futex, 0);宏為:
/* Store VALUE into *MEM and evaluate to the value *MEM held before.
   A full barrier (__sync_synchronize) is issued first, then the
   exchange is done with __sync_lock_test_and_set.  */
#define atomic_exchange_rel(mem, value) \
  (__sync_synchronize (), __sync_lock_test_and_set (mem, value))
實現功能為:將mem設置為value，返回原始mem值。
__builtin_expect (__val > 1, 0) 是編譯器優化語句，告訴編譯器期望值，也就是大多數情況下 __val > 1 的結果是0，其邏輯判斷依然為if(__val > 1)，為真的話執行 lll_futex_wake。
現在分析，在資源沒有被競爭的情況下，__futex 為1，那么返回值__val則為1，那么 lll_futex_wake (__futex, 1); 不會被執行，不產生系統調用。 當資源產生競爭的情況時，根據對pthread_mutex_lock 函數的分析，__futex為2， __val則為2，執行 lll_futex_wake (__futex, 1); 從而喚醒等在臨界資源的線程。
lll_futex_wake (__futex, 1); 最終會調用同一個系統調用，即futex, 只是傳遞的cmd參數為FUTEX_WAKE。
在linux kernel的futex實現中，調用
/*
 * Wake waiters queued on the futex at uaddr.
 *
 * Walks the hash bucket chain for the futex key under the bucket lock.
 * Entries whose key does not match, or whose bitset shares no bit with
 * the caller's bitset, are skipped.  Each remaining entry is woken via
 * wake_futex(), and the walk stops once the number woken reaches
 * nr_wake.
 *
 * Returns -EINVAL when bitset is 0 or when a matching entry has
 * pi_state or rt_waiter set, the non-zero result of get_futex_key()
 * if it fails, otherwise the number of entries woken.
 */
static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
{
	union futex_key key = FUTEX_KEY_INIT;
	struct futex_hash_bucket *bucket;
	struct futex_q *waiter, *tmp;
	struct plist_head *chain;
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, fshared, &key);
	if (unlikely(ret != 0))
		goto out;

	bucket = hash_futex(&key);
	spin_lock(&bucket->lock);
	chain = &bucket->chain;

	plist_for_each_entry_safe(waiter, tmp, chain, list) {
		if (!match_futex(&waiter->key, &key))
			continue;

		if (waiter->pi_state || waiter->rt_waiter) {
			ret = -EINVAL;
			break;
		}

		/* Check if one of the bits is set in both bitsets */
		if (!(waiter->bitset & bitset))
			continue;

		wake_futex(waiter);
		if (++ret >= nr_wake)
			break;
	}

	spin_unlock(&bucket->lock);
	put_futex_key(fshared, &key);
out:
	return ret;
}
該函數遍歷在該mutex上休眠的所有線程，調用wake_futex進行喚醒（喚醒數達到nr_wake后即停止遍歷，這里nr_wake為1），
/*
 * Wake the task recorded in q: take a reference on the task, remove q
 * from its plist, clear q->lock_ptr after a write barrier, wake the
 * task with wake_up_state(), then drop the reference.
 * The statement order below is significant; see the inline comments.
 */
static void wake_futex(struct futex_q *q)
{
struct task_struct *p = q->task;
/*
 * We set q->lock_ptr = NULL _before_ we wake up the task. If
 * a non futex wake up happens on another CPU then the task
 * might exit and p would dereference a non existing task
 * struct. Prevent this by holding a reference on p across the
 * wake up.
 */
get_task_struct(p);
plist_del(&q->list, &q->list.plist);
/*
 * The waiting task can free the futex_q as soon as
 * q->lock_ptr = NULL is written, without taking any locks. A
 * memory barrier is required here to prevent the following
 * store to lock_ptr from getting ahead of the plist_del.
 */
smp_wmb();
q->lock_ptr = NULL;
wake_up_state(p, TASK_NORMAL);
put_task_struct(p);
}
wake_up_state(p, TASK_NORMAL); 的實現位于kernel/sched.c中，屬于linux進程調度的技術。