我們來考察下pthread中鎖的實現。
首先看下初始化宏:PTHREAD_MUTEX_INITIALIZER。
/* Static initializer for pthread_mutex_t: every field of __data is set to
   zero.  __PTHREAD_SPINS expands to "0, 0" (see its definition inside the
   structure), so the brace list supplies eight initializers in total, the
   last one being the nested { 0, 0 } for __list. */
# define PTHREAD_MUTEX_INITIALIZER \
{ { 0, 0, 0, 0, 0, __PTHREAD_SPINS, { 0, 0 } } }
/* Data structures for mutex handling. The structure of the attribute
type is not exposed on purpose. */
/* The 32-bit variants of this definition have been omitted. */
/* pthread_mutex_t is a union: __data is the real layout used by the lock
   code, __size fixes the overall object size, and __align is a long int
   member sharing the same storage. */
typedef union
{
struct __pthread_mutex_s
{
/* Lock word; the lock code passes its address to the low-level lock. */
int __lock;
/* Recursion depth, used by the recursive mutex type. */
unsigned int __count;
/* Thread id of the current owner; the lock code expects 0 when it
   acquires the lock. */
int __owner;
#ifdef __x86_64__
/* Incremented each time the lock is acquired (unless NO_INCR is defined). */
unsigned int __nusers;
#endif
/* KIND must stay at this position in the structure to maintain
binary compatibility with static initializers. */
/* Mutex type bits; bit value 128 is tested by PTHREAD_MUTEX_PSHARED. */
int __kind;
#ifdef __x86_64__
/* Spin-count estimate maintained by the adaptive mutex type. */
short __spins;
short __elision;
__pthread_list_t __list;
# define __PTHREAD_MUTEX_HAVE_PREV 1
/* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER. */
# define __PTHREAD_SPINS 0, 0
#else
#endif
} __data;
/* Opaque storage of __SIZEOF_PTHREAD_MUTEX_T bytes; the lock code asserts
   that it is at least as large as __data. */
char __size[__SIZEOF_PTHREAD_MUTEX_T];
long int __align;
} pthread_mutex_t;
注意PTHREAD_MUTEX_INITIALIZER 是8個成員的結構體,與pthread_mutex_t定義相符。並且所有成員初始化爲0。
初始化之後,我們接着看看pthread_mutex_lock操作:
/* Unless __pthread_mutex_lock has been redefined as a macro, publish the
   implementation under the name pthread_mutex_lock as well (presumably
   strong_alias creates the public alias and hidden_def the internal
   hidden definition -- confirm against the glibc symbol macros). */
#ifndef __pthread_mutex_lock
strong_alias (__pthread_mutex_lock, pthread_mutex_lock)
hidden_def (__pthread_mutex_lock)
#endif
/* Lock MUTEX, dispatching on the type bits stored in __data.__kind.

   The timed (normal), elision, recursive, adaptive and error-checking
   types are handled inline; a type with any bit outside
   PTHREAD_MUTEX_KIND_MASK_NP | PTHREAD_MUTEX_ELISION_FLAGS_NP is handed to
   __pthread_mutex_lock_full.

   Returns 0 once the lock is held, EAGAIN if the recursion counter of a
   recursive mutex would overflow, EDEADLK if an error-checking mutex is
   already owned by the calling thread, or the result of
   __pthread_mutex_lock_full / LLL_MUTEX_LOCK_ELISION on those paths. */
int
__pthread_mutex_lock (pthread_mutex_t *mutex)
{
/* The opaque __size member must be able to hold the real __data layout. */
assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
LIBC_PROBE (mutex_entry, 1, mutex);
/* Any bit outside the kind/elision masks selects the out-of-line path. */
if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP
| PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
return __pthread_mutex_lock_full (mutex);
if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP))
{
FORCE_ELISION (mutex, goto elision);
/* 'simple' is also the jump target of the adaptive (non-SMP) and
   error-checking branches below. */
simple:
/* Normal mutex. */
LLL_MUTEX_LOCK (mutex);
/* A freshly acquired lock must not have an owner recorded yet. */
assert (mutex->__data.__owner == 0);
}
#ifdef HAVE_ELISION
else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP))
{
elision: __attribute__((unused))
/* This case can never happen on a system without elision,
as the mutex type initialization functions will not
allow to set the elision flags. */
/* Don't record owner or users for elision case. This is a
tail call. */
return LLL_MUTEX_LOCK_ELISION (mutex);
}
#endif
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_RECURSIVE_NP, 1))
{
/* Recursive mutex. */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Check whether we already hold the mutex. */
if (mutex->__data.__owner == id)
{
/* Just bump the counter. */
if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
/* Overflow of the counter. */
return EAGAIN;
++mutex->__data.__count;
/* Re-entry by the owner returns here, skipping the common tail, so
   __owner and __nusers are left untouched. */
return 0;
}
/* We have to get the mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
mutex->__data.__count = 1;
}
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_ADAPTIVE_NP, 1))
{
/* When __is_smp is false, skip the spinning and lock like a normal mutex. */
if (! __is_smp)
goto simple;
if (LLL_MUTEX_TRYLOCK (mutex) != 0)
{
int cnt = 0;
/* Spin budget: twice the recorded estimate plus 10, capped at
   MAX_ADAPTIVE_COUNT. */
int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
mutex->__data.__spins * 2 + 10);
do
{
if (cnt++ >= max_cnt)
{
/* Budget exhausted: fall back to the regular lock operation. */
LLL_MUTEX_LOCK (mutex);
break;
}
atomic_spin_nop ();
}
while (LLL_MUTEX_TRYLOCK (mutex) != 0);
/* Move the stored estimate one eighth of the way toward this round's
   spin count. */
mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
}
assert (mutex->__data.__owner == 0);
}
else
{
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* The only remaining inline type is the error-checking mutex. */
assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP);
/* Check whether we already hold the mutex. */
if (__glibc_unlikely (mutex->__data.__owner == id))
return EDEADLK;
goto simple;
}
/* Common tail for every branch that acquired the lock and did not return
   early. */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Record the ownership. */
mutex->__data.__owner = id;
#ifndef NO_INCR
++mutex->__data.__nusers;
#endif
LIBC_PROBE (mutex_acquired, 1, mutex);
return 0;
}
首先看下第一句
assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
這句的意思是成員__size所佔內存不小於__data(即__size至少能容納__data),我們來驗證下。
char __size[__SIZEOF_PTHREAD_MUTEX_T]的字節數:40.
1 #ifdef __x86_64__
2 # if __WORDSIZE == 64
3 # define __SIZEOF_PTHREAD_ATTR_T 56
4 # define __SIZEOF_PTHREAD_MUTEX_T 40
另一方面__data中的字節數是int、unsigned int、short、__pthread_list_t這些成員加起來(5個4字節成員 + 2個short + 16字節的__pthread_list_t = 20 + 4 + 16),剛好爲40字節.
所以這個union在64位計算機上最大的空間爲40個字節。
接着是:
1 unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
1 #define PTHREAD_MUTEX_TYPE_ELISION(m) \
2 ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_NP))
因爲__kind爲0,所以這裏的type顯然爲0;
1 if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP
2 | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
3 return __pthread_mutex_lock_full (mutex);
這裏的結果爲0,所以顯然不走這個分支。
PTHREAD_MUTEX_TIMED_NP值爲0,所以我們的代碼顯然是進入如下第一行的分支。
根據註釋/* Normal mutex. */,很可能是通過這裏得到鎖。我們繼續探索下,
/* Mutex types. */
enum
{
PTHREAD_MUTEX_TIMED_NP,
PTHREAD_MUTEX_RECURSIVE_NP,
PTHREAD_MUTEX_ERRORCHECK_NP,
PTHREAD_MUTEX_ADAPTIVE_NP
#if defined __USE_UNIX98 || defined __USE_XOPEN2K8
if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP))
{
FORCE_ELISION (mutex, goto elision);
simple:
/* Normal mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
}
#ifdef HAVE_ELISION
else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP))
{
elision: __attribute__((unused))
/* This case can never happen on a system without elision,
as the mutex type initialization functions will not
allow to set the elision flags. */
/* Don't record owner or users for elision case. This is a
tail call. */
return LLL_MUTEX_LOCK_ELISION (mutex);
}
#endif
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_RECURSIVE_NP, 1))
{
/* Recursive mutex. */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Check whether we already hold the mutex. */
if (mutex->__data.__owner == id)
{
/* Just bump the counter. */
if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
/* Overflow of the counter. */
return EAGAIN;
++mutex->__data.__count;
return 0;
}
/* We have to get the mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
mutex->__data.__count = 1;
}
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_ADAPTIVE_NP, 1))
{
if (! __is_smp)
goto simple;
if (LLL_MUTEX_TRYLOCK (mutex) != 0)
{
int cnt = 0;
int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
mutex->__data.__spins * 2 + 10);
do
{
if (cnt++ >= max_cnt)
{
LLL_MUTEX_LOCK (mutex);
break;
}
atomic_spin_nop ();
}
while (LLL_MUTEX_TRYLOCK (mutex) != 0);
mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
}
assert (mutex->__data.__owner == 0);
}
else
{
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP);
/* Check whether we already hold the mutex. */
if (__glibc_unlikely (mutex->__data.__owner == id))
return EDEADLK;
goto simple;
}
這裏的意思:將_data中的__lock作爲參數填入lll_lock,注意,這裏是宏定義。
/* Default LLL_MUTEX_LOCK, used unless the macro was defined beforehand:
   lock the mutex's __data.__lock word through lll_lock, passing the value
   computed by PTHREAD_MUTEX_PSHARED as the private/shared argument. */
#ifndef LLL_MUTEX_LOCK
# define LLL_MUTEX_LOCK(mutex) \
lll_lock ((mutex) ->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
1 #if LLL_PRIVATE == 0 && LLL_SHARED == 128
2 # define PTHREAD_MUTEX_PSHARED(m) \
3 ((m)->__data.__kind & 128)
4 #else
這裏的PTHREAD_MUTEX_PSHARED將__kind字段和128做&操作,推測是第8個標誌位用來標識該鎖是否共享。
既然如此, 我們這裏兩者填入的都是0,但是第一個__lock在後續使用中有取地址的可能。
我們接着看看lll_lock:
1 #define lll_lock(futex, private) \
2 __lll_lock (&(futex), private)
取了地址, 那麼這裏就是原mutex中__lock字段的地址和數值0.
/* Fast path of the low-level lock.  Tries to change *FUTEX from 0 to 1 with
   an atomic compare-and-exchange; as used here, a nonzero result means the
   exchange did not happen.  In that case the wait routine is entered:
   __lll_lock_wait_private when PRIVATE is a compile-time constant equal to
   LLL_PRIVATE, otherwise __lll_lock_wait with PRIVATE passed through.
   The whole expression is cast to void, so it yields no value. */
#define __lll_lock(futex, private) \
((void) \
({ \
int *__futex = (futex); \
if (__glibc_unlikely \
(atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) \
{ \
if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
__lll_lock_wait_private (__futex); \
else \
__lll_lock_wait (__futex, private); \
} \
}))
此處atomic_compare_and_exchange_bool_acq 用於將_futex從0原子變爲1,成功則返回0,從而獲得鎖退出。
失敗則返回非0值(此時__futex的當前值不是0,對應我們這裏是1或者2),然後繼續走分支。
根據值, 走__lll_lock_wait:
/* Atomically store NEWVALUE into *MEM and evaluate to `result`, which the
   xchg instruction fills with the previous contents of *MEM (NEWVALUE is
   placed in the same register as `result` via the "0" constraint, then
   swapped with memory).  The instruction width is picked from
   sizeof (*mem): xchgb for 1 byte, xchgw for 2, xchgl for 4, and xchgq
   otherwise. */
/* Note that we need no lock prefix. */
#define atomic_exchange_acq(mem, newvalue) \
({ __typeof (*mem) result; \
if (sizeof (*mem) == 1) \
__asm __volatile ("xchgb %b0, %1" \
: "=q" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else if (sizeof (*mem) == 2) \
__asm __volatile ("xchgw %w0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else if (sizeof (*mem) == 4) \
__asm __volatile ("xchgl %0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else \
__asm __volatile ("xchgq %q0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" ((atomic64_t) cast_to_integer (newvalue)), \
"m" (*mem)); \
result; })
/* This function doesn't get included in libc. */
#if IS_IN (libpthread)
/* Slow path of the low-level lock, entered after the 0 -> 1 fast path
   failed.  FUTEX points at the lock word; PRIVATE is forwarded unchanged to
   lll_futex_wait.

   The loop atomically exchanges 2 into *FUTEX.  An old value of 0 means the
   lock was free and is now held by the caller (left at 2); any other old
   value means it is still held, so the caller sleeps for as long as the
   word stays 2 and retries after waking.
   NOTE(review): 2 presumably encodes "locked, waiters may exist" --
   confirm against the matching unlock code. */
void
__lll_lock_wait (int *futex, int private)
{
/* Already marked 2: sleep right away instead of exchanging first. */
if (*futex == 2)
lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
while (atomic_exchange_acq (futex, 2) != 0)
lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
}
#endif
所以到了關鍵的地方, 這裏是用原子交換將futex(&__lock)的值置爲2,若返回的舊值爲0則加鎖成功;否則調用lll_futex_wait,阻塞。這裏的atomic_exchange_acq是一個返回舊值的原子操作,直接採用了內聯彙編(xchg)的方式,並且根據操作數的大小(sizeof (*mem))選取不同寬度的xchg指令。
到了這裏,只要這個原子xchg的是正確的,並且阻塞與喚醒(wake up)之間的協議是正確的,那麼這個mutex的語義就得到保證了。
我們接着看看lll_futex_wait是怎麼樣的(val = 2, private = 0):
1 /* Wait while *FUTEXP == VAL for an lll_futex_wake call on FUTEXP. */
2 #define lll_futex_wait(futexp, val, private) \
3 lll_futex_timed_wait (futexp, val, NULL, private)
參數多了個NULL(val = 2, timeout = NULL, private = 0),
1 #define lll_futex_timed_wait(futexp, val, timeout, private) \
2 lll_futex_syscall (4, futexp, \
3 __lll_private_flag (FUTEX_WAIT, private), \
4 val, timeout)
展開__lll_private_flag
1 # else
2 # define __lll_private_flag(fl, private) \
3 ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex))
4 # endif
1 # define THREAD_SELF \
2 ({ struct pthread *__self; \
3 asm ("mov %%fs:%c1,%0" : "=r" (__self) \
4 : "i" (offsetof (struct pthread, header.self))); \
5 __self;})
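這裏是從struct pthread中取得private_futex來計算的,這裏按值爲0來分析,於是實際上只保留了FUTEX_WAIT的值,同樣爲0.(注意:在支持私有futex的內核上,header.private_futex通常是FUTEX_PRIVATE_FLAG即128,此時op實際爲FUTEX_WAIT | FUTEX_PRIVATE_FLAG;爲了簡化,下文仍按op = 0展開,這不影響後面的分析流程。)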
/* Futex operation codes and flag bits as quoted for this walkthrough.  The
   lock path analysed here uses FUTEX_WAIT (0); the test programs further
   down additionally use FUTEX_WAKE (1). */
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_REQUEUE 3
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE ((4 << 24) | 1)
#define FUTEX_LOCK_PI 6
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
/* Flag bits that are OR-ed into an operation code. */
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
所以這裏的lll_futex_syscall調用簡化爲:
lll_futex_syscall (4, futexp, 0, 2, NULL)
我們接着看:
/* Issue the futex system call with NARGS arguments -- FUTEXP, OP and then
   the variadic remainder -- through INTERNAL_SYSCALL.  Evaluates to 0 on
   success, or to the negated INTERNAL_SYSCALL_ERRNO value when
   INTERNAL_SYSCALL_ERROR_P reports an error. */
#define lll_futex_syscall(nargs, futexp, op, ...) \
({ \
INTERNAL_SYSCALL_DECL (__err); \
long int __ret = INTERNAL_SYSCALL (futex, __err, nargs, futexp, op, \
__VA_ARGS__); \
(__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (__ret, __err)) \
? -INTERNAL_SYSCALL_ERRNO (__ret, __err) : 0); \
})
這裏的futex不是字符串字面量,而是一個記號(token):它在INTERNAL_SYSCALL中通過##與__NR_拼接成__NR_futex。__VA_ARGS__指代了2和NULL。
我們看一下INTERNAL_SYSCALL:
/* INTERNAL_SYSCALL pastes NAME onto the __NR_ prefix to form the
   syscall-number macro and forwards everything to INTERNAL_SYSCALL_NCS. */
# define INTERNAL_SYSCALL(name, err, nr, args...) \
INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
/* INTERNAL_SYSCALL_NCS takes the syscall number directly in NAME.  It loads
   the NR arguments through LOAD_ARGS_<nr> / LOAD_REGS_<nr>, executes the
   `syscall` instruction with NAME supplied in the same register that
   receives the result (the "0" input is tied to the "=a" output), and
   evaluates to that result as a long int.  ERR is not referenced in this
   expansion. */
# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
unsigned long int resultvar; \
LOAD_ARGS_##nr (args) \
LOAD_REGS_##nr \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (name) ASM_ARGS_##nr : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; })
那麼這裏的INTERNAL_SYSCALL_NCS調用, 參數爲( __NR_futex,err,4, futexp,0, 2, NULL)。第四個參數開始爲futexp,0, 2, NULL。
/* Four-argument helpers used by INTERNAL_SYSCALL_NCS.  LOAD_ARGS_4 and
   LOAD_REGS_4 forward to their typed counterparts with every argument
   treated as long int; ASM_ARGS_4 extends the three-argument asm input list
   with _a4 as an additional register operand. */
# define LOAD_ARGS_4(a1, a2, a3, a4) \
LOAD_ARGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \
long int, a4)
# define LOAD_REGS_4 \
LOAD_REGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \
long int, a4)
# define ASM_ARGS_4 ASM_ARGS_3, "r" (_a4)
將LOAD_ARGS_##nr (args)、LOAD_REGS_##nr、ASM_ARGS_##nr、REGISTERS_CLOBBERED_BY_SYSCALL展開帶入,之後可將INTERNAL_SYSCALL_NCS轉換爲如下:
unsigned long long int resultvar;
long int __arg4 = (long int) (NULL); \
long int __arg3 = (long int) (2); \
long int __arg2 = (long int) (0); \
long int __arg1 = (long int) (futexp); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (__NR_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
這裏的__NR_futex在上面的代碼中找不到定義,這應該是個linux系統定義的系統調用號,並且由它來定義SYS_futex的值。
#define SYS_futex __NR_futex
那麼上面的那段代碼真的確定是使用(FUTEX_WAIT)futex來陷入了阻塞嗎?
讓我嘗試將之前寫的一段直接採用futex做同步區塊的代碼修改下做檢驗。
原代碼:
#include <stdio.h>
#include <pthread.h>
#include <linux/futex.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/time.h>
#define NUM 1000
int num = 0;
int futex_addr = 0;
/*
 * Issue a FUTEX_WAIT request on the word at addr: the caller sleeps only
 * while that word still equals val, with no timeout.
 * Returns the result of syscall(), narrowed to int.
 */
int futex_wait(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
    return (int) rc;
}
/*
 * Issue a FUTEX_WAKE request on the word at addr; val is forwarded as the
 * wake count (per futex(2), the maximum number of waiters to wake).
 * Returns the result of syscall(), narrowed to int.
 */
int futex_wake(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0);
    return (int) rc;
}
/*
 * Worker thread body: increments the shared counter `num` 1000 times, each
 * increment guarded by a minimal futex lock kept in `futex_addr`
 * (0 = free, 1 = held).
 *
 * par: thread argument supplied by main; not used.
 * Returns NULL.
 */
void* thread_f(void* par){
    (void) par;
    for(int i = 0; i < 1000; ++i){
        /* Acquire: the builtin returns the previous value, so reading 1
           means another thread holds the lock and we did not change it. */
        while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){
            /* Sleep only while the word is still 1, then retry the CAS. */
            futex_wait(&futex_addr,1);
        }
        ++num;
        /* Release with an atomic store (release ordering) so the increment
           is published before the lock appears free; a plain assignment
           would race with the compare-and-swap in other threads. */
        __atomic_store_n(&futex_addr, 0, __ATOMIC_RELEASE);
        futex_wake(&futex_addr, NUM);
    }
    return NULL;
}
/*
 * Starts NUM worker threads running thread_f, waits for all of them, then
 * prints the final value of `num` and the elapsed wall-clock time.
 *
 * Returns 0 on success, 1 if a thread could not be created or joined.
 */
int main(){
    pthread_t threads[NUM];
    int created = 0;
    int status = 0;
    struct timeval start;
    struct timeval end;

    printf("Everyone go...\n");
    gettimeofday(&start,NULL);
    for (int i = 0; i < NUM; i++){
        /* Go through long so the integer-to-pointer conversion is not
           between differently sized types on 64-bit Linux. */
        int rc = pthread_create(&threads[i],NULL,thread_f,(void *)(long)i);
        if (rc != 0){
            fprintf(stderr, "pthread_create failed for thread %d (error %d)\n", i, rc);
            status = 1;
            break;
        }
        ++created;
    }
    /* Join only the threads that were really started; joining an
       uninitialized pthread_t is undefined. */
    for (int i = 0; i < created; i++){
        int rc = pthread_join(threads[i], NULL);
        if (rc != 0){
            fprintf(stderr, "pthread_join failed for thread %d (error %d)\n", i, rc);
            status = 1;
        }
    }
    printf("Main is quitting...\n");
    printf("and num is %d\n", num);
    gettimeofday(&end,NULL);
    /* Elapsed time in seconds: whole seconds plus the microsecond part
       scaled down by 1e6. */
    double time_use = (end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;
    printf("time_use is %f \n",time_use);
    return status;
}
執行輸出爲:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.283753
1000個線程執行1000次+1,答案爲1000000正確。
我們嘗試將futex_wait中sys_call做一下修改:
/*
 * FUTEX_WAIT on the word at addr with expected value val and no timeout,
 * issued through the hand-written INTERNAL_SYSCALL_NCS macro instead of
 * syscall().  Returns the macro's result narrowed to int.
 */
int futex_wait(void* addr, int val){
    // return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
    long long int ret = INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL);
    return (int) ret;
}
然後添加宏INTERNAL_SYSCALL_NCS:
/* Hand-expanded, four-argument version of the glibc macro of the same name,
   hard-wired to SYS_futex.  a1..a4 are converted to long int and placed in
   rdi, rsi, rdx and r10; SYS_futex goes into the register that also
   receives the result ("0" tied to "=a"); then `syscall` is executed and
   the result is yielded as long long int.
   NOTE(review): presumably this yields the raw kernel return value
   (a negative errno on failure) and does not set errno the way syscall(3)
   does -- confirm before relying on the return value. */
#define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) \
({ \
unsigned long long int resultvar; \
long int __arg4 = (long int) (a4); \
long int __arg3 = (long int) (a3); \
long int __arg2 = (long int) (a2); \
long int __arg1 = (long int) (a1); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
得到如下代碼:
#include <stdio.h>
#include <pthread.h>
#include <linux/futex.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/time.h>
#define NUM 1000
/* Hand-expanded, four-argument version of the glibc macro of the same name,
   hard-wired to SYS_futex.  a1..a4 are converted to long int and placed in
   rdi, rsi, rdx and r10; SYS_futex goes into the register that also
   receives the result ("0" tied to "=a"); then `syscall` is executed and
   the result is yielded as long long int.
   NOTE(review): presumably this yields the raw kernel return value
   (a negative errno on failure) and does not set errno the way syscall(3)
   does -- confirm before relying on the return value. */
#define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) \
({ \
unsigned long long int resultvar; \
long int __arg4 = (long int) (a4); \
long int __arg3 = (long int) (a3); \
long int __arg2 = (long int) (a2); \
long int __arg1 = (long int) (a1); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
int num = 0;
int futex_addr = 0;
/*
 * FUTEX_WAIT on the word at addr with expected value val and no timeout,
 * issued through the hand-written INTERNAL_SYSCALL_NCS macro instead of
 * syscall().  Returns the macro's result narrowed to int.
 */
int futex_wait(void* addr, int val){
    // return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
    long long int ret = INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL);
    return (int) ret;
}
/*
 * Issue a FUTEX_WAKE request on the word at addr; val is forwarded as the
 * wake count (per futex(2), the maximum number of waiters to wake).
 * Returns the result of syscall(), narrowed to int.
 */
int futex_wake(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0);
    return (int) rc;
}
/*
 * Worker thread body: increments the shared counter `num` 1000 times, each
 * increment guarded by a minimal futex lock kept in `futex_addr`
 * (0 = free, 1 = held).
 *
 * par: thread argument supplied by main; not used.
 * Returns NULL.
 */
void* thread_f(void* par){
    (void) par;
    for(int i = 0; i < 1000; ++i){
        /* Acquire: the builtin returns the previous value, so reading 1
           means another thread holds the lock and we did not change it. */
        while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){
            /* Sleep only while the word is still 1, then retry the CAS. */
            futex_wait(&futex_addr,1);
        }
        ++num;
        /* Release with an atomic store (release ordering) so the increment
           is published before the lock appears free; a plain assignment
           would race with the compare-and-swap in other threads. */
        __atomic_store_n(&futex_addr, 0, __ATOMIC_RELEASE);
        futex_wake(&futex_addr, NUM);
    }
    return NULL;
}
/*
 * Starts NUM worker threads running thread_f, waits for all of them, then
 * prints the final value of `num` and the elapsed wall-clock time.
 *
 * Returns 0 on success, 1 if a thread could not be created or joined.
 */
int main(){
    pthread_t threads[NUM];
    int created = 0;
    int status = 0;
    struct timeval start;
    struct timeval end;

    printf("Everyone go...\n");
    gettimeofday(&start,NULL);
    for (int i = 0; i < NUM; i++){
        /* Go through long so the integer-to-pointer conversion is not
           between differently sized types on 64-bit Linux. */
        int rc = pthread_create(&threads[i],NULL,thread_f,(void *)(long)i);
        if (rc != 0){
            fprintf(stderr, "pthread_create failed for thread %d (error %d)\n", i, rc);
            status = 1;
            break;
        }
        ++created;
    }
    /* Join only the threads that were really started; joining an
       uninitialized pthread_t is undefined. */
    for (int i = 0; i < created; i++){
        int rc = pthread_join(threads[i], NULL);
        if (rc != 0){
            fprintf(stderr, "pthread_join failed for thread %d (error %d)\n", i, rc);
            status = 1;
        }
    }
    printf("Main is quitting...\n");
    printf("and num is %d\n", num);
    gettimeofday(&end,NULL);
    /* Elapsed time in seconds: whole seconds plus the microsecond part
       scaled down by 1e6. */
    double time_use = (end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;
    printf("time_use is %f \n",time_use);
    return status;
}
注意到我們這裏與pthread不一樣的地方在於
1 == __sync_val_compare_and_swap(&futex_addr, 0, 1)
注意到我們這裏和pthread_mutex不一樣的地方在於:我們始終是原子地將futex_addr的值從0改爲1,而pthread_mutex在慢路徑(__lll_lock_wait)中是用原子交換把值置爲2。
執行如上代碼,輸出爲:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.254833
答案同樣是1000000,所以這個採用彙編形式的調用符合了我們的預期,應該是和系統調用一致的。
最後我們看假如已經獲得了鎖,需要做什麼:
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Record the ownership. */
mutex->__data.__owner = id;
#ifndef NO_INCR
++mutex->__data.__nusers;
#endif
只是簡單地把__data中的__owner設置爲id,以及++__nusers。從而代表這個鎖的使用者人數+1,並且當前擁有者爲該id的線程。
我們之後接着來看看pthread_mutex_unlock的實現。