自旋锁(spinlock)是一种常用的互斥同步原语:试图进入临界区的线程以忙等待的方式反复检测锁的状态,一旦发现锁未被持有就尝试获取。这种忙等待的做法会无谓地消耗处理器资源,故只适合用于临界区非常小的代码片段,如Linux下的中断处理函数。
POSIX 已经提供了自旋锁相关的API,主要包括 pthread_spin_lock,pthread_spin_trylock,pthread_spin_unlock。从实现原理上来说,它属于busy-waiting类型的锁。假设一台双核的机器上有两个线程(线程a,b),它们分别运行在core0,core1上,当线程a使用pthread_spin_lock去请求锁时,如果锁已被线程b持有,线程a就会在core0上忙等待并不停地进行锁请求,直到得到这个锁为止。mutex锁则不同:线程a通过pthread_mutex_lock操作去获取一个临界区的时候,如果此时临界区被线程b所持有,那么线程a就会阻塞,core0 会进行上下文切换,将线程a置于等待队列中,此时core0就可以运行其他的任务。可见spin_lock的好处是省去了用户态与内核态之间的切换以及上下文切换的开销。Linux 下自旋锁的代码在include/linux/spinlock.h,
其主要结构定义:
点击(此处)折叠或打开
/*
 * Spinlock object: a wrapper around a single lock word.
 *
 * The accompanying lock/trylock code treats a positive value as "free"
 * and a value <= 0 as "held".
 */
typedef struct {
    /* Lock word; volatile so that every access is a real memory access. */
    volatile unsigned int lock;
} spinlock_t;
关于volatile的作用在 Linux 内核的同步机制--原子操作中已有说明,实际上spinlock_t仅仅包含一个unsigned int类型变量。
其关键代码:点击(此处)折叠或打开
- static inline void _raw_spin_lock(spinlock_t *lock)
- {
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->lock) : : "memory");
- }
/*
 * Assembly template used to acquire a spinlock; %0 is the lock byte.
 *
 *   1:  lock; decb %0    atomically decrement the lock byte
 *       js 2f            result negative -> lock is held, go spin at 2
 *   (the code between LOCK_SECTION_START and LOCK_SECTION_END is emitted
 *    out of line)
 *   2:  rep; nop         spin-wait hint
 *       cmpb $0,%0       look at the lock byte without writing it
 *       jle 2b           still <= 0 -> keep spinning
 *       jmp 1b           became positive -> go back and retry the decrement
 *
 * Each fragment needs its "\n" / "\t" separators so the assembler sees one
 * instruction per line, and the trailing backslashes join the fragments
 * into a single macro definition.
 */
#define spin_lock_string \
    "\n1:\t" \
    "lock ; decb %0\n\t" \
    "js 2f\n" \
    LOCK_SECTION_START("") \
    "2:\t" \
    "rep;nop\n\t" \
    "cmpb $0,%0\n\t" \
    "jle 2b\n\t" \
    "jmp 1b\n" \
    LOCK_SECTION_END
上面汇编代码中,lock ; decb %0 将lock原子地递减,js在结果为负时跳转,即如果递减后为负,表明锁已被持有,进入自旋。自旋时每执行一次空操作(rep;nop),就比较lock与0的值(cmpb $0,%0),如果lock小于或者等于0,则继续自旋(jle 2b);否则,说明锁已释放,跳回标号1重新尝试递减以获取锁(jmp 1b)。
在这里,采取了一种优化手段:在大多数情况下,自旋锁是能够直接获取成功的,自旋部分的代码只在锁被持有时才执行,因此用LOCK_SECTION_START与LOCK_SECTION_END将这些代码放到专门的区.text.lock中。如果把它跟别的常用指令混在一起,会浪费指令缓存的空间。点击(此处)折叠或打开
/*
 * Helpers that move the rarely executed spin loop out of the main
 * instruction stream.
 *
 * LOCK_SECTION_NAME         label ".text.lock.<KBUILD_BASENAME>".
 * LOCK_SECTION_START(extra) switches to subsection 1, emits @extra, and
 *                           defines the label once (guarded by .ifndef).
 * LOCK_SECTION_END          switches back with .previous.
 *
 * The "\n\t" separators put each assembler directive on its own line; the
 * trailing backslashes continue each macro definition across lines.
 */
#define LOCK_SECTION_NAME \
    ".text.lock." __stringify(KBUILD_BASENAME)

#define LOCK_SECTION_START(extra) \
    ".subsection 1\n\t" \
    extra \
    ".ifndef " LOCK_SECTION_NAME "\n\t" \
    LOCK_SECTION_NAME ":\n\t" \
    ".endif\n\t"

#define LOCK_SECTION_END \
    ".previous\n\t"
点击(此处)折叠或打开
- static inline int _raw_spin_trylock(spinlock_t *lock)
- {
- char oldval;
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->lock)
- :"0" (0) : "memory");
- return oldval > 0;
- }
点击(此处)折叠或打开
- int acquire_lock()
- {
- volatile unsigned long* __lock = &this->lock_;
- if (!_Atomic_swap((unsigned long*)__lock,1))
- {
- return 0;
- }
- unsigned __my_spin_max = mutex_spin<0>::__max; // 30
- unsigned __my_last_spins = mutex_spin<0>::__last; // 0
- // no matter the value of __junk
- volatile unsigned __junk = 17;
- unsigned __i;
- for (__i = 0; __i < __my_spin_max; ++__i)
- {
- if (__i < __my_spin_max/2 || *__lock)
- {
- // __junk ^ 4
- __junk *= __junk;
- __junk *= __junk;
- __junk *= __junk;
- __junk *= __junk;
- continue;
- }
- // when __lock is 0,record the value __i and return
- if (!_Atomic_swap((unsigned long*)__lock, 1))
- {
- // got
- // Spinning worked. Thus we're probably not being scheduled
- // against the other process with which we were contending.
- // Thus it makes sense to spin longer the next time.
- mutex_spin<0>::__last = __i;
- mutex_spin<0>::__max = mutex_spin<0>::__high_max; // 1000
- return 0;
- }
- }
- // waiting until the value of __lock is 0, no time out
- mutex_spin<0>::__last = mutex_spin<0>::__low_max; // 30
- for (__i = 0; ; ++__i)
- {
- int __log_nsec = __i + 6;
- if (__log_nsec > 27)
- {
- __log_nsec = 27;
- }
- if (!_Atomic_swap((unsigned long*)__lock,1))
- {
- return 0;
- }
- nsec_sleep(__log_nsec);
- }
- return 0;
- }
点击(此处)折叠或打开
- #include <pthread.h>
- #include <unistd.h>
- static int __val = 0;
- pthread_spinlock_t __spinlock;
- static void* thread_fun1(void *__arg)
- {
- while(true)
- {
- pthread_spin_lock(&__spinlock);
- ++__val;
- pthread_spin_unlock(&__spinlock);
- }
- return NULL;
- }
-
- static void* thread_fun2(void *__arg)
- {
- while(true)
- {
- pthread_spin_lock(&__spinlock);
- ++__val;
- pthread_spin_unlock(&__spinlock);
- }
- return NULL;
- }
- int main()
- {
- pthread_t __thread_id1;
- pthread_t __thread_id2;
- pthread_spin_init(&__spinlock, 0);
- int __res1 = pthread_create(&__thread_id1,NULL,&thread_fun1,NULL);
- int __res2 = pthread_create(&__thread_id2,NULL,&thread_fun2,NULL);
- sleep(1);
- pthread_spin_destroy(&__spinlock);
- return 0;
- }
参考:
http://blog.chinaunix.net/uid-20184656-id-138057.html
http://www.parallellabs.com/2010/01/31/pthreads-programming-spin-lock-vs-mutex-performance-analysis/