原文地址：http://blog.chinaunix.net/uid-24774106-id-3488649.html

提到了flock，不提fcntl這個鎖有點不像話，畢竟fcntl這個鎖纔是更常見的一把鎖。咱也不能拈輕怕重，逮着軟柿子可勁捏，今天我們比較下這兩種類型鎖的異同，並從kernel實現的層面，來講講爲啥表現不同，準備好了沒，LET'S GO！

上一篇博文講到了flock系統調用那把鎖是FL_FLOCK類型的鎖，而fcntl創建的鎖是遵循POSIX標準的，所以稱爲FL_POSIX類型的鎖。上一篇博文做了一個實驗，進程A首先申請FL_FLOCK類型的鎖一把，然後fork出來子進程B，此時再啓動同一個可執行程序，啓動進程C，C也會首先申請FL_FLOCK鎖，當然了，都是對同一個文件加排他鎖。我們發現，在A進程退出後，C進程依然申請不到這把鎖，直到B 進程退出，C進程才持有了這把鎖。我們得到結論，fork出來的子進程，不但拷貝所有父進程的所有打開的文件（當然了同一個struct file，struct file引用計數+1）, 同時也持有了父進程申請的FL_FLOCK類型的鎖。這就是上篇博文的結論，當然我們沒有從代碼層面分析這種鎖的繼承性的緣由。沒關係，這是我們這篇博文涉及的東西。

應用層fcntl

首先說，我不太喜歡fcntl這個函數，因爲這個函數有點瑞士軍刀的意思，方便是方便了，但是這廝幹的事兒有點多，不符合一個接口只幹一件事，並把事情幹好的UNIX哲學。不喜歡歸不喜歡，但是咱也得從了。西遊記說，世界尚不完美，經書怎能苛求完美。是啊，世界尚不完美，我們也沒辦法苛求太多。

flock系統調用本質是給文件上鎖，它比較死心眼，一鎖就是整個文件，要求flock系統調用給某文件前40個字節上鎖，不好意思，flock他老人家太老了，這麼細的活兒幹不了。但是fcntl不同了，它屬於江湖晚輩，做的就比較細緻了，他能夠精確打擊，讓它給文件的某一個字節加鎖，他都能辦得到。OK ，閒言少敘看接口。

       #include <unistd.h>
       #include <fcntl.h>

       int fcntl(int fd, int cmd, ... /* arg */ );

       struct flock {
           ...
           short l_type;    /* Type of lock: F_RDLCK,
                               F_WRLCK, F_UNLCK */
           short l_whence;  /* How to interpret l_start:
                               SEEK_SET, SEEK_CUR, SEEK_END */
           off_t l_start;   /* Starting offset for lock */
           off_t l_len;     /* Number of bytes to lock */
           pid_t l_pid;     /* PID of process blocking our lock
                               (F_GETLK only) */
           ...
       };

文件記錄加鎖相關的cmd 分三種（fcntl這廝還有其他與加鎖無關的cmd）：

F_SETLK

申請鎖（讀鎖F_RDLCK，寫鎖F_WRLCK）或者釋放鎖（F_UNLCK），但是如果kernel無法將鎖授予本進程（被其他進程搶了先，佔了鎖），不傻等，返回error。
F_SETLKW

和F_SETLK幾乎一樣，唯一的區別，這廝是個死心眼的主兒，申請不到，就傻等。
F_GETLK

這個接口是獲取鎖的相關信息：這個接口會修改我們傳入的struct flock。

如果探測了一番，發現根本就沒有進程對該文件指定數據段加鎖，那麼l_type會被修改成F_UNLCK

如果有進程持有了鎖，那麼l_pid會返回持鎖進程的PID

參考UNIX網絡編程卷2 進程間通信，將這個接口封裝了下，讓接口變得好用些。

            #include <unistd.h>
            #include <fcntl.h>

            /*
             * lock_reg - fill in a struct flock and pass it to fcntl().
             *
             * fd:     open file descriptor to operate on
             * cmd:    fcntl command; the callers in this file pass F_SETLK or F_SETLKW
             * type:   value stored in l_type (F_RDLCK, F_WRLCK or F_UNLCK)
             * offset: value stored in l_start, interpreted relative to whence
             * whence: value stored in l_whence (SEEK_SET, SEEK_CUR or SEEK_END)
             * len:    value stored in l_len, the number of bytes covered
             *
             * Returns whatever fcntl() returns.
             */
            static int lock_reg(int fd,int cmd,int type,off_t offset,int whence,off_t len)
            {
                /*
                 * Zero the whole structure first so that l_pid and any members
                 * not assigned below are never passed to fcntl() uninitialised.
                 */
                struct flock lock = {0};

                lock.l_type = type;
                lock.l_start = offset;
                lock.l_whence = whence;
                lock.l_len = len;

                return fcntl(fd,cmd,&lock);
            }

            /*
             * lock_test - query with F_GETLK whether a lock of the given type
             * could be placed on the given region.
             *
             * fd:     open file descriptor to query
             * type:   lock type to probe for (F_RDLCK or F_WRLCK)
             * offset: start of the region, interpreted relative to whence
             * whence: SEEK_SET, SEEK_CUR or SEEK_END
             * len:    number of bytes in the region
             *
             * Returns -1 if fcntl() fails, 0 if fcntl() reports F_UNLCK (no
             * conflicting lock), otherwise the l_pid value filled in by fcntl().
             */
            static pid_t lock_test(int fd,int type,off_t offset,int whence,off_t len)
            {
                /* Zeroed so that no member reaches fcntl() uninitialised. */
                struct flock lock = {0};

                lock.l_type = type;
                lock.l_start = offset;
                lock.l_whence = whence;
                lock.l_len = len;

                if(fcntl(fd,F_GETLK,&lock) == -1)
                {
                    return -1;
                }
                /*
                 * This must be a comparison. An assignment here would overwrite
                 * the answer from fcntl() and make the function return 0
                 * whenever F_UNLCK is non-zero.
                 */
                if(lock.l_type == F_UNLCK)
                    return 0;
                return lock.l_pid;
            }

            /*
             * read_lock - request a read lock (F_RDLCK) on a region using
             * F_SETLKW, i.e. the waiting variant of the command.
             *
             * Returns the result of lock_reg().
             */
            int read_lock(int fd,off_t offset,int whence,off_t len)
            {
                const int status = lock_reg(fd, F_SETLKW, F_RDLCK, offset, whence, len);

                return status;
            }

            /*
             * read_lock_try - request a read lock (F_RDLCK) on a region using
             * F_SETLK, i.e. the non-waiting variant of the command.
             *
             * Returns the result of lock_reg().
             */
            int read_lock_try(int fd,off_t offset,int whence,off_t len)
            {
                const int status = lock_reg(fd, F_SETLK, F_RDLCK, offset, whence, len);

                return status;
            }

            /*
             * write_lock - request a write lock (F_WRLCK) on a region using
             * F_SETLKW, i.e. the waiting variant of the command.
             *
             * Returns the result of lock_reg().
             */
            int write_lock(int fd,off_t offset,int whence,off_t len)
            {
                const int status = lock_reg(fd, F_SETLKW, F_WRLCK, offset, whence, len);

                return status;
            }

            /*
             * write_lock_try - request a write lock (F_WRLCK) on a region using
             * F_SETLK, i.e. the non-waiting variant of the command.
             *
             * Returns the result of lock_reg().
             */
            int write_lock_try(int fd,off_t offset,int whence,off_t len)
            {
                const int status = lock_reg(fd, F_SETLK, F_WRLCK, offset, whence, len);

                return status;
            }

            /*
             * unlock - release the lock on a region by issuing F_SETLK with
             * the lock type F_UNLCK.
             *
             * Returns the result of lock_reg().
             */
            int unlock(int fd,off_t offset, int whence,off_t len)
            {
                const int status = lock_reg(fd, F_SETLK, F_UNLCK, offset, whence, len);

                return status;
            }

            /*
             * is_read_lockable - probe a region with lock_test() for F_RDLCK.
             *
             * Returns 1 when lock_test() returns 0, and 0 for any other
             * result of lock_test() (including -1).
             */
            int is_read_lockable(int fd, off_t offset,int whence,off_t len)
            {
                const pid_t holder = lock_test(fd, F_RDLCK, offset, whence, len);

                return holder == 0;
            }

            /*
             * is_write_lockable - probe a region with lock_test() for F_WRLCK.
             *
             * Returns 1 when lock_test() returns 0, and 0 for any other
             * result of lock_test() (including -1).
             */
            int is_write_lockable(int fd, off_t offset,int whence,off_t len)
            {
                const pid_t holder = lock_test(fd, F_WRLCK, offset, whence, len);

                return holder == 0;
            }

下面是頭文件rwlock.h

    /*
     * rwlock.h - declarations for the fcntl() record-locking wrappers.
     *
     * Every function takes the descriptor plus the region (offset, whence,
     * len) that is copied into struct flock's l_start, l_whence and l_len.
     */
    #ifndef RWLOCK_H
    #define RWLOCK_H

    #include <sys/types.h>  /* off_t, so the header can be included on its own */

    /* Read lock via F_SETLKW (waits for the lock). Returns fcntl()'s result. */
    int read_lock(int fd,off_t offset,int whence,off_t len);
    /* Read lock via F_SETLK (does not wait). Returns fcntl()'s result. */
    int read_lock_try(int fd,off_t offset,int whence,off_t len);
    /* Write lock via F_SETLKW (waits for the lock). Returns fcntl()'s result. */
    int write_lock(int fd,off_t offset,int whence,off_t len);
    /* Write lock via F_SETLK (does not wait). Returns fcntl()'s result. */
    int write_lock_try(int fd,off_t offset,int whence,off_t len);
    /* Release the region with F_SETLK + F_UNLCK. Returns fcntl()'s result. */
    int unlock(int fd,off_t offset, int whence,off_t len);
    /* Non-zero when an F_GETLK probe for F_RDLCK finds no conflicting lock. */
    int is_read_lockable(int fd, off_t offset,int whence,off_t len);
    /* Non-zero when an F_GETLK probe for F_WRLCK finds no conflicting lock. */
    int is_write_lockable(int fd, off_t offset,int whence,off_t len);

    #endif /* RWLOCK_H */

現在萬事俱備了，我們可以寫我們的測試程序了。實驗內容同flock系統調用一樣，A進程申請鎖，然後fork出B 進程，然後C進程申請鎖。過一會A進程死去，B仍然活着，看下C能否申請到鎖。

FL_POSIX鎖父子進程繼承性實驗

測試程序和上一篇一樣，只不過使用我們上面提到的write_lock,而不是flock函數。

            #include<stdio.h>
            #include <stdlib.h>
            #include <sys/types.h>
            #include <unistd.h>
            #include <sys/file.h>
            #include <errno.h>
            #include <string.h> 
            #include <time.h>
            #include <fcntl.h>
            #include "rwlock.h"

            /*
             * FL_POSIX inheritance experiment.
             *
             * Opens the existing file ./tmp.txt in append mode, takes a write lock
             * on it through write_lock(fd, 0, SEEK_SET, 0) and then forks.
             *   - child:  writes one line, sleeps 100 seconds, then exits.
             *   - parent: writes one line, sleeps 50 seconds, closes fd, then exits.
             *
             * Returns 0 on success, -1 if open() fails, -2 if the lock request
             * fails, -3 if fork() fails.
             */
            int main()
            {
                /*
                 * Arrays rather than pointers, so sizeof(...) - 1 is the exact
                 * message length without the terminating NUL. Writing more
                 * bytes than that would read past the end of the literal.
                 */
                static const char son_msg[] = "write by son\n";
                static const char father_msg[] = "write by father\n";
                char buf[128];
                time_t ltime;
                int fd = open("./tmp.txt",O_RDWR|O_APPEND);
                if(fd < 0)
                {
                    fprintf(stderr,"open failed %s\n",strerror(errno));
                    return -1;
                }

                int ret = write_lock(fd,0,SEEK_SET,0);
                if(ret)
                {
                    fprintf(stderr,"fcntl  failed for father\n");
                    return -2;
                }
                else
                {
                    time(&ltime);
                    fprintf(stderr,"%s    I got the lock\n",ctime_r(&ltime,buf));
                }

                ret = fork();
                if(ret == 0)
                {
                    time(&ltime);  
                    fprintf(stdout,"%s  I am the son process,pid is %d,ppid = %d\n",ctime_r(&ltime,buf),getpid(),getppid());
                    if(write(fd,son_msg,sizeof(son_msg) - 1) < 0)
                    {
                        fprintf(stderr,"write failed %s\n",strerror(errno));
                    }
                    sleep(100);
                    time(&ltime);
                    fprintf(stdout,"%s    son exit\n",ctime_r(&ltime,buf));
                    return 0;
                }
                else if(ret > 0)
                {
                    time(&ltime);
                    fprintf(stdout,"%s    I am the father process,pid is %d\n",ctime_r(&ltime,buf),getpid());
                    if(write(fd,father_msg,sizeof(father_msg) - 1) < 0)
                    {
                        fprintf(stderr,"write failed %s\n",strerror(errno));
                    }
                    sleep(50);
                    close(fd);
                    time(&ltime);
                    fprintf(stdout, "%s    father exit\n",ctime_r(&ltime,buf));
                    return 0;
                }
                else
                {
                    fprintf(stderr, "error happened in fork\n");
                    return -3;
                }

            }

A進程持有鎖後，持續50秒，B進程作爲子進程持續100s，C進程在A退出前創建，我們觀察A死去後，C能否立刻獲取FL_POSIX類型的鎖。如果可以，表明鎖沒有繼承性，子進程B並不持有鎖。如果不可以，非要等到B死去後才能申請到，那麼說明父進程的鎖，被繼承到了子進程。

其實細心的筒子看到struct flock的l_pid大概就能猜到，鎖記錄了進程ID，精確歸某進程所有，就不會被繼承到子進程，我們驗證之。

 pid_t l_pid;     /* PID of process blocking our lock
                                 (F_GETLK only) */

看下輸出結果：

 root@manu:~/code/c/self/flock# ./fcntl_test 
    Sun Feb 10 16:14:45 2013
        I got the lock
    Sun Feb 10 16:14:45 2013
        I am the father process,pid is 6475
    Sun Feb 10 16:14:45 2013
      I am the son process,pid is 6476,ppid = 6475
    Sun Feb 10 16:15:35 2013
        father exit
    root@manu:~/code/c/self/flock# Sun Feb 10 16:16:25 2013
        son exit

    root@manu:~/code/c/self/flock# 
    root@manu:~/code/c/self/flock# ./fcntl_test 
    Sun Feb 10 16:15:35 2013
        I got the lock
    Sun Feb 10 16:15:35 2013
        I am the father process,pid is 6477
    Sun Feb 10 16:15:35 2013
      I am the son process,pid is 6482,ppid = 6477
    Sun Feb 10 16:16:25 2013
        father exit
    root@manu:~/code/c/self/flock#

結論：父進程A退出後，進程C就獲取到了FL_POSIX鎖，所以子進程不會繼承FL_POSIX類型的鎖。這和FL_FLOCK類型的鎖是不同的。 WHY！！！

kernel分析原因

實驗到了這個份上，我們就需要從內核代碼分析原因了。所有的代碼都在fs/locks.c,大家感興趣可以細細參詳，我只講繼承性差異的原因，爲啥FL_FLOCK鎖可以被繼承，但是FL_POSIX只精確的屬於某進程，不會被子進程繼承。

注意了我們都沒有主動UN_LOCK，flock我們沒有調用LOCK_UN，fcntl沒有調用F_UNLCK，鎖的釋放在close的時候去釋放。先說flock：flock在內核調用locks_delete_flock來釋放鎖，同時喚醒沉睡在這把鎖上的其他進程。 close--->filp_close--------->fput 注意fput：

            /*
             * fput - drop one reference to a struct file (quoted kernel code).
             *
             * All the work sits behind the atomic_long_dec_and_test() check on
             * f_count, so a call that does not satisfy that check returns
             * without doing anything else. As the surrounding article explains,
             * this is what happens when another process still shares the file.
             */
            void fput(struct file *file)
            {
                /* Proceed only when dropping this reference makes the test succeed. */
                if (atomic_long_dec_and_test(&file->f_count)) {
                    struct task_struct *task = current;
                    file_sb_list_del(file);
                    /*
                     * In interrupt context, or when the current task is a kernel
                     * thread: queue the file on delayed_fput_list under
                     * delayed_fput_lock and schedule delayed_fput_work.
                     */
                    if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
                        unsigned long flags;
                        spin_lock_irqsave(&delayed_fput_lock, flags);
                        list_add(&file->f_u.fu_list, &delayed_fput_list);
                        schedule_work(&delayed_fput_work);
                        spin_unlock_irqrestore(&delayed_fput_lock, flags);
                        return;
                    }
                    /* Otherwise register ____fput as task work on the current task. */
                    init_task_work(&file->f_u.fu_rcuhead, ____fput);
                    task_work_add(task, &file->f_u.fu_rcuhead, true);
                }
            }

注意了，條件atomic_long_dec_and_test(&file->f_count)：由於父子進程共享同一個struct file，父進程退出時引用計數只是減1，並沒有減到0，所以仍然不會執行if裏面的內容，而我們釋放FL_FLOCK類型鎖是在____fput，脈絡如下：

____fput-----> __fput----->locks_remove_flock---------->locks_delete_flock

那麼大家也就明白了，正是因爲引用計數並沒有減少到0,所以父進程的退出，並不會調用locks_delete_flock來喚醒等待這把鎖的進程。

對於fcntl實現的FL_POSIX類型的鎖，則不同，最終的釋放會走到__posix_lock_file,當然了，調用F_UNLCK最終也會調到此處。當進程退出，嘗試關閉進程打開的文件的時候，遵循這樣的脈絡

close----->filp_close----->locks_remove_posix---->vfs_lock_file----->posix_lock_file----->__posix_lock_file

當然走的是解鎖的分支。這條路徑上，沒有什麼條件阻止走到真正解鎖的地方，所以，當進程退出的時候，FL_POSIX類型的鎖就被釋放了。

觀察tool

我們如何觀測文件鎖的狀況呢？比如，我們知道某文件被鎖，如何知道是哪個進程鎖的這個文件呢？procfs提供了信息：

            root@manu:~/code/c/self/flock# ./test 
            Sun Feb 10 20:51:06 2013
                I got the lock
            Sun Feb 10 20:51:06 2013
                I am the father process,pid is 9941
            Sun Feb 10 20:51:06 2013
              I am the son process,pid is 9942,ppid = 9941

            root@manu:~/code/c/self/flock# ./fcntl_test 
            Sun Feb 10 20:51:14 2013
                I got the lock
            Sun Feb 10 20:51:14 2013
                I am the father process,pid is 9943
            Sun Feb 10 20:51:14 2013
              I am the son process,pid is 9944,ppid = 9943

              root@manu:~/code/c/classical/linux-3.6.7/fs# cat /proc/locks 
            1: POSIX  ADVISORY  WRITE 9943 08:06:2359759 0 EOF
            2: FLOCK  ADVISORY  WRITE 9941 08:06:2359759 0 EOF

我們可以看到/proc/locks下面有鎖的信息：我現在分別敘述下含義：

POSIX FLOCK 這個比較明確，就是哪個類型的鎖。flock系統調用產生的是FLOCK，fcntl調用F_SETLK，F_SETLKW產生的是POSIX類型
ADVISORY表明是勸告鎖
WRITE顧名思義，是寫鎖，還有讀鎖
9943是持有鎖的進程ID。當然對於flock這種類型的鎖，會出現進程已經退出的狀況。
08:06:2359759表示的是對應磁盤文件所在設備的主設備號、次設備號，還有文件對應的inode number。
0表示的是鎖的起始位置
EOF表示的是結束位置。這兩個字段對fcntl類型比較有用，對flock來說總是0 和EOF。

看下/home所在的分區主設備號就是8,次設備號就是6，而我們操作的文件的inode，就是2359759

            /dev/sda6      77993572 47528652 26558672   65% /home

               8        6   78125000 sda6

            root@manu:~/code/c/self/flock# ls -li tmp.txt 
            2359759 -rw-r--r-- 1 manu root 2689  2月 10 20:51 tmp.txt

本文做實驗都是採用的fork產生子進程，另外system庫函數也會產生子進程，首先產生sh 子進程，sh又調起了system入參那個命令，對於system,flock會傳遞到子進程，fcntl產生的勸告鎖則不會傳遞到子進程，有興趣的筒子可以自己實驗。

相關代碼和pdf類型的文檔，已經上傳到了github，歡迎大家訪問：https://github.com/manuscola/rwlock，獲取代碼及pdf格式的文檔。

參考文獻

深入理解linux內核
linux設備驅動程序（如何將鎖的信息show出來，代碼用了seq_file，這個又能寫一篇博文，唉太多了）
Manual

文件鎖 flock及fcntl flock

應用層fcntl

FL_POSIX鎖父子進程繼承性實驗

kernel分析原因

觀察tool

參考文獻

如何使用 JS 判斷用戶是否處於活躍狀態

Mono 支持LoongArch架構

lightdb秒級增加列和刪除列（not null帶默認值）

lightdb數據庫超時相關控制參數

通過HPA+CronHPA組合應對業務複雜彈性伸縮場景

❤️‍🔥 Solon Cloud Event 新的事務特性與應用

lightdb mysql 8.0兼容之不可見主鍵

使用 JS 實現在瀏覽器控制檯打印圖片 console.image()

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（四）使用域名訪問網站應用

windows下如何查看磁盤IO性能

core dump文件

詳解coredump

Linux inode耗盡導致圖片/文件無法上傳

linux-inode(yfruan)

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結