2--信號量的實踐到內核--進入內核分析信號量的操作

我們接着看信號量的操作,在semaphore_p()函數中調用了semop(sem_id, &sem_b, 1) 函數,在sys_ipc()系統調用中則會到達
 

/* SEMOP is handled by sys_semtimedop() with a NULL timeout argument. */
case SEMOP:
        return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);

可以看到轉向sys_semtimedop()處執行,在進入這個函數之前我們先看一下在應用程序中調用semop()時傳遞的參數,參數搞不清我們將無法理解系統調用的走向。semop()函數的主要作用是改變信號量的值,它的第一個參數是信號量的標識符,就象消息隊列和共享內存中我們講的那樣,第二個參數則指向一個sembuf數據結構

/*
 * One semaphore operation. The semop system calls take an array of these;
 * each entry names one semaphore of the set and the change to apply to it.
 */
struct sembuf {
    unsigned short sem_num;    /* semaphore index in array */
    short        sem_op;        /* semaphore operation: value added to the semaphore; 0 blocks while the semaphore is non-zero */
    short        sem_flg;    /* operation flags (SEM_UNDO and IPC_NOWAIT are tested by the kernel code) */
};

sembuf結構中的sem_num是信號量在信號量數組中的下標(序號),sem_op是要對信號量的值所做的改變,一般P操作爲-1,而V操作爲+1,而sem_flg是信號量的一些操作標記,一般是SEM_UNDO,表示萬一進程未及時歸還信號量,將由這個標記告訴內核由內核代爲處理。我們看到在應用程序中

/* Describe a single P() operation on the first semaphore of the set. */
struct sembuf sem_b;
    
    /* index 0: the first semaphore in the array */
    sem_b.sem_num = 0;
    sem_b.sem_op = -1; /* P() */
    /* ask the kernel to record an undo adjustment for this operation */
    sem_b.sem_flg = SEM_UNDO;

這裏將信號量的下標sem_num設爲0,也就是操作信號量數組中的第一個元素(我們的信號量數組中只有一個元素),而信號量的操作數是-1,所以是P操作,另外sem_flg設爲了SEM_UNDO,表示如果進程中途退出了將由內核代他“清帳”。應用程序這邊看明白了,我們可以接着對照着sys_semtimedop()函數看這些參數是如何傳遞進去的,這個函數在ipc/sem.c的1042行處。

/*
 * sys_semtimedop - perform an array of operations on one semaphore set.
 * @semid:   identifier of the semaphore set
 * @tsops:   user-space array of operations to perform
 * @nsops:   number of entries in @tsops
 * @timeout: optional user-space timespec limiting how long to sleep;
 *           NULL means no timeout (the SEMOP case passes NULL)
 *
 * Returns 0 on success or a negative errno value.
 */
asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
            unsigned nsops, const struct timespec __user *timeout)
{
    int error = -EINVAL;
    struct sem_array *sma;
    struct sembuf fast_sops[SEMOPM_FAST];
    struct sembuf* sops = fast_sops, *sop;
    struct sem_undo *un;
    int undos = 0, alter = 0, max;
    struct sem_queue queue;
    unsigned long jiffies_left = 0;
    struct ipc_namespace *ns;

    /* IPC namespace taken from the calling task */
    ns = current->nsproxy->ipc_ns;

    /* Reject an empty operation list or a negative identifier. */
    if (nsops < 1 || semid < 0)
        return -EINVAL;
    /* Too many operations for this namespace's sc_semopm limit. */
    if (nsops > ns->sc_semopm)
        return -E2BIG;
    /*
     * Up to SEMOPM_FAST operations fit in the on-stack fast_sops array;
     * larger requests get a heap buffer, released again at out_free.
     */
    if(nsops > SEMOPM_FAST) {
        sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
        if(sops==NULL)
            return -ENOMEM;
    }
    /* Bring the caller's operation array into kernel memory. */
    if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
        error=-EFAULT;
        goto out_free;
    }
    if (timeout) {
        struct timespec _timeout;
        if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
            error = -EFAULT;
            goto out_free;
        }
        /* Negative fields or tv_nsec of one second or more are invalid. */
        if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
            _timeout.tv_nsec >= 1000000000L) {
            error = -EINVAL;
            goto out_free;
        }
        /* Remaining sleep budget, later handed to schedule_timeout(). */
        jiffies_left = timespec_to_jiffies(&_timeout);
    }

我們看到第一個參數是信號量的標識符,第二個參數則是我們應用程序中創建好的sembuf結構變量,第三個參數則是1。這段代碼中最主要的是copy_from_user(),這個函數涉及到嵌入式彙編,我們以後會重點講到並詳細描述。這段代碼其餘的部分,相信只要看過前邊的消息隊列和共享內存的處理後也不是難事。這裏我們還可以看到有一個超時參數timeout,上面傳遞過來的是NULL,因此我們忽略這部分代碼,不過從裏面的代碼也可以看出它是對時間的檢查和轉換。暫且跳過,畢竟我們的目標是順着應用程序看內核

/*
 * Scan the operations once and record:
 *   max   - the highest semaphore index referenced,
 *   undos - whether any operation carries SEM_UNDO,
 *   alter - whether any operation has a non-zero sem_op.
 */
max = 0;
    for (sop = sops; sop < sops + nsops; sop++) {
        if (sop->sem_num >= max)
            max = sop->sem_num;
        if (sop->sem_flg & SEM_UNDO)
            undos = 1;
        if (sop->sem_op != 0)
            alter = 1;
    }

/* Re-entered from further down when the undo entry turns out unusable. */
retry_undos:
    if (undos) {
        /* Look up this task's undo record for semid. */
        un = find_undo(ns, semid);
        if (IS_ERR(un)) {
            error = PTR_ERR(un);
            goto out_free;
        }
    } else
        un = NULL;

    /*
     * Look up the semaphore array for semid. The later sem_unlock(sma)
     * calls indicate that it is returned locked on success.
     */
    sma = sem_lock_check(ns, semid);
    if (IS_ERR(sma)) {
        error = PTR_ERR(sma);
        goto out_free;
    }

接着內核循環檢查各個操作中是否帶有SEM_UNDO標記,如果有,就進入find_undo()去查找(必要時分配)當前進程針對這個信號量集的sem_undo結構

/*
 * find_undo - return the sem_undo record for semid on the undo list
 * obtained from get_undo_list(), allocating and linking a new record
 * when none exists yet.
 * @ns:    IPC namespace used to look up the semaphore array
 * @semid: semaphore set identifier
 *
 * Returns the record, or an ERR_PTR() value: the error from
 * get_undo_list() or sem_lock_check(), -ENOMEM when allocation fails,
 * or -EIDRM when the array is marked deleted.
 */
static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
{
    struct sem_array *sma;
    struct sem_undo_list *ulp;
    struct sem_undo *un, *new;
    int nsems;
    int error;

    error = get_undo_list(&ulp);
    if (error)
        return ERR_PTR(error);

    /* Expected case: a record for this semid is already on the list. */
    spin_lock(&ulp->lock);
    un = lookup_undo(ulp, semid);
    spin_unlock(&ulp->lock);
    if (likely(un!=NULL))
        goto out;

    /* no undo structure around - allocate one. */
    sma = sem_lock_check(ns, semid);
    if (IS_ERR(sma))
        return ERR_PTR(PTR_ERR(sma));

    /*
     * Read the number of semaphores, then take a reference and drop the
     * array lock before the GFP_KERNEL allocation below.
     */
    nsems = sma->sem_nsems;
    sem_getref_and_unlock(sma);

    /* A single allocation holds the record plus one short per semaphore. */
    new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
    if (!new) {
        sem_putref(sma);
        return ERR_PTR(-ENOMEM);
    }
    /* The adjustment array starts immediately after the structure. */
    new->semadj = (short *) &new[1];
    new->semid = semid;

    /*
     * ulp->lock was released after the first lookup, so look again: if a
     * record for semid is on the list now, use it and discard ours.
     */
    spin_lock(&ulp->lock);
    un = lookup_undo(ulp, semid);
    if (un) {
        spin_unlock(&ulp->lock);
        kfree(new);
        sem_putref(sma);
        goto out;
    }
    sem_lock_and_putref(sma);
    /* The array was marked deleted in the meantime. */
    if (sma->sem_perm.deleted) {
        sem_unlock(sma);
        spin_unlock(&ulp->lock);
        kfree(new);
        un = ERR_PTR(-EIDRM);
        goto out;
    }
    /* Link the new record at the head of both lists it belongs to. */
    new->proc_next = ulp->proc_list;
    ulp->proc_list = new;
    new->id_next = sma->undo;
    sma->undo = new;
    sem_unlock(sma);
    un = new;
    spin_unlock(&ulp->lock);
out:
    return un;
}

這個函數中我們首先要接觸一下sem_undo數據結構

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits. Every record is linked into two lists at once:
 * one through proc_next and one through id_next.
 */

struct sem_undo {
    struct sem_undo *    proc_next;    /* next entry on this process */
    struct sem_undo *    id_next;    /* next entry on this semaphore set */
    int            semid;        /* semaphore set identifier */
    short *            semadj;        /* array of adjustments, one per semaphore */
};

我們剛纔介紹過進程在中途退出時都要委託內核“清帳”就是這裏的sem_undo結構記錄的,應用程序中我們看到具有了信號的標記SEM_UNDO,所以內核在這裏會爲進程分配一個sem_undo結構“記帳”,並且一個進程有一個sem_undo隊列以方便記住所有的“帳”,即進程都是需要對哪些信號量有還原的義務。同時,每個信號量也有一個undo指針,指向一個sem_undo隊列,它使信號量記住所有“欠帳”的進程,這二個隊列都需要sem_undo結構中的proc_next和id_next指針負責鏈入的,可以順着這二個指針找到所有相關同一個信號量的進程。通過這段介紹上面這段代碼非常清晰了。回到sys_semtimedop()函數中我們繼續看

/*
 * NOTE(review): semid == -1 presumably marks an undo record that was
 * invalidated between find_undo() and taking the array lock - confirm
 * against the code that writes it. Either way, drop the lock and retry.
 */
if (un && un->semid == -1) {
        sem_unlock(sma);
        goto retry_undos;
    }
    /* An operation referenced an index beyond the end of the array. */
    error = -EFBIG;
    if (max >= sma->sem_nsems)
        goto out_unlock_free;

    /* Altering operations are checked with S_IWUGO, others with S_IRUGO. */
    error = -EACCES;
    if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
        goto out_unlock_free;

    error = security_sem_semop(sma, sops, nsops, alter);
    if (error)
        goto out_unlock_free;

    /*
     * Try to apply every operation at once. The result is 0 on success,
     * a negative errno on failure, or 1 when the caller has to sleep.
     */
    error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
    if (error <= 0) {
        /* Values changed: let update_queue() re-check the pending queue. */
        if (alter && error == 0)
            update_queue (sma);
        goto out_unlock_free;
    }

上面的代碼最重要的是try_atomic_semop()函數,這個函數是對信號量操作的關鍵

/*
 * try_atomic_semop - apply all operations in sops to sma, or none of them.
 * @sma:   semaphore array to operate on
 * @sops:  array of operations
 * @nsops: number of entries in @sops
 * @un:    undo record; dereferenced whenever an operation has SEM_UNDO set,
 *         so it must be valid in that case
 * @pid:   value stored in sempid of every semaphore touched on success
 *
 * Returns 0 when every operation was applied, 1 when an operation would
 * block and has no IPC_NOWAIT flag, -EAGAIN when it would block and
 * IPC_NOWAIT is set, or -ERANGE when a semaphore value would exceed SEMVMX
 * or an undo adjustment would leave the [-SEMAEM - 1, SEMAEM] range.
 * On any non-zero return the values already modified are restored.
 */
static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
             int nsops, struct sem_undo *un, int pid)
{
    int result, sem_op;
    struct sembuf *sop;
    struct sem * curr;

    /* Pass 1: apply each operation tentatively, stopping at the first failure. */
    for (sop = sops; sop < sops + nsops; sop++) {
        curr = sma->sem_base + sop->sem_num;
        sem_op = sop->sem_op;
        result = curr->semval;
  
        /* A zero operation blocks while the value is non-zero. */
        if (!sem_op && result)
            goto would_block;

        result += sem_op;
        /* The value may not drop below zero: block instead. */
        if (result < 0)
            goto would_block;
        if (result > SEMVMX)
            goto out_of_range;
        if (sop->sem_flg & SEM_UNDO) {
            /* Only checked here; semadj itself is updated in pass 2. */
            int undo = un->semadj[sop->sem_num] - sem_op;
            /*
             * Exceeding the undo range is an error.
             */

            if (undo < (-SEMAEM - 1) || undo > SEMAEM)
                goto out_of_range;
        }
        curr->semval = result;
    }

    /*
     * Pass 2 (success): walk back over every operation, stamp sempid and
     * record the inverse of each SEM_UNDO operation in semadj.
     */
    sop--;
    while (sop >= sops) {
        sma->sem_base[sop->sem_num].sempid = pid;
        if (sop->sem_flg & SEM_UNDO)
            un->semadj[sop->sem_num] -= sop->sem_op;
        sop--;
    }
    
    sma->sem_otime = get_seconds();
    return 0;

out_of_range:
    result = -ERANGE;
    goto undo;

would_block:
    if (sop->sem_flg & IPC_NOWAIT)
        result = -EAGAIN;
    else
        result = 1;

    /*
     * Roll back. sop points at the failing operation, whose semval was
     * never written, so start from the one before it.
     */
undo:
    sop--;
    while (sop >= sops) {
        sma->sem_base[sop->sem_num].semval -= sop->sem_op;
        sop--;
    }

    return result;
}

這個函數重點實質性的二句,第一句是取得對信號量值的計算result += sem_op;第二句是將操作後的值賦給信號量curr->semval = result;這個函數中我們還可以看到“還帳”的操作,即sma->sem_base[sop->sem_num].semval -= sop->sem_op;上面加上這裏減去也就還原了。整個函數的跳轉看來很複雜,只不過分了幾種情況,首先是對信號量的操作值大於固定 SEMVMX的值,就要退出並執行undo還原操作。其次情況是對信號量的操作值變成了負值,這出現在信號量已經被其他進程搶先P操作了,此時就要goto would_block處,通常進程就會睡眠等待了,我們看到到那裏設置出錯碼然後執行undo還原操作。最後一種情況是操作值爲0時,代碼:if (!sem_op && result)也會goto would_block處。我們的應用程序執行到這裏順利取得了信號量,回到上面的函數我們繼續看

/* We need to sleep on this operation, so we put the current
     * task into the pending queue and go to sleep.
     */

        
    /* queue lives on this function's stack and describes the request. */
    queue.sma = sma;
    queue.sops = sops;
    queue.nsops = nsops;
    queue.undo = un;
    queue.pid = task_tgid_vnr(current);
    queue.id = semid;
    queue.alter = alter;
    /* Altering and non-altering requests use different insert helpers. */
    if (alter)
        append_to_queue(sma ,&queue);
    else
        prepend_to_queue(sma ,&queue);

    /*
     * -EINTR is the initial status; update_queue() overwrites it when it
     * completes this request.
     */
    queue.status = -EINTR;
    queue.sleeper = current;
    current->state = TASK_INTERRUPTIBLE;
    sem_unlock(sma);

    if (timeout)
        jiffies_left = schedule_timeout(jiffies_left);
    else
        schedule();

    /*
     * update_queue() writes IN_WAKEUP first and the final status only
     * after wake_up_process(); spin until the final value is visible.
     */
    error = queue.status;
    while(unlikely(error == IN_WAKEUP)) {
        cpu_relax();
        error = queue.status;
    }

    if (error != -EINTR) {
        /* fast path: update_queue already obtained all requested
         * resources */

        goto out_free;
    }

    /* Status is still -EINTR: retake the array lock before touching the queue. */
    sma = sem_lock(ns, semid);
    if (IS_ERR(sma)) {
        /* The array could not be locked; we must not still be queued. */
        BUG_ON(queue.prev != NULL);
        error = -EIDRM;
        goto out_free;
    }

    /*
     * If queue.status != -EINTR we are woken up by another process
     */

    error = queue.status;
    if (error != -EINTR) {
        goto out_unlock_free;
    }

    /*
     * If an interrupt occurred we have to clean up the queue
     */

    /* A timeout was given and no jiffies remain: report -EAGAIN instead of -EINTR. */
    if (timeout && jiffies_left == 0)
        error = -EAGAIN;
    remove_from_queue(sma,&queue);
    goto out_unlock_free;

out_unlock_free:
    sem_unlock(sma);
out_free:
    /* Only a kmalloc()ed operation buffer needs freeing. */
    if(sops != fast_sops)
        kfree(sops);
    return error;
}

和以前我們說過的消息隊列一樣通過睡眠時要建立信號量的隊列,是用sem_queue結構來完成的

/*
 * One queue entry for each sleeping process in the system. It records
 * the pending request so that update_queue() can retry it later with
 * try_atomic_semop().
 */
struct sem_queue {
    struct sem_queue *    next;     /* next entry in the queue */
    struct sem_queue **    prev;     /* previous entry in the queue, *(q->prev) == q */
    struct task_struct*    sleeper; /* this process */
    struct sem_undo *    undo;     /* undo structure */
    int             pid;     /* process id of requesting process */
    int             status;     /* completion status of operation */
    struct sem_array *    sma;     /* semaphore array for operations */
    int            id;     /* internal sem id */
    struct sembuf *        sops;     /* array of pending operations */
    int            nsops;     /* number of operations */
    int            alter; /* does the operation alter the array? */
};

我們看到這裏通過與進程掛上鉤建立好聯繫就要轉入進程調度,在進入調度之前,current->state = TASK_INTERRUPTIBLE;使進程轉入睡眠狀態。那這個進程什麼時候會被喚醒呢,我們想到已經得到信號量的“欠帳”進程,不管那些進程如何,內核也可以代爲操作,都會最終“清帳”,也就是執行完函數try_atomic_semop()然後調用update_queue()來喚醒這些睡眠的進程,以通知他們可以來取得信號量了。

/*
 * update_queue - retry the pending operations queued on a semaphore array.
 * @sma: semaphore array whose sem_pending queue is scanned
 *
 * Each queued request is retried with try_atomic_semop(). A request that
 * finishes (result <= 0, i.e. success or an error) is removed from the
 * queue, its sleeper is woken, and the result is stored in its status
 * field. Requests that would still block stay queued.
 */
static void update_queue (struct sem_array * sma)
{
    int error;
    struct sem_queue * q;

    q = sma->sem_pending;
    while(q) {
        error = try_atomic_semop(sma, q->sops, q->nsops,
                     q->undo, q->pid);

        /* Does q->sleeper still need to sleep? */
        if (error <= 0) {
            struct sem_queue *n;
            remove_from_queue(sma,q);
            /* Intermediate marker: the sleeper spins while it reads IN_WAKEUP. */
            q->status = IN_WAKEUP;
            /*
             * Continue scanning. The next operation
             * that must be checked depends on the type of the
             * completed operation:
             * - if the operation modified the array, then
             * restart from the head of the queue and
             * check for threads that might be waiting
             * for semaphore values to become 0.
             * - if the operation didn't modify the array,
             * then just continue.
             */

            /* n is chosen before the final status write; q is not read after it. */
            if (q->alter)
                n = sma->sem_pending;
            else
                n = q->next;
            wake_up_process(q->sleeper);
            /* hands-off: q will disappear immediately after
             * writing q->status.
             */

            /* Write barrier placed before the final status store. */
            smp_wmb();
            q->status = error;
            q = n;
        } else {
            q = q->next;
        }
    }
}

這個函數在while循環中首先再次調用try_atomic_semop()函數來試一下隊列中的第一個進程是否能完成對信號量的操作,如果成功了就要喚醒進程使他運行,否則的話就試試下一個在信號量隊列中等待的進程,順着next的指針鏈直到窮盡所有等待進程。不過在這裏看似好象是排在隊列中最前的進程有優先權,其實我們應該明白在前面的函數中關鍵的掛入函數是不同的,

    /*
     * The insert helper depends on whether the request alters the array.
     * NOTE(review): judging by the names, altering requests go to the tail
     * and non-altering ones to the head - confirm against the helpers.
     */
    if (alter)
        append_to_queue(sma ,&queue);
    else
        prepend_to_queue(sma ,&queue);

這裏根據alter來決定是否更變掛入隊列的順序,這二個函數很簡單,到這裏我們就介紹了信號量的應用程序到內核的操作。其他關於信號量的操作都很簡單了,只要看過前邊消息隊列和共享內存,自己閱讀就不再是難事了,我們起到了關鍵的“導遊地圖”的作用即可。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章