以下是我一邊參考別人的文章、一邊翻閱源碼總結出的socket系統調用流程,主要參考了大神的分析。
{
int retval;
struct socket *sock;
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
goto out;
retval = sock_map_fd(sock);
if (retval < 0)
goto out_release;
/* It may be already another descriptor 8) Not kernel problem. */
return retval;
sock_release(sock);
return retval;
}
/*
 * Create a socket on behalf of a user-space process.
 *
 * Thin wrapper that forwards every argument to __sock_create() and
 * returns its result unchanged.
 *
 * NOTE(review): the signature line was missing from this excerpt; it is
 * restored from how the body and sys_socket() use it -- confirm against
 * the kernel tree.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	/* Passing 0 as "kern" marks the socket as created by a user process. */
	return __sock_create(family, type, protocol, res, 0);
}
sock_create只是一層封裝,實際工作由__sock_create(family, type, protocol, res, 0)完成:
/**
* 創建一個套接口
* family: 套接口協議族
* type: 套接口類型
* protocol: 傳輸層協議
* res: 輸出參數,創建成功的套接口指針
* kern: 由內核還是應用程序創建。
*/
static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
{
int err;
struct socket *sock;
* Check protocol is in range
*/
if (family < 0 || family >= NPROTO)/* 參數合法性檢測 */
return -EAFNOSUPPORT;
if (type < 0 || type >= SOCK_MAX)
return -EINVAL;
/**
* IPV4協議族的SOCK_PACKET類型套接口已經不被支持
* 爲兼容舊程序,轉換爲PF_PACKET */
if (family == PF_INET && type == SOCK_PACKET) {
static int warned;
if (!warned) {
warned = 1;
printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
}
family = PF_PACKET;
}
err = security_socket_create(family, type, protocol, kern);
if (err)
return err;
#if defined(CONFIG_KMOD)
/* Attempt to load a protocol module if the find failed.
*
* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
if (net_families[family]==NULL)/* 相應的協議族在內核中尚不存在,加載模塊以支持該協議族 */
{
request_module("net-pf-%d",family);
}
#endif
net_family_read_lock();
if (net_families[family] == NULL) {/* 如果協議族仍然不存在,說明不支持此協議族 */
err = -EAFNOSUPPORT;
goto out;
}
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
printk(KERN_WARNING "socket: no more sockets\n");
err = -ENFILE; /* Not exactly a match, but its the
closest posix thing */
goto out;
}
* We will call the ->create function, that possibly is in a loadable
* module, so we have to bump that loadable module refcnt first.
*/
err = -EAFNOSUPPORT;
if (!try_module_get(net_families[family]->owner))/* 增加對協議族模塊的引用,如果失敗則退出 */
goto out_release;
if ((err = net_families[family]->create(sock, protocol)) < 0)
goto out_module_put;
/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
if (!try_module_get(sock->ops->owner)) {/* 增加傳輸層模塊的引用計數 */
sock->ops = NULL;
goto out_module_put;
}
/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
/* 增加了傳輸層模塊的引用計數後,可以釋放協議族的模塊引用計數 */
module_put(net_families[family]->owner);
*res = sock;
/* 通知安全模塊,對創建過程進行檢查。 */
security_socket_post_create(sock, family, type, protocol, kern);
net_family_read_unlock();
return err;
out_module_put:
module_put(net_families[family]->owner);
out_release:
sock_release(sock);
goto out;
}
__sock_create分配好socket之後,調用net_families[family]->create(即pf->create)對其進行協議族相關的初始化。分析inet_init等網絡子系統初始化函數得知,對於PF_INET協議族,pf->create指向的是inet_create函數:
/*
* Create an inet socket.
*/
/**
* 創建一個IPV4的socket
* sock: 已經創建的套接口
* protocol: 套接口的協議號
*/
static int inet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct list_head *p;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
char answer_no_check;
int err;
answer = NULL;
rcu_read_lock();
list_for_each_rcu(p, &inetsw[sock->type]) {/* 根據套接口類型遍歷IPV4鏈表 */
answer = list_entry(p, struct inet_protosw, list);
if (protocol == answer->protocol) {/* 比較傳輸層協議 */
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
if (IPPROTO_IP == answer->protocol)
break;
}
answer = NULL;
}
if (!answer)/* 找不到對應的傳輸層協議 */
goto out_rcu_unlock;
err = -EPERM;
/* 創建該類型的套接口需要特定能力,而當前進程沒有這種能力,則退出 */
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
err = -EPROTONOSUPPORT;
if (!protocol)
goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
/* 根據協議族等參數分配傳輸控制塊。 */
sk = sk_alloc(PF_INET, GFP_KERNEL,
answer_prot->slab_obj_size,
answer_prot->slab);
if (sk == NULL)
goto out;
sk->sk_prot = answer_prot;
/* 設置是否需要校驗和 */
sk->sk_no_check = answer_no_check;
/* 是否可以重用地址和端口 */
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = 1;
inet->num = protocol;/* 設置本地端口爲協議號 */
if (IPPROTO_RAW == protocol)/* 如果是RAW協議,則需要自己構建IP首部 */
inet->hdrincl = 1;
}
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
sk_set_owner(sk, sk->sk_prot->owner);
sk->sk_destruct = inet_sock_destruct;
/* 設置協議族和協議號 */
sk->sk_family = PF_INET;
sk->sk_protocol = protocol;
/* 設置後備隊列接收函數。 */
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
inet->mc_loop = 1;
inet->mc_ttl = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
atomic_inc(&inet_sock_nr);
#endif
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->sport = htons(inet->num);/* 設置網絡序的本地端口號 */
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);/* 將傳輸控制塊加入到靜列表中 */
}
err = sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
/*
 * Allocate a transport control block (struct sock).
 *
 * family:   stored in sk->sk_family when the object is initialised
 * priority: allocation flags handed to kmem_cache_alloc()
 * zero_it:  0 leaves the object untouched; 1 zeroes sizeof(struct sock)
 *           bytes; any other value is the number of bytes to zero
 * slab:     cache to allocate from; NULL selects the default sk_cachep
 *
 * Returns the new sock, or NULL when the allocation fails or
 * security_sk_alloc() rejects it.
 */
struct sock *sk_alloc(int family, int priority, int zero_it, kmem_cache_t *slab)
{
	struct sock *sk = NULL;

	/*
	 * Fall back to the generic cache only when the caller supplied none.
	 * Overriding a caller-supplied cache would hand out sk_cachep objects
	 * while zero_it may describe a larger protocol-specific object, so
	 * the memset() below could run past the end of the allocation.
	 */
	if (!slab)
		slab = sk_cachep;

	/* Allocate from the chosen cache with the requested flags. */
	sk = kmem_cache_alloc(slab, priority);
	if (sk) {
		if (zero_it) {
			/* Caller asked for the object to be initialised. */
			memset(sk, 0,
			       zero_it == 1 ? sizeof(struct sock) : zero_it);
			sk->sk_family = family;
			sock_lock_init(sk);
		}
		/* Remembered so the object can be freed to the right cache. */
		sk->sk_slab = slab;

		/* Security hook; on failure give the object back. */
		if (security_sk_alloc(sk, family, priority)) {
			kmem_cache_free(slab, sk);
			sk = NULL;
		}
	}
	return sk;
}
{
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
sk->sk_zapped = 1;
sk->sk_socket = sock;
{
sk->sk_type = sock->type;
sk->sk_sleep = &sock->wait;
sock->sk = sk;
} else
sk->sk_sleep = NULL;
rwlock_init(&sk->sk_callback_lock);
sk->sk_data_ready = sock_def_readable;
sk->sk_write_space = sock_def_write_space;
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;
sk->sk_sndmsg_off = 0;
sk->sk_peercred.uid = -1;
sk->sk_peercred.gid = -1;
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_owner = NULL;
sk->sk_stamp.tv_usec = -1L;
}
sock_create函數的功能:創建一個struct socket類型的變量sock,找到TCP對應的操作
函數集inet_stream_ops並安裝到sock->ops中;再申請一個struct tcp_sock結構體大小
的內存並強制賦給struct sock指針sk,使sk->sk_prot指向tcp_prot,將sk賦給sock->sk使
得sock與sk關聯起來,並對sk和sock進行初始化。而sock->ops和sk->sk_prot的指向,決定
了後續所有系統調用的行爲集。
* 將套接口與文件描述符綁定。
*/
int sock_map_fd(struct socket *sock)
{
int fd;
struct qstr this;
char name[32];
* Find a file descriptor suitable for return to the user.
*/
/**
* 獲得空閒的文件描述符。
*/
fd = get_unused_fd();
if (fd >= 0) {/* 成功分配文件描述符 */
struct file *file = get_empty_filp();
put_unused_fd(fd);
fd = -ENFILE;
goto out;
}
this.name = name;
this.len = strlen(name);
this.hash = SOCK_INODE(sock)->i_ino;
* 分配文件目錄項。
*/
file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
if (!file->f_dentry) {
put_filp(file);
put_unused_fd(fd);
fd = -ENOMEM;
goto out;
}
file->f_dentry->d_op = &sockfs_dentry_operations;
d_add(file->f_dentry, SOCK_INODE(sock));
file->f_vfsmnt = mntget(sock_mnt);
file->f_mapping = file->f_dentry->d_inode->i_mapping;
file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
file->f_mode = FMODE_READ | FMODE_WRITE;
file->f_flags = O_RDWR;
file->f_pos = 0;
/* 將文件描述符實例增加到已經打開的文件列表中,完成文件與進程的綁定 */
fd_install(fd, file);
}
return fd;
}
可見sock_map_fd函數的功能是:獲取一個未被使用的整數fd,申請一個文件結構體對象file,再通過fd_install將fd與file關聯起來。fd_install的實現如下:
/*
 * Install file in slot fd of the current task's open-file table.
 *
 * The slot must be empty: finding a file already stored there triggers
 * BUG(). The check and the store are both done with files->file_lock
 * held.
 */
void fastcall fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *table = current->files;
	struct file **slot;

	spin_lock(&table->file_lock);
	slot = &table->fd[fd];
	if (unlikely(*slot != NULL))
		BUG();
	*slot = file;
	spin_unlock(&table->file_lock);
}
對文件的讀寫等操作由文件系統inode的i_fop指向的函數集實現,在socket結構體中這個函數集是socket_file_ops:
* 套接口文件的接口
*/
static struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.readv = sock_readv,
.writev = sock_writev,
.sendpage = sock_sendpage
};
現在可以總結sock_map_fd函數的功能了:將socket結構體對象與文件系統結構關聯起來,
使得socket成爲了一種特殊的文件系統;並生成一個文件描述符,將描述
符與socket文件系統關聯,使得Linux API接口可以通過文件描述符操作socket。
這種設計是符合Unix“一切皆文件”理念的。但這樣做有什麼好處呢?我能想到的好處有兩點:一是在socket上
產生的事件可以和其它類型文件系統的事件一起交由poll或epoll這類高效
的事件監控機制管理,這樣可以大大擴展socket的應用範圍;二是進程無
論是正常退出還是異常退出,內核在銷燬進程的資源時都會關閉其打開的
所有文件描述符,這樣即使進程被異常殺死,也會觸發close系統調用將
socket對應的連接關掉(如果是TCP連接的話會通知對端關閉連接),這就
使得異常情況下網絡連接的資源也能及時得到回收。
總結一下,socket系統調用的功能是:在內核生成一個socket和tcp_sock類型的
對象用於保存TCP連接信息,並安裝TCP相關的操作函數集以限定後續的系統
調用的行爲;將socket對象與一個file類型的對象綁定到一起,並將一個文
件描述符與file對象綁定到一起,最後將這個文件描述符返回給調用者。