本文共 21156 字,大约阅读时间需要 70 分钟。
转自:
3、套接字的实现
套接字最先是在UNIX的BSD版本实现的,所以也叫做BSD套接字,它隐藏了各个协议之间的差异,并向上提供统一的接口。Linux中实现套接字的基本结构:
3.1、BSD套接字
3.1.1、核心数据结构
为了实现BSD套接字,内核提供一个重要的数据结构struct socket,它的定义如下:
//BSD套接字(include/linux/net.h)
struct socket {
socket_state state; //套接字状态
unsigned long flags;
struct proto_ops *ops; //操作函数集
struct fasync_struct *fasync_list;
struct file *file;//每个BSD套接字都有一个inode结点,通过文件对象与其关联起来
struct sock *sk; //socket内部结构,与具体的协议簇(比如PF_INET)相关
wait_queue_head_t wait;
short type; //套接字类型:如SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, SOCK_RDM, SOCK_SEQPACKET, and SOCK_PACKET
unsigned char passcred;
};
//BSD套接字操作函数集
struct proto_ops {
int family;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
struct sockaddr *myaddr,
int sockaddr_len);
int (*connect) (struct socket *sock,
struct sockaddr *vaddr,
int sockaddr_len, int flags);
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
struct socket *newsock, int flags);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
unsigned int (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
unsigned long arg);
int (*listen) (struct socket *sock, int len);
int (*shutdown) (struct socket *sock, int flags);
int (*setsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
int (*sendmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len);
int (*recvmsg) (struct kiocb *iocb, struct socket *sock,
struct msghdr *m, size_t total_len,
int flags);
int (*mmap) (struct file *file, struct socket *sock,
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
};
//BSD套接字状态
typedef enum {
SS_FREE = 0, /* not allocated */
SS_UNCONNECTED, /* unconnected to any socket */
SS_CONNECTING, /* in process of connecting */
SS_CONNECTED, /* connected to socket */
SS_DISCONNECTING /* in process of disconnecting */
} socket_state;
3.1.2、BSD套接字初始化
//net/socket.c
//BSD套接字的初始化
void __init sock_init(void)
{
int i;
/*
* Initialize all address (protocol) families.
*/
for (i = 0; i < NPROTO; i++)
net_families[i] = NULL; //协议簇数组初始化
/*
* Initialize sock SLAB cache.
*/
//分配sock缓存
sk_init();
#ifdef SLAB_SKB
/*
* Initialize skbuff SLAB cache
*/
skb_init();
#endif
/*
* Initialize the protocols module.
*/
init_inodecache();
//注册sockfs文件系统
register_filesystem(&sock_fs_type);
//安装sockfs
sock_mnt = kern_mount(&sock_fs_type);
/* The real protocol initialization is performed when
* do_initcalls is run.
*/
#ifdef CONFIG_NETFILTER
netfilter_init();
#endif
}
//net/socket.c
//sockfs文件系统的安装点
static struct vfsmount *sock_mnt;
//sockfs文件系统类型
static struct file_system_type sock_fs_type = {
.name = "sockfs",
.get_sb = sockfs_get_sb,
.kill_sb = kill_anon_super,
};
//地址簇及协议信息
static struct net_proto_family *net_families[NPROTO];
sock_init在系统初始化的被调用:
3.1.3、BSD套接字的系统调用
实际上,Linux内核只提供了一个与套接字相关的系统调用,即sys_socketcall,应用程序的所有套接字调用都会映射到这个系统调用上。
//BSD套接字调用入口(net/socket.c)
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
unsigned long a[6];
unsigned long a0,a1;
int err;
if(call<1||call>SYS_RECVMSG)
return -EINVAL;
/* copy_from_user should be SMP safe. */
if (copy_from_user(a, args, nargs[call]))//从用户区拷贝参数
return -EFAULT;
a0=a[0];
a1=a[1];
switch(call) //调用相应的函数
{
case SYS_SOCKET:
err = sys_socket(a0,a1,a[2]);
break;
case SYS_BIND:
err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
break;
case SYS_CONNECT:
err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_LISTEN:
err = sys_listen(a0,a1);
break;
case SYS_ACCEPT:
err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_GETSOCKNAME:
err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_GETPEERNAME:
err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_SOCKETPAIR:
err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
break;
case SYS_SEND:
err = sys_send(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_SENDTO:
err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], a[5]);
break;
case SYS_RECV:
err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_RECVFROM:
err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], (int __user *)a[5]);
break;
case SYS_SHUTDOWN:
err = sys_shutdown(a0,a1);
break;
case SYS_SETSOCKOPT:
err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
break;
case SYS_GETSOCKOPT:
err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
break;
case SYS_SENDMSG:
err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
break;
case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
break;
default:
err = -EINVAL;
break;
}
return err;
}
//include/asm/unistd.h
#define __NR_socketcall 102 //系统调用号
下面来看一下sys_socket的实现:
//net/socket.c/*创建socket**首先建立一个socket数据结构,然后将其“映射”到一个已打开的文件.*/asmlinkage long sys_socket(int family, int type, int protocol){ int retval; struct socket *sock; //创建socket retval = sock_create(family, type, protocol, &sock); if (retval < 0) goto out; //将socket映射到文件描述符 retval = sock_map_fd(sock); if (retval < 0) goto out_release;out: /* It may be already another descriptor 8) Not kernel problem. */ return retval;out_release: sock_release(sock); return retval;}int sock_create(int family, int type, int protocol, struct socket **res){ return __sock_create(family, type, protocol, res, 0);}static int __sock_create(int family, int type, int protocol, struct socket **res, int kern){ int i; int err; struct socket *sock; /* * Check protocol is in range */ //检查协议是否可用 if (family < 0 || family >= NPROTO) return -EAFNOSUPPORT; if (type < 0 || type >= SOCK_MAX) return -EINVAL; /* Compatibility. This uglymoron is moved from INET layer to here to avoid deadlock in module load. */ if (family == PF_INET && type == SOCK_PACKET) { static int warned; if (!warned) { warned = 1; printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); } family = PF_PACKET; } err = security_socket_create(family, type, protocol, kern); if (err) return err; #if defined(CONFIG_KMOD) /* Attempt to load a protocol module if the find failed. * * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ if (net_families[family]==NULL) { request_module("net-pf-%d",family); }#endif net_family_read_lock(); if (net_families[family] == NULL) { i = -EAFNOSUPPORT; goto out; }/* * Allocate the socket and allow the family to set things up. if * the protocol is 0, the family is instructed to select an appropriate * default. */ //从sockfs分配一个inode,并为之分配一个套接字结构 if (!(sock = sock_alloc())) { printk(KERN_WARNING "socket: no more sockets\n"); i = -ENFILE; /* Not exactly a match, but its the closest posix thing */ goto out; } //设置类型 sock->type = type; /* * We will call the ->create function, that possibly is in a loadable * module, so we have to bump that loadable module refcnt first. */ i = -EAFNOSUPPORT; if (!try_module_get(net_families[family]->owner)) goto out_release; //调用具体协议的create函数 if ((i = net_families[family]->create(sock, protocol)) < 0) goto out_module_put; /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. */ if (!try_module_get(sock->ops->owner)) { sock->ops = NULL; goto out_module_put; } /* * Now that we're done with the ->create function, the [loadable] * module can have its refcnt decremented */ module_put(net_families[family]->owner); *res = sock; security_socket_post_create(sock, family, type, protocol, kern);out: net_family_read_unlock(); return i;out_module_put: module_put(net_families[family]->owner);out_release: sock_release(sock); goto out;}///////////////////////////////////////////////////////////int sock_map_fd(struct socket *sock){ int fd; struct qstr this; char name[32]; /* * Find a file descriptor suitable for return to the user. */ //分配一个没有使用的描述符 fd = get_unused_fd(); if (fd >= 0) { struct file *file = get_empty_filp(); if (!file) { put_unused_fd(fd); fd = -ENFILE; goto out; } sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); this.name = name; this.len = strlen(name); this.hash = SOCK_INODE(sock)->i_ino; //从sockfs文件系统中分配一个目录项对象 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); if (!file->f_dentry) { put_filp(file); put_unused_fd(fd); fd = -ENOMEM; goto out; } file->f_dentry->d_op = &sockfs_dentry_operations; //将目录项对象与sock的索引节点关联起来 d_add(file->f_dentry, SOCK_INODE(sock)); file->f_vfsmnt = mntget(sock_mnt); file->f_mapping = file->f_dentry->d_inode->i_mapping; //设置sock对应的文件对象 sock->file = file; //设置文件对象的操作函数 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; file->f_mode = FMODE_READ | FMODE_WRITE; file->f_flags = O_RDWR; file->f_pos = 0; fd_install(fd, file); }out: return fd;} 3.2、INET套接字
INET套接字就是支持 Internet 地址族的套接字,它位于TCP协议之上, BSD套接字之下,如下:
3.2.1、数据结构
//include/net/sock.h//与特定协议相关的socketstruct sock { /* * Now struct tcp_tw_bucket also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common;#define sk_family __sk_common.skc_family#define sk_state __sk_common.skc_state#define sk_reuse __sk_common.skc_reuse#define sk_bound_dev_if __sk_common.skc_bound_dev_if#define sk_node __sk_common.skc_node#define sk_bind_node __sk_common.skc_bind_node#define sk_refcnt __sk_common.skc_refcnt volatile unsigned char sk_zapped; unsigned char sk_shutdown; unsigned char sk_use_write_queue; unsigned char sk_userlocks; socket_lock_t sk_lock; int sk_rcvbuf; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; rwlock_t sk_dst_lock; struct xfrm_policy *sk_policy[2]; atomic_t sk_rmem_alloc; struct sk_buff_head sk_receive_queue; atomic_t sk_wmem_alloc; struct sk_buff_head sk_write_queue; atomic_t sk_omem_alloc; int sk_wmem_queued; int sk_forward_alloc; unsigned int sk_allocation; int sk_sndbuf; unsigned long sk_flags; char sk_no_check; unsigned char sk_debug; unsigned char sk_rcvtstamp; unsigned char sk_no_largesend; int sk_route_caps; unsigned long sk_lingertime; int sk_hashent; /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. */ struct { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; rwlock_t sk_callback_lock; struct sk_buff_head sk_error_queue; struct proto *sk_prot; int sk_err, sk_err_soft; unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; unsigned short sk_type; unsigned char sk_localroute; unsigned char sk_protocol; struct ucred sk_peercred; int sk_rcvlowat; long sk_rcvtimeo; long sk_sndtimeo; struct sk_filter *sk_filter; void *sk_protinfo; kmem_cache_t *sk_slab; struct timer_list sk_timer; struct timeval sk_stamp; struct socket *sk_socket; void *sk_user_data; struct module *sk_owner; struct page *sk_sndmsg_page; __u32 sk_sndmsg_off; struct sk_buff *sk_send_head; int sk_write_pending; void *sk_security; __u8 sk_queue_shrunk; /* three bytes hole, try to pack */ void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk);};//底层协议的操作函数struct proto { void (*close)(struct sock *sk, long timeout); int (*connect)(struct sock *sk, struct sockaddr *uaddr, int addr_len); int (*disconnect)(struct sock *sk, int flags); struct sock * (*accept) (struct sock *sk, int flags, int *err); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); int (*init)(struct sock *sk); int (*destroy)(struct sock *sk); void (*shutdown)(struct sock *sk, int how); int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *option); int (*sendmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); int (*recvmsg)(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, int offset, size_t size, int flags); int (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len); int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Memory pressure */ void (*enter_memory_pressure)(void); atomic_t *memory_allocated; /* Current allocated memory. */ atomic_t *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. * All the sk_stream_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int *memory_pressure; int *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; int max_header; kmem_cache_t *slab; int slab_obj_size; struct module *owner; char name[32]; struct { int inuse; u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; } stats[NR_CPUS];}; inet_init()函数:
//net/ipv4/af_inet.c/*系统初始化时被调用**调用路径:start_kernel() -->init() -->do_basic_setup() -->do_initcalls()-->inet_init()*/static int __init inet_init(void){ struct sk_buff *dummy_skb; struct inet_protosw *q; struct list_head *r; int rc = -EINVAL; if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { printk(KERN_CRIT "%s: panic\n", __FUNCTION__); goto out; } rc = sk_alloc_slab(&tcp_prot, "tcp_sock"); if (rc) { sk_alloc_slab_error(&tcp_prot); goto out; } rc = sk_alloc_slab(&udp_prot, "udp_sock"); if (rc) { sk_alloc_slab_error(&udp_prot); goto out_tcp_free_slab; } rc = sk_alloc_slab(&raw_prot, "raw_sock"); if (rc) { sk_alloc_slab_error(&raw_prot); goto out_udp_free_slab; } /* * Tell SOCKET that we are alive */ //注册Internet协议簇的相关信息 (void)sock_register(&inet_family_ops); /* * Add all the base protocols. */ //添加基本的协议 if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");#ifdef CONFIG_IP_MULTICAST if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");#endif /* Register the socket-side information for inet_create. */ for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); //将inetsw_array中元素加入到inetsw链表中 for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) inet_register_protosw(q); /* * Set the ARP module up */ arp_init(); //ARP协议初始化 /* * Set the IP module up */ ip_init(); //IP协议初始化 tcp_v4_init(&inet_family_ops); /* Setup TCP slab cache for open requests. */ tcp_init(); /* * Set the ICMP layer up */ icmp_init(&inet_family_ops); /* * Initialise the multicast router */#if defined(CONFIG_IP_MROUTE) ip_mr_init();#endif /* * Initialise per-cpu ipv4 mibs */ if(init_ipv4_mibs()) printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; ipv4_proc_init(); ipfrag_init(); rc = 0;out: return rc;out_tcp_free_slab: sk_free_slab(&tcp_prot);out_udp_free_slab: sk_free_slab(&udp_prot); goto out;}//net/ipv4/af_inet.c//INET协议簇信息static struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE,};static struct list_head inetsw[SOCK_MAX];//该数组中的所有元素都会插入到inetsw的链表中static struct inet_protosw inetsw_array[] ={ { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }, { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, .capability = -1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }};//流套接字操作函数struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage};//tcp协议static struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1,};static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1,};static struct net_protocol icmp_protocol = { .handler = icmp_rcv,};//net/ipv4/tcp_ipv4.c//tcp协议的操作函数struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, .close = tcp_close, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, .accept = tcp_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .sendmsg = tcp_sendmsg, .recvmsg = tcp_recvmsg, .backlog_rcv = tcp_v4_do_rcv, .hash = tcp_v4_hash, .unhash = tcp_unhash, .get_port = tcp_v4_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem = sysctl_tcp_wmem, .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .slab_obj_size = sizeof(struct tcp_sock),}; sock_register()函数:
//注册协议簇int sock_register(struct net_proto_family *ops){ int err; if (ops->family >= NPROTO) { printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); return -ENOBUFS; } net_family_write_lock(); err = -EEXIST; if (net_families[ops->family] == NULL) { net_families[ops->family]=ops; err = 0; } net_family_write_unlock(); printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err;} inet_create()函数
//创建一个INET套接字static int inet_create(struct socket *sock, int protocol){ struct sock *sk; struct list_head *p; struct inet_protosw *answer; struct inet_opt *inet; struct proto *answer_prot; unsigned char answer_flags; char answer_no_check; int err; sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ answer = NULL; rcu_read_lock(); list_for_each_rcu(p, &inetsw[sock->type]) { answer = list_entry(p, struct inet_protosw, list); /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } answer = NULL; } err = -ESOCKTNOSUPPORT; if (!answer) goto out_rcu_unlock; err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; err = -EPROTONOSUPPORT; if (!protocol) goto out_rcu_unlock; //BSD socket的操作函数 sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot->slab_obj_size, answer_prot->slab); if (sk == NULL) goto out; err = 0; //特定协议套接字的操作函数 sk->sk_prot = answer_prot; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = 1; inet = inet_sk(sk); if (SOCK_RAW == sock->type) { inet->num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } if (ipv4_config.no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; inet->id = 0; //将sock与sk关联起来 sock_init_data(sock, sk); sk_set_owner(sk, sk->sk_prot->owner); sk->sk_destruct = inet_sock_destruct; sk->sk_family = PF_INET; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; inet->uc_ttl = -1; inet->mc_loop = 1; inet->mc_ttl = 1; inet->mc_index = 0; inet->mc_list = NULL;#ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr);#endif if (inet->num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ inet->sport = htons(inet->num); /* Add to protocol hash chains. */ sk->sk_prot->hash(sk); } //调用init函数 if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) sk_common_release(sk); }out: return err;out_rcu_unlock: rcu_read_unlock(); goto out;} 转载地址:http://gyfyz.baihongyu.com/