34

netstat源代码调试&原理分析 | Spoock

 5 years ago
source link: https://blog.spoock.com/2019/05/26/netstat-learn/?
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

netstat源代码调试&原理分析

2019-05-26

估计平时大部分人都是通过netstat来查看网络状态,但事实上netstat已经逐渐被其他命令替代,很多新的Linux发行版本都已经不再自带netstat。以ubuntu 18.04为例来进行说明:

1
2
~ netstat 
zsh: command not found: netstat

按照difference between netstat and ss in linux?这篇文章的说法,

NOTE
This program is obsolete. Replacement for netstat is ss. Replacement
for netstat -r is ip route. Replacement for netstat -i is ip -s link.
Replacement for netstat -g is ip maddr.

中文含义就是:netstat已经过时了,netstat的部分命令已经被ip这个命令取代了,当然还有更为强大的ss
ss命令用来显示处于活动状态的套接字信息。ss命令可以用来获取socket统计信息,它可以显示和netstat类似的内容。但ss的优势在于它能够显示更多更详细的有关TCP和连接状态的信息,而且比netstat更快速更高效。netstat显示网络信息的原理仅仅只是解析/proc/net/tcp等文件,所以如果服务器的socket连接数量变得非常大,那么netstat的执行速度会非常慢。而ss采用的是通过tcp_diag的方式来获取网络信息,tcp_diag通过netlink的方式从内核拿到网络信息,这也是ss更高效更全面的原因。

下图就展示了ss和netstat在监控上面的区别。

ss.png

ss获取的是socket的信息,而netstat是通过解析/proc/net/下面的文件来获取信息,包括Sockets、TCP/UDP、IP、Ethernet信息。

netstat和ss的效率对比,在同一台机器上执行:

1
2
3
4
5
6
7
8
9
10
time ss
........
real 0m0.016s
user 0m0.001s
sys 0m0.001s
--------------------------------
time netstat
real 0m0.198s
user 0m0.009s
sys 0m0.011s

ss明显比netstat更加高效.

netstat简介

netstat是net-tools工具包中的一个工具,net-tools项目提供了一份完整的源码,我们通过net-tools的源码来看看netstat的实现原理。

netstat源代码调试

下载net-tools之后,导入到Clion中,创建CMakeLists.txt文件,内容如下:

1
2
3
4
5
6
7
cmake_minimum_required(VERSION 3.13)
project(test C)

set(BUILD_DIR .)

#add_executable()
add_custom_target(netstat command -c ${BUILD_DIR})

修改根目录下的Makefile中的59行的编译配置为:

1
CFLAGS ?= -O0 -g3

netstat.png

按照如上图设置自己的编译选项

以上就是搭建netstat的源代码调试过程。

tcp show

在netstat不带任何参数运行的情况下,程序首先会运行到2317行的tcp_info()

1
2
3
4
5
6
7
8
9
10
11
12
13
#if HAVE_AFINET
if (!flag_arg || flag_tcp) {
i = tcp_info();
if (i)
return (i);
}

if (!flag_arg || flag_sctp) {
i = sctp_info();
if (i)
return (i);
}
.........

跟踪进入到tcp_info():

1
2
3
4
5
/*
 * Walk the TCP socket tables.
 *
 * The whole body is the INFO_GUTS6 macro, expanded with:
 *   - _PATH_PROCNET_TCP / _PATH_PROCNET_TCP6 as the IPv4 / IPv6 input files,
 *   - "AF INET (tcp)" as the name passed through to INFO_GUTS1,
 *   - tcp_do_one as the handler invoked once per line read,
 *   - "tcp" / "tcp6" as the protocol strings handed to that handler.
 *
 * No return statement is written here; presumably the return value comes
 * from the INFO_GUTS3 expansion at the end of INFO_GUTS6 (not shown) --
 * confirm against the macro definition.
 */
static int tcp_info(void)
{
INFO_GUTS6(_PATH_PROCNET_TCP, _PATH_PROCNET_TCP6, "AF INET (tcp)",
tcp_do_one, "tcp", "tcp6");
}

参数的情况如下:

  • _PATH_PROCNET_TCP,在lib/pathnames.h中定义,是#define _PATH_PROCNET_TCP "/proc/net/tcp"
  • _PATH_PROCNET_TCP6, 在lib/pathnames.h中定义, 是#define _PATH_PROCNET_TCP6 "/proc/net/tcp6"
  • tcp_do_one,函数指针,位于1100行,部分代码如下:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    static void tcp_do_one(int lnr, const char *line, const char *prot)
    {
    unsigned long rxq, txq, time_len, retr, inode;
    int num, local_port, rem_port, d, state, uid, timer_run, timeout;
    char rem_addr[128], local_addr[128], timers[64];
    const struct aftype *ap;
    struct sockaddr_storage localsas, remsas;
    struct sockaddr_in *localaddr = (struct sockaddr_in *)&localsas;
    struct sockaddr_in *remaddr = (struct sockaddr_in *)&remsas;
    ......

    tcp_do_one()就是用来解析/proc/net/tcp和/proc/net/tcp6每一行的含义的,关于/proc/net/tcp的每一行的含义可以参考之前写过的osquery源码解读之分析process_open_socket中的扩展章节。

INFO_GUTS6

1
2
3
4
5
6
7
8
9
10
11
/*
 * INFO_GUTS6(file, file6, name, proc, prot4, prot6)
 *
 * Function-body template for a dual-stack table walker.  It declares the
 * locals `buffer` (8192-byte line buffer), `rc` and `lnr`, so it has to be
 * expanded where declarations are allowed and only once per scope.
 *
 *   - If flag_arg is zero or flag_inet is set, INFO_GUTS1 is expanded for
 *     `file` with `prot4`.
 *   - If flag_arg is zero or flag_inet6 is set, INFO_GUTS2 is expanded for
 *     `file6` with `prot6`.
 *   - INFO_GUTS3 is expanded last.
 *
 * INFO_GUTS2 and INFO_GUTS3 are defined elsewhere.  INFO_GUTS1 uses a
 * variable named `procinfo` that is not declared by this macro; presumably
 * it is declared at file scope -- confirm.
 */
#define INFO_GUTS6(file,file6,name,proc,prot4,prot6)	\
char buffer[8192]; \
int rc = 0; \
int lnr = 0; \
if (!flag_arg || flag_inet) { \
INFO_GUTS1(file,name,proc,prot4) \
} \
if (!flag_arg || flag_inet6) { \
INFO_GUTS2(file6,proc,prot6) \
} \
INFO_GUTS3

INFO_GUTS6采用了#define的方式进行定义,最终根据flag_inet(IPv4)或者flag_inet6(IPv6)选项分别展开不同的宏,我们以INFO_GUTS1(file,name,proc,prot4)进一步分析。

INFO_GUTS1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * INFO_GUTS1(file, name, proc, prot)
 *
 * Opens `file` with proc_fopen() and feeds it line by line to `proc`.
 * Relies on `procinfo`, `buffer`, `lnr` and `rc` being in scope at the
 * expansion site.
 *
 * Open failure:
 *   - errno other than ENOENT / EACCES: perror(file) and `return -1` from
 *     the enclosing function.
 *   - otherwise, unless flag_noprot is set: ESYSNOT("netstat", name) is
 *     invoked when flag_arg or flag_ver is set, and rc becomes 1 when
 *     flag_arg is set.
 *
 * Open success:
 *   - each successful fgets() into `buffer` results in one call
 *     proc(lnr++, buffer, prot); the loop runs until feof(procinfo) is
 *     true, then the stream is closed.
 *
 * NOTE(review): the loop condition checks only feof(); a read error that
 * does not set the EOF indicator would presumably keep the loop spinning --
 * worth confirming.
 */
#define INFO_GUTS1(file,name,proc,prot)			\
procinfo = proc_fopen((file)); \
if (procinfo == NULL) { \
if (errno != ENOENT && errno != EACCES) { \
perror((file)); \
return -1; \
} \
if (!flag_noprot && (flag_arg || flag_ver)) \
ESYSNOT("netstat", (name)); \
if (!flag_noprot && flag_arg) \
rc = 1; \
} else { \
do { \
if (fgets(buffer, sizeof(buffer), procinfo)) \
(proc)(lnr++, buffer,prot); \
} while (!feof(procinfo)); \
fclose(procinfo); \
}
  1. procinfo = proc_fopen((file)) 获取/proc/net/tcp的文件句柄
  2. fgets(buffer, sizeof(buffer), procinfo) 逐行读取文件内容并将每一行的内容存储在buffer中
  3. (proc)(lnr++, buffer,prot),利用(proc)函数解析buffer。(proc)就是前面说明的tcp_do_one()函数

tcp_do_one

" 14: 020110AC:B498 CF0DE1B9:4362 06 00000000:00000000 03:000001B2 00000000 0 0 0 3 0000000000000000这一行为例来说明tcp_do_one()函数的执行过程。

tcp_do_one_1.png

由于分析的是IPv4,所以会跳过#if HAVE_AFINET6这段代码。之后执行:

1
2
3
4
5
6
7
8
num = sscanf(line,
"%d: %64[0-9A-Fa-f]:%X %64[0-9A-Fa-f]:%X %X %lX:%lX %X:%lX %lX %d %d %lu %*s\n",
&d, local_addr, &local_port, rem_addr, &rem_port, &state,
&txq, &rxq, &timer_run, &time_len, &retr, &uid, &timeout, &inode);
if (num < 11) {
fprintf(stderr, _("warning, got bogus tcp line.\n"));
return;
}

解析数据,并将每一列的数据分别填充到对应的字段上面。分析一下其中的每个字段的定义:

1
2
3
4
char rem_addr[128], local_addr[128], timers[64];
struct sockaddr_storage localsas, remsas;
struct sockaddr_in *localaddr = (struct sockaddr_in *)&localsas;
struct sockaddr_in *remaddr = (struct sockaddr_in *)&remsas;

Linux中sockaddr_in和sockaddr_storage的定义如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Generic socket address: a family tag followed by 14 opaque bytes. */
struct sockaddr {
unsigned short sa_family; // address family, AF_xxx
char sa_data[14]; // 14 bytes of protocol address
};


/*
 * IPv4 socket address.  tcp_do_one() views its sockaddr_storage buffers
 * through a pointer to this type in order to fill in sin_addr.s_addr.
 */
struct  sockaddr_in {
short  int  sin_family;                      /* Address family */
unsigned  short  int  sin_port;       /* Port number */
struct  in_addr  sin_addr;              /* Internet address */
unsigned  char  sin_zero[8];         /* Same size as struct sockaddr */
};
/* Internet address. */
struct in_addr {
uint32_t s_addr; /* address in network byte order */
};

/*
 * Family-agnostic storage: only ss_family is meant to be accessed directly,
 * the remaining members are padding/alignment.
 */
struct sockaddr_storage {
sa_family_t ss_family; // address family

// all this is padding, implementation specific, ignore it:
char __ss_pad1[_SS_PAD1SIZE];
int64_t __ss_align;
char __ss_pad2[_SS_PAD2SIZE];
};

之后代码继续执行:

1
2
3
4
sscanf(local_addr, "%X", &localaddr->sin_addr.s_addr);
sscanf(rem_addr, "%X", &remaddr->sin_addr.s_addr);
localsas.ss_family = AF_INET;
remsas.ss_family = AF_INET;

对local_addr使用sscanf(…, "%X", …)把其中的十六进制字符串解析为整数,保存到&localaddr->sin_addr.s_addr(即in_addr结构体中的s_addr)中,&remaddr->sin_addr.s_addr同理。运行结果如下所示:

saddr.png

addr_do_one

1
2
addr_do_one(local_addr, sizeof(local_addr), 22, ap, &localsas, local_port, "tcp");
addr_do_one(rem_addr, sizeof(rem_addr), 22, ap, &remsas, rem_port, "tcp");

程序继续执行,最终会执行到addr_do_one()函数,用于解析本地IP地址和端口,以及远程IP地址和端口。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Build the "address:port" text for one endpoint and store it in buf.
 *
 * buf        output buffer
 * buf_len    size of buf; only used by the snprintf() path
 * short_len  budget for address text plus port text when flag_wide is off
 * ap         address-family descriptor; its sprint() callback renders addr
 * addr       endpoint address to render
 * port       port number, passed through htons() before the name lookup
 * proto      protocol string handed to get_sname()
 *
 * The flag_not bits FLAG_NUM_HOST / FLAG_NUM_PORT are forwarded to
 * ap->sprint() and get_sname() respectively.
 *
 * NOTE(review): the truncating branch never looks at buf_len; it relies on
 * short_len + 2 fitting in buf (the callers shown pass 22 and a 128-byte
 * buffer) -- confirm for any other caller.
 */
static void addr_do_one(char *buf, size_t buf_len, size_t short_len, const struct aftype *ap,
const struct sockaddr_storage *addr,
int port, const char *proto
)
{
const char *sport, *saddr;
size_t port_len, addr_len;

saddr = ap->sprint(addr, flag_not & FLAG_NUM_HOST);
sport = get_sname(htons(port), proto, flag_not & FLAG_NUM_PORT);
addr_len = strlen(saddr);
port_len = strlen(sport);
/* Truncate only when not in wide mode and the two parts exceed the budget. */
if (!flag_wide && (addr_len + port_len > short_len)) {
/* Assume port name is short */
/* Port keeps at most short_len - 4 characters; the address gets the rest. */
port_len = netmin(port_len, short_len - 4);
addr_len = short_len - port_len;
/* strncpy() may leave buf unterminated, hence the explicit NUL below. */
strncpy(buf, saddr, addr_len);
buf[addr_len] = '\0';
strcat(buf, ":");
strncat(buf, sport, port_len);
} else
snprintf(buf, buf_len, "%s:%s", saddr, sport);
}
  1. saddr = ap->sprint(addr, flag_not & FLAG_NUM_HOST); 这个表示是否需要将addr转换为域名的形式。由于addr值是127.0.0.1,转换之后得到的就是localhost,其中FLAG_NUM_HOST就等价于--numeric-hosts的选项。
  2. sport = get_sname(htons(port), proto, flag_not & FLAG_NUM_PORT);,本例中的port无法转换为服务名,其中的FLAG_NUM_PORT就等价于--numeric-ports这个选项。
  3. !flag_wide && (addr_len + port_len > short_len) 这个代码的含义是判断是否需要对IP和PORT进行截断。其中flag_wide等同于-W, --wide don't truncate IP addresses。而short_len长度是22.
  4. snprintf(buf, buf_len, "%s:%s", saddr, sport);,将IP:PORT赋值给buf.

output

最终程序执行

1
2
printf("%-4s  %6ld %6ld %-*s %-*s %-11s",
prot, rxq, txq, (int)netmax(23,strlen(local_addr)), local_addr, (int)netmax(23,strlen(rem_addr)), rem_addr, _(tcp_state[state]));

按照指定的格式输出结果

finish_this_one

最终程序会执行finish_this_one(uid,inode,timers);.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Print the optional trailing columns of one socket line, then a newline.
 *
 * uid     owner uid of the socket
 * inode   socket inode, also the key for the program-cache lookups
 * timers  preformatted timer text, printed only when flag_opt is set
 *
 * Columns, in order, each gated by its own flag:
 *   flag_exp > 1  -> user column and inode column
 *   flag_prg      -> prg_cache_get(inode)
 *   flag_selinux  -> prg_cache_get_con(inode)
 *   flag_opt      -> timers
 */
static void finish_this_one(int uid, unsigned long inode, const char *timers)
{
struct passwd *pw;

if (flag_exp > 1) {
/* Show the user name unless FLAG_NUM_USER is set or getpwuid() fails;
   in either of those cases fall back to the numeric uid. */
if (!(flag_not & FLAG_NUM_USER) && ((pw = getpwuid(uid)) != NULL))
printf(" %-10s ", pw->pw_name);
else
printf(" %-10d ", uid);
printf("%-10lu",inode);
}
if (flag_prg)
printf(" %-" PROGNAME_WIDTHs "s",prg_cache_get(inode));
if (flag_selinux)
printf(" %-" SELINUX_WIDTHs "s",prg_cache_get_con(inode));

if (flag_opt)
printf(" %s", timers);
/* Always terminate the output line, whichever columns were printed. */
putchar('\n');
}
  1. flag_exp 等同于-e的参数。-e, --extend display other/more information.举例如下:

    1
    2
    3
    4
    5
    6
    7
    netstat -e 
    Proto Recv-Q Send-Q Local Address Foreign Address State User Inode
    tcp 0 0 localhost:6379 172.16.1.200:46702 ESTABLISHED redis 437788048

    netstat
    Proto Recv-Q Send-Q Local Address Foreign Address State
    tcp 0 0 localhost:6379 172.16.1.200:46702 ESTABLISHED

    发现使用-e参数会多显示User和Inode号码。而在本例中,如果通过getpwuid查不到对应的用户名,则显示uid。

  2. flag_prg等同于-p, --programs display PID/Program name for sockets.举例如下:

    1
    2
    3
    4
    5
    6
    7
    netstat -pe
    Proto Recv-Q Send-Q Local Address Foreign Address State User Inode PID/Program name
    tcp 0 0 localhost:6379 172.16.1.200:34062 ESTABLISHED redis 437672000 6017/redis-server *

    netstat -e
    Proto Recv-Q Send-Q Local Address Foreign Address State User Inode
    tcp 0 0 localhost:6379 172.16.1.200:46702 ESTABLISHED redis 437788048

    可以看到是通过prg_cache_get(inode),根据inode来找到对应的PID和进程信息;

  3. flag_selinux等同于-Z, --context display SELinux security context for sockets

prg_cache_get

对于上面的通过inode找到对应进程的方法非常的好奇,于是去追踪prg_cache_get()函数的实现。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* Number of buckets in the inode -> program cache. */
#define PRG_HASH_SIZE 211

/* Bucket index for a key: the key modulo the bucket count. */
#define PRG_HASHIT(x) ((x) % PRG_HASH_SIZE)

/*
 * One cache entry, chained per bucket through `next`.
 * prg_hash is the bucket array; as a static object it starts out with every
 * bucket NULL.
 */
static struct prg_node {
struct prg_node *next;
unsigned long inode;		/* lookup key */
char name[PROGNAME_WIDTH];	/* text returned by prg_cache_get() */
char scon[SELINUX_WIDTH];	/* presumably the SELinux context text -- confirm */
} *prg_hash[PRG_HASH_SIZE];

/*
 * Look up the cached name for a socket inode.
 *
 * Hashes the inode with PRG_HASHIT, walks that bucket's chain and returns
 * the `name` field of the first node whose inode matches.  Returns the
 * string literal "-" when no node matches.  The returned pointer refers to
 * storage inside the cache node (or to the literal), not to a copy.
 */
static const char *prg_cache_get(unsigned long inode)
{
unsigned hi = PRG_HASHIT(inode);
struct prg_node *pn;

for (pn = prg_hash[hi]; pn; pn = pn->next)
if (pn->inode == inode)
return (pn->name);
return ("-");
}

prg_hash中存储了所有的inode编号与program的对应关系,所以当给定一个inode编号时就能够找到对应的程序名称。那么prg_hash又是如何初始化的呢?

prg_cache_load

我们使用debug模式,加入-p的运行参数:

netstat-p.png

程序会运行到2289行的prg_cache_load(); 进入到prg_cache_load()函数中.
由于整个函数的代码较长,拆分来分析.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#define PATH_PROC      "/proc"
#define PATH_FD_SUFF "fd"
#define PATH_FD_SUFFl strlen(PATH_FD_SUFF)
#define PATH_PROC_X_FD PATH_PROC "/%s/" PATH_FD_SUFF
#define PATH_CMDLINE "cmdline"
#define PATH_CMDLINEl strlen(PATH_CMDLINE)

if (!(dirproc=opendir(PATH_PROC))) goto fail;
while (errno = 0, direproc = readdir(dirproc)) {
for (cs = direproc->d_name; *cs; cs++)
if (!isdigit(*cs))
break;
if (*cs)
continue;
procfdlen = snprintf(line,sizeof(line),PATH_PROC_X_FD,direproc->d_name);
if (procfdlen <= 0 || procfdlen >= sizeof(line) - 5)
continue;
errno = 0;
dirfd = opendir(line);
if (! dirfd) {
if (errno == EACCES)
eacces = 1;
continue;
}
line[procfdlen] = '/';
cmdlp = NULL;
  1. dirproc=opendir(PATH_PROC); 以及 errno = 0, direproc = readdir(dirproc)  遍历/proc,只保留名字全为数字的目录项,从而拿到所有的pid
  2. procfdlen = snprintf(line,sizeof(line),PATH_PROC_X_FD,direproc->d_name); 为每一个pid拼接出/proc/pid/fd路径
  3. dirfd = opendir(line); 打开/proc/pid/fd目录,得到对应的目录句柄
获取inode
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
while ((direfd = readdir(dirfd))) {
/* Skip . and .. */
if (!isdigit(direfd->d_name[0]))
continue;
if (procfdlen + 1 + strlen(direfd->d_name) + 1 > sizeof(line))
   continue;
memcpy(line + procfdlen - PATH_FD_SUFFl, PATH_FD_SUFF "/",
PATH_FD_SUFFl + 1);
safe_strncpy(line + procfdlen + 1, direfd->d_name,
sizeof(line) - procfdlen - 1);
lnamelen = readlink(line, lname, sizeof(lname) - 1);
if (lnamelen == -1)
continue;
lname[lnamelen] = '\0'; /*make it a null-terminated string*/

if (extract_type_1_socket_inode(lname, &inode) < 0)
if (extract_type_2_socket_inode(lname, &inode) < 0)
continue;
  1. memcpy(line + procfdlen - PATH_FD_SUFFl, PATH_FD_SUFF "/",PATH_FD_SUFFl + 1);safe_strncpy(line + procfdlen + 1, direfd->d_name, sizeof(line) - procfdlen - 1); 把遍历到的fd编号拼接到路径后面,得到类似/proc/pid/fd/3这样的完整路径
  2. lnamelen = readlink(line, lname, sizeof(lname) - 1); 得到fd所指向的link,因为通常情况下fd一般都是链接,要么是socket链接要么是pipe链接.如下所示:

    1
    2
    3
    4
    5
    6
    7
    8
    $ ls -al /proc/1289/fd
    total 0
    dr-x------ 2 username username 0 May 25 15:45 .
    dr-xr-xr-x 9 username username 0 May 25 09:11 ..
    lr-x------ 1 username username 64 May 25 16:23 0 -> 'pipe:[365366]'
    l-wx------ 1 username username 64 May 25 16:23 1 -> 'pipe:[365367]'
    l-wx------ 1 username username 64 May 25 16:23 2 -> 'pipe:[365368]'
    lr-x------ 1 username username 64 May 25 16:23 3 -> /proc/uptime
  3. 通过extract_type_1_socket_inode获取到link中对应的inode编号.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    #define PRG_SOCKET_PFX    "socket:["
    #define PRG_SOCKET_PFXl (strlen(PRG_SOCKET_PFX))
    static int extract_type_1_socket_inode(const char lname[], unsigned long * inode_p) {

    /* If lname is of the form "socket:[12345]", extract the "12345"
    as *inode_p. Otherwise, return -1 as *inode_p.
    */
    // 判断长度是否小于 strlen(socket:[)+3
    if (strlen(lname) < PRG_SOCKET_PFXl+3) return(-1);

    //函数说明:memcmp()用来比较s1 和s2 所指的内存区间前n 个字符。
    // 判断lname是否以 socket:[ 开头
    if (memcmp(lname, PRG_SOCKET_PFX, PRG_SOCKET_PFXl)) return(-1);
    if (lname[strlen(lname)-1] != ']') return(-1); {
    char inode_str[strlen(lname + 1)]; /* e.g. "12345" */
    const int inode_str_len = strlen(lname) - PRG_SOCKET_PFXl - 1;
    char *serr;

    // 获取到inode的编号
    strncpy(inode_str, lname+PRG_SOCKET_PFXl, inode_str_len);
    inode_str[inode_str_len] = '\0';
    *inode_p = strtoul(inode_str, &serr, 0);
    if (!serr || *serr || *inode_p == ~0)
    return(-1);
    }
  4. 获取程序对应的cmdline

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    if (!cmdlp) {
    if (procfdlen - PATH_FD_SUFFl + PATH_CMDLINEl >=sizeof(line) - 5)
    continue;
    safe_strncpy(line + procfdlen - PATH_FD_SUFFl, PATH_CMDLINE,sizeof(line) - procfdlen + PATH_FD_SUFFl);
    fd = open(line, O_RDONLY);
    if (fd < 0)
    continue;
    cmdllen = read(fd, cmdlbuf, sizeof(cmdlbuf) - 1);
    if (close(fd))
    continue;
    if (cmdllen == -1)
    continue;
    if (cmdllen < sizeof(cmdlbuf) - 1)
    cmdlbuf[cmdllen]='\0';
    if (cmdlbuf[0] == '/' && (cmdlp = strrchr(cmdlbuf, '/')))
    cmdlp++;
    else
    cmdlp = cmdlbuf;
    }

    由于cmdline是可以直接读取的,所以并不需要像读取fd那样借助于readlink()函数,直接通过 read(fd, cmdlbuf, sizeof(cmdlbuf) - 1) 即可读取文件内容.

  5. snprintf(finbuf, sizeof(finbuf), "%s/%s", direproc->d_name, cmdlp); 拼接pid和cmdlp,最终得到的就是类似于 6017/redis-server * 这样的效果 
  6. 最终程序调用 prg_cache_add(inode, finbuf, "-"); 将解析得到的inode和finbuf 加入到缓存中.

prg_cache_add

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/* Number of buckets in the inode -> program cache. */
#define PRG_HASH_SIZE 211
/* Bucket index for a key: the key modulo the bucket count. */
#define PRG_HASHIT(x) ((x) % PRG_HASH_SIZE)
/*
 * One cache entry, chained per bucket through `next`.
 * prg_hash is the bucket array.  Its bound must be spelled out: a static
 * object may not be declared with an incomplete array type, and
 * PRG_HASHIT() assumes exactly PRG_HASH_SIZE buckets.
 */
static struct prg_node {
struct prg_node *next;
unsigned long inode;
char name[PROGNAME_WIDTH];
char scon[SELINUX_WIDTH];
} *prg_hash[PRG_HASH_SIZE];

/*
 * Insert an inode -> (name, scon) entry into the program cache.
 *
 * inode  key of the new entry
 * name   text copied into the node's `name` field (truncated to fit)
 * scon   text copied into the node's `scon` field; when it is too long the
 *        copy starts at an offset into the string, so the tail is kept
 *
 * If the inode is already cached the existing entry is left untouched.
 * An allocation failure is silently ignored.  prg_cache_loaded is set to 2
 * on every call, before either of those early returns.
 */
static void prg_cache_add(unsigned long inode, char *name, const char *scon)
{
unsigned hi = PRG_HASHIT(inode);
struct prg_node **pnp,*pn;

prg_cache_loaded = 2;
/* Walk the bucket; when the loop ends pnp addresses the terminating NULL
   link, which is where the new node gets attached. */
for (pnp = prg_hash + hi; (pn = *pnp); pnp = &pn->next) {
if (pn->inode == inode) {
/* Some warning should be appropriate here
as we got multiple processes for one i-node */
return;
}
}
if (!(*pnp = malloc(sizeof(**pnp))))
return;
pn = *pnp;
pn->next = NULL;
pn->inode = inode;
safe_strncpy(pn->name, name, sizeof(pn->name));

{
/* NOTE(review): the subtraction is done in an unsigned type and then
   stored in an int; the `len > 0` test depends on that conversion
   producing a negative value for short strings -- confirm. */
int len = (strlen(scon) - sizeof(pn->scon)) + 1;
if (len > 0)
safe_strncpy(pn->scon, &scon[len + 1], sizeof(pn->scon));
else
safe_strncpy(pn->scon, scon, sizeof(pn->scon));
}

}
  1. unsigned hi = PRG_HASHIT(inode); 使用inode对211取模,得到的余数作为hash值
  2. for (pnp = prg_hash + hi; (pn = *pnp); pnp = &pn->next) 由于prg_hash的每个桶都是一个链表,所以通过for循环找到链表的结尾(如果途中发现相同的inode则直接返回);
  3. pn = *pnp;pn->next = NULL;pn->inode = inode;safe_strncpy(pn->name, name, sizeof(pn->name)); 为新的节点赋值并将其加入到链表的末尾;

所以prg_hash是一个全局变量,是一个由prg_node链表组成的hash表结构,保存了inode编号与pid/cmdline之间的对应关系;

prg_cache_get

1
2
3
4
5
6
7
8
9
10
/*
 * Look up the cached name for a socket inode.
 *
 * Hashes the inode with PRG_HASHIT, walks that bucket's chain and returns
 * the `name` field of the first node whose inode matches.  Returns the
 * string literal "-" when no node matches.  The returned pointer refers to
 * storage inside the cache node (or to the literal), not to a copy.
 */
static const char *prg_cache_get(unsigned long inode)
{
unsigned hi = PRG_HASHIT(inode);
struct prg_node *pn;

for (pn = prg_hash[hi]; pn; pn = pn->next)
if (pn->inode == inode)
return (pn->name);
return ("-");
}

分析完毕prg_cache_add()之后,看prg_cache_get()就很简单了.

  1. unsigned hi = PRG_HASHIT(inode); 通过inode号拿到hash值
  2. for (pn = prg_hash[hi]; pn; pn = pn->next) 遍历prg_hash[hi]这条链表中的每一个节点,如果节点的inode与目标的inode相符就返回对应的信息,找不到则返回"-".

通过对netstat的一个简单的分析,可以发现其实netstat就是通过遍历/proc目录下的目录或者是文件来获取对应的信息.如果系统中的网络进程频繁地打开、关闭,那么使用netstat显然是相当耗时的.


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK