

从内核代码角度详解proc目录
source link: https://blog.spoock.com/2019/10/26/proc-from-kernel/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

上一篇文章,分析了proc目录的各个文件以及目录的含义。本篇文章主要是从内核的角度来看/proc的整个生成过程。
/proc文件系统是一种虚拟文件系统,以文件系统目录和文件形式,提供一个指向内核数据结构的接口,这为查看和改变各种系统属性开启了方便之门.此外,还能通过一组以/proc/PID形式命名的目录(PID即进程ID)查看系统中运行各进程的相关信息.
通常,/proc目录下的文件内容都采取人类可读的文本形式,shell脚本也能对其进行解析.程序可以打开,读取和写入/proc目录下的既定文件.大多数情况下,只有特权级进程才能修改/proc目录下的文件内容.
参考: Linux-UNIX系统编程手册
proc目录结构分析
所有要使用proc的内核模块都应当包含 <linux/proc_fs.h> 头文件。首先要了解一下proc编程中最重要的数据结构,以kernel 2.6为例:
/*
 * One entry (file, directory or symlink) in the procfs tree, as quoted by
 * the article from a 2.6-era kernel.
 */
struct proc_dir_entry {
unsigned int low_ino; // inode number of this entry
unsigned int namelen; // length of name
const char *name; // name of this entry
mode_t mode; // file type and access permission bits
nlink_t nlink;
uid_t uid; // owner user ID
gid_t gid; // owner group ID
loff_t size;
const struct inode_operations *proc_iops; // inode operations for this entry
/*
* NULL ->proc_fops means "PDE is going away RSN" or
* "PDE is just created". In either case, e.g. ->read_proc won't be
* called because it's too late or too early, respectively.
*
* If you're allocating ->proc_fops dynamically, save a pointer
* somewhere.
*/
const struct file_operations *proc_fops; // file operations for this entry
struct proc_dir_entry *next, *parent, *subdir; // next sibling, parent directory, first child
void *data; // private data pointer for the entry
read_proc_t *read_proc; // read handler
write_proc_t *write_proc; // write handler
atomic_t count; /* use count */
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
struct list_head pde_openers; /* who did ->open, but not ->release */
};
这个数据结构在 kernel 4.15 中已经发生了变化.
这个数据结构在内核中代表了一个proc入口,在procfs中表现为一个文件。你可以在这个结构体中看到一些文件特有的属性成员,如uid、gid、mode、name等。但是在利用默认的proc 的API编程中,我们需要关注的是这个入口的读写函数成员:
- read_proc_t *read_proc;
- write_proc_t *write_proc;
/proc目录下的文件形成有以下的特点:
- 内容一部分是静态生成的,比如fs目录,fb目录.这部分子目录,子文件在系统初始化时候,应该挂载在proc目录对应的proc_dir_entry链表下;
- .和..子目录,分别是对当前目录和父目录的链接
- 由数字组成的子目录是每次读取proc内容时动态生成的
参考: proc_fs下进程信息形成原理、目录遍历方式、位图查找
创建/proc目录
/proc文件系统是一种虚拟文件系统,在内核启动时,start_kernel在完成了VFS的初始化之后就会调用proc_root_init():
referer:https://elixir.bootlin.com/linux/v2.6.39.4/source/init/main.c#L456
/*
 * Excerpt of start_kernel(); the dotted lines stand for code that the
 * article elided.
 */
asmlinkage void __init start_kernel(void)
{
......
#ifdef CONFIG_PROC_FS
proc_root_init(); // only called when procfs support is configured in
#endif
.....
}
proc_root_init
跟踪进入到proc_root_init()中.
referer: https://elixir.bootlin.com/linux/v2.6.39.4/source/fs/proc/root.c#L91
/*
 * Register and mount the proc filesystem, then create the fixed entries
 * that live directly under /proc. Returns early (without creating any
 * entries) if registration or the kernel-internal mount fails.
 */
void __init proc_root_init(void)
{
struct vfsmount *mnt;
int err;
proc_init_inodecache(); // set up the cache used for proc_inode objects (proc_inode_cachep, per the article)
err = register_filesystem(&proc_fs_type); // register the proc filesystem type
if (err)
return;
mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
if (IS_ERR(mnt)) {
unregister_filesystem(&proc_fs_type); // undo the registration on mount failure
return;
}
init_pid_ns.proc_mnt = mnt;
proc_symlink("mounts", NULL, "self/mounts"); // create the "mounts" symlink pointing at self/mounts
proc_net_init(); // set up the "net" entry and its subtree
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL); // create the "sysvipc" directory
#endif
proc_mkdir("fs", NULL); // create the "fs" directory
proc_mkdir("driver", NULL); // create the "driver" directory
proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_mkdir("openprom", NULL);
#endif
proc_tty_init(); // set up the "tty" directory and its contents
#ifdef CONFIG_PROC_DEVICETREE
proc_device_tree_init();
#endif
proc_mkdir("bus", NULL); // create the "bus" directory
proc_sys_init(); // create and initialise the "sys" directory
}
在这之后,许多内核组件和模块就可以向proc文件系统及其子目录添加目录和文件了.其中最重要的数据结构就是 struct proc_dir_entry.这个结构相当于proc文件系统中的数据节点,procfs中许多文件、符号链接和目录都是由它表示的.但是请注意,并不是所有的proc文件都对应这样的数据结构:/proc/sys、/proc/${PID} 等特定目录下的文件需要动态生成,有其内部实现,而 proc_dir_entry 只用于普通的proc文件.
通过register_filesystem注册proc文件系统,proc_fs_type参数已经在root.c中定义了.
fs/proc/root.c
err = register_filesystem(&proc_fs_type); // 注册proc文件系统
/* Filesystem type descriptor registered for procfs under the name "proc". */
static struct file_system_type proc_fs_type = {
.name = "proc",
.get_sb = proc_get_sb, // superblock setup callback
.kill_sb = kill_anon_super, // superblock teardown callback
};
kern_mount_data()就是将文件系统挂载到vfs树中.调用链为: kern_mount_data() -> vfs_kern_mount() -> proc_get_sb()
proc_get_sb
proc_get_sb()函数主要调用proc_fill_super()函数进行填充:
/*
 * get_sb callback for procfs. If proc_mnt is already set, make sure the
 * root inode carries a pid (that of pid 1), then hand off to
 * get_sb_single() with proc_fill_super as the fill callback.
 */
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
if (proc_mnt) {
/* Seed the root directory with a pid so it doesn't need
* to be special in base.c. I would do this earlier but
* the only task alive when /proc is mounted the first time
* is the init_task and it doesn't have any pids.
*/
struct proc_inode *ei;
ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
if (!ei->pid)
ei->pid = find_get_pid(1);
}
return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
}
proc_fill_super
跟踪进入到proc_fill_super()
referer:fs/proc/inode.c
/*
 * Fill in the procfs superblock s and create its root dentry from
 * proc_root. Returns 0 on success and -ENOMEM when the root inode or
 * root dentry cannot be obtained.
 */
int proc_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
s->s_blocksize = 1024; // fixed block size of 1024 bytes
s->s_blocksize_bits = 10; // must agree with the block size: 2^10 = 1024
s->s_magic = PROC_SUPER_MAGIC; // magic number identifying the filesystem (0x9fa0 according to the article)
s->s_op = &proc_sops; // superblock operations
s->s_time_gran = 1;
root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); // build the VFS inode for the root entry
if (!root_inode)
goto out_no_root;
root_inode->i_uid = 0; // root directory is owned by uid 0 / gid 0
root_inode->i_gid = 0;
s->s_root = d_alloc_root(root_inode); // wrap the inode in a dentry and install it as the superblock root
if (!s->s_root)
goto out_no_root;
return 0;
out_no_root:
printk("proc_read_super: get root inode failed\n");
iput(root_inode);
return -ENOMEM;
}
proc_root
上面的根索引节点proc_root的具体定义如下:
referer: fs/proc/root.c
/*
* This is the root "inode" in the /proc tree..
*/
/* Statically initialised proc_dir_entry describing the /proc root directory. */
struct proc_dir_entry proc_root = {
.low_ino = PROC_ROOT_INO, // inode number of the root
.namelen = 5, // length of the name string below
.name = "/proc",
.mode = S_IFDIR | S_IRUGO | S_IXUGO, // directory, readable and searchable by everyone
.nlink = 2,
.proc_iops = &proc_root_inode_operations, // inode operations of the root
.proc_fops = &proc_root_operations, // file operations of the root
.parent = &proc_root, // the root is its own parent
};
referer:/include/linux/proc_fs.h
/* Inode number used for the /proc root directory. */
enum {
PROC_ROOT_INO = 1,
};
proc_root节点不仅包含正常的文件及文件夹,还要管理进程指定的pid文件.proc_root就必须能够处理索引和文件.
/*
* The root /proc directory is special, as it has the
* <pid> directories. Thus we don't use the generic
* directory handling functions for that..
*/
/* File operations of the /proc root; directory listing goes through proc_root_readdir. */
static struct file_operations proc_root_operations = {
.read = generic_read_dir,
.readdir = proc_root_readdir,
};
/*
* proc root can do almost nothing..
*/
/* Inode operations of the /proc root; name lookup goes through proc_root_lookup. */
static struct inode_operations proc_root_inode_operations = {
.lookup = proc_root_lookup,
.getattr = proc_root_getattr,
};
proc_root_lookup
当用户空间访问proc文件,vfs就会调用real_lookup(),它就会调用inode_operations中的proc_root_lookup指针函数,实际上就是调用proc_root_lookup()函数.
/*
 * Resolve a name directly under /proc.
 *
 * The static proc_dir_entry children are consulted first via proc_lookup();
 * a NULL result from it is passed straight back to the caller. Any other
 * result is discarded and the name is handed to proc_pid_lookup(), which
 * handles the per-process entries.
 */
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
{
	struct dentry *static_result;

	static_result = proc_lookup(dir, dentry, nd);
	if (static_result == NULL)
		return NULL;

	/* Not resolved among the static entries: try the pid-based ones. */
	return proc_pid_lookup(dir, dentry, nd);
}
proc_lookup
referer:fs/proc/root.c
/*
 * Look up dentry's name among the children of the proc_dir_entry that
 * belongs to the directory inode dir.
 *
 * On a match whose inode could be obtained, the inode is attached to
 * dentry and NULL is returned. Otherwise an ERR_PTR() is returned:
 * -ENOENT when no child matched, -EINVAL when a child matched but
 * proc_get_inode() produced no inode.
 *
 * Don't create negative dentries here, return -ENOENT by hand
 * instead.
 */
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = NULL;
struct proc_dir_entry * de;
int error = -ENOENT;
lock_kernel(); // take the big kernel lock
spin_lock(&proc_subdir_lock);
de = PDE(dir); // proc_dir_entry associated with the directory inode
if (de) {
for (de = de->subdir; de ; de = de->next) {
if (de->namelen != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
unsigned int ino = de->low_ino; // inode number of the matching entry
// the spinlock is dropped around proc_get_inode() and retaken afterwards
spin_unlock(&proc_subdir_lock);
error = -EINVAL;
inode = proc_get_inode(dir->i_sb, ino, de); // obtain the inode for the entry
spin_lock(&proc_subdir_lock);
break;
}
}
}
spin_unlock(&proc_subdir_lock);
unlock_kernel();
if (inode) { // found: bind the inode to the dentry
dentry->d_op = &proc_dentry_operations;
d_add(dentry, inode);
return NULL;
}
return ERR_PTR(error);
}
proc_pid_lookup
/*
 * Resolve a name under /proc that is either one of the fixed base entries
 * or a numeric process ID. Returns the result of the matching instantiate
 * call, or ERR_PTR(-ENOENT) when the name is not numeric or no such task
 * exists.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *result = ERR_PTR(-ENOENT);
struct task_struct *task;
unsigned tgid;
// try the fixed base entries first; this is where "self" is handled
result = proc_base_lookup(dir, dentry);
if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
goto out;
// convert the directory name into a numeric id; ~0U signals failure
tgid = name_to_int(dentry);
if (tgid == ~0U)
goto out;
rcu_read_lock();
// find the task for that id and take a reference while under RCU
task = find_task_by_pid(tgid);
if (task)
get_task_struct(task);
rcu_read_unlock();
if (!task)
goto out;
// create the inode for the /proc/<pid> directory and attach it to the dentry
result = proc_pid_instantiate(dir, dentry, task, NULL);
put_task_struct(task);
out:
return result;
}
proc_base_lookup
其中的proc_base_lookup(dir, dentry)函数就是用来获取当前进程.
referer:fs/proc/base.c
/*
 * Look up dentry's name in the proc_base_stuff table and instantiate the
 * matching entry. Returns ERR_PTR(-ENOENT) when dir has no task or when
 * no table entry matches.
 */
static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
{
struct dentry *error;
struct task_struct *task = get_proc_task(dir);
struct pid_entry *p, *last;
error = ERR_PTR(-ENOENT);
if (!task)
goto out_no_task;
/* Lookup the directory entry */
// linear scan of proc_base_stuff, comparing length first and then the bytes
last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
for (p = proc_base_stuff; p <= last; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len))
break;
}
if (p > last) // ran off the end of the table: no match
goto out;
error = proc_base_instantiate(dir, dentry, task, p);
out:
put_task_struct(task); // drop the reference taken by get_proc_task()
out_no_task:
return error;
}
/*
* proc base
*
* These are the directory entries in the root directory of /proc
* that properly belong to the /proc filesystem, as they describe
* describe something that is process related.
*/
// Table of fixed base entries; it only contains "self", a symbolic link
// (S_IFLNK) that the article describes as referring to the requesting task.
static struct pid_entry proc_base_stuff[] = {
NOD("self", S_IFLNK|S_IRWXUGO,
&proc_self_inode_operations, NULL, {}),
};
proc_self_inode_operations
分析proc_self_inode_operations(),
/就只是读取链接文件:将链接文件的内容返回至用户
static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
int buflen)
{
char tmp[PROC_NUMBUF];
//直接链接到当前pid的文件夹
sprintf(tmp, "%d", current->tgid);
return vfs_readlink(dentry,buffer,buflen,tmp);
}
}
/*
 * follow_link handler for the "self" entry: formats the calling task's
 * thread-group ID as a decimal string and lets vfs_follow_link() continue
 * the path walk recorded in nd with that string as the link target. The
 * integer result is returned wrapped in ERR_PTR().
 */
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char target[PROC_NUMBUF];
	int status;

	sprintf(target, "%d", current->tgid);
	status = vfs_follow_link(nd, target);
	return ERR_PTR(status);
}
/* Inode operations of the "self" symlink: reading it and following it. */
static struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
.follow_link = proc_self_follow_link,
};
其中nameidata的定义如下
referer: include/linux/namei.h
/*
 * Path-walk state handed to lookup and follow_link handlers.
 * NOTE(review): the field roles are not shown in this article — confirm
 * against include/linux/namei.h of the matching kernel version.
 */
struct nameidata {
struct dentry *dentry;
struct vfsmount *mnt;
struct qstr last;
unsigned int flags;
int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_LINKS + 1];
/* Intent data */
union {
struct open_intent open;
} intent;
};
以上分析的都是proc_root_inode_operations操作以及具体细节的实现.
参考: proc文件系统分析(二)
proc_root_readdir
除了proc_root_inode_operations之外,在proc_root_operations中还存在proc_root_readdir()函数,直接获取/proc/pid文件的信息.
referer:
/*
 * readdir handler of the /proc root. Positions below FIRST_PROCESS_ENTRY
 * are served by proc_readdir(); once that part is finished the position
 * is moved to FIRST_PROCESS_ENTRY and the remainder is produced by
 * proc_pid_readdir().
 */
static int proc_root_readdir(struct file * filp,
void * dirent, filldir_t filldir)
{
unsigned int nr = filp->f_pos;
int ret;
lock_kernel();
if (nr < FIRST_PROCESS_ENTRY) {
int error = proc_readdir(filp, dirent, filldir); // emit ".", ".." and the static entries
if (error <= 0) {
unlock_kernel();
return error;
}
// static part done: continue at the first process entry
filp->f_pos = FIRST_PROCESS_ENTRY;
}
unlock_kernel();
// walk the per-process part (the thread-group IDs) via proc_pid_readdir()
ret = proc_pid_readdir(filp, dirent, filldir);
return ret;
}
参考:https://blog.csdn.net/dog250/article/details/5822437
系统中对于一个目录有多种读取子目录的方式.比如ls和ls -al显示的结果不同.这是由传入过程中对 file->f_pos 设定不同的偏移决定的.对于proc根目录而言,有以下特点:
- f_pos = 0,为.目录链接,链接到自身
- f_pos = 1,为..目录链接,链接到父目录
- f_pos 如果是在2到FIRST_PROCESS_ENTRY-1之间,表示proc下的静态目录或者静态文件
- f_pos 如果是在FIRST_PROCESS_ENTRY到FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)-1 之间,为self子目录内容
- f_pos = FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff) 为init_task即0号初始进程
- f_pos = PID_MAX_LIMIT + TGID_OFFSET 标志着目录遍历结束(其中 FIRST_PROCESS_ENTRY = 256)
proc_pid_readdir
继续分析其中的proc_pid_readdir()函数.
/*
 * for the /proc/ directory itself, after non-process stuff has been done
 *
 * Emits the proc_base_stuff entries first and then one entry per thread
 * group found by next_tgid(), updating filp->f_pos as it goes. When the
 * walk completes, f_pos is set to PID_MAX_LIMIT + TGID_OFFSET so that
 * later calls return immediately. Always returns 0.
 */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
unsigned int nr;
struct task_struct *reaper;
struct tgid_iter iter;
struct pid_namespace *ns;
if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
goto out_no_task;
nr = filp->f_pos - FIRST_PROCESS_ENTRY;
reaper = get_proc_task(filp->f_path.dentry->d_inode);
if (!reaper)
goto out_no_task;
// emit the fixed base entries (the "self" link)
for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
const struct pid_entry *p = &proc_base_stuff[nr];
if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
goto out;
}
// pid namespace this procfs instance was mounted for
ns = filp->f_dentry->d_sb->s_fs_info;
iter.task = NULL;
iter.tgid = filp->f_pos - TGID_OFFSET;
// next_tgid(ns, iter) yields the next thread group at or after iter.tgid
for (iter = next_tgid(ns, iter);
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
filp->f_pos = iter.tgid + TGID_OFFSET;
// iter carries both the tgid and a referenced task;
// proc_pid_fill_cache() emits the directory entry for that process
if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
put_task_struct(iter.task);
goto out;
}
}
// mark the directory walk as finished
filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
out:
put_task_struct(reaper);
out_no_task:
return 0;
}
next_tgid()
分析next_tgid()函数.next_tgid()根据iter封装的tgid和namespace的信息,找到对应的pid
/* Cursor used while walking thread groups: the current tgid and its task. */
struct tgid_iter {
unsigned int tgid;
struct task_struct *task;
};
/*
 * Advance iter to the first pid >= iter.tgid in namespace ns whose task
 * exists and passes has_group_leader_pid(). The reference on the task
 * previously held in iter (if any) is dropped, and a reference is taken
 * on the task returned. iter.task is NULL when no further pid is found.
 */
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
{
struct pid *pid;
if (iter.task)
put_task_struct(iter.task);
rcu_read_lock();
retry:
iter.task = NULL;
// find the first allocated pid at or after iter.tgid in this namespace
pid = find_ge_pid(iter.tgid, ns);
if (pid) {
iter.tgid = pid_nr_ns(pid, ns);
iter.task = pid_task(pid, PIDTYPE_PID);
/* What we to know is if the pid we have find is the
* pid of a thread_group_leader. Testing for task
* being a thread_group_leader is the obvious thing
* todo but there is a window when it fails, due to
* the pid transfer logic in de_thread.
*
* So we perform the straight forward test of seeing
* if the pid we have found is the pid of a thread
* group leader, and don't worry if the task we have
* found doesn't happen to be a thread group leader.
* As we don't care in the case of readdir.
*/
// skip pids without a task or without a group-leader pid and retry with the next id
if (!iter.task || !has_group_leader_pid(iter.task)) {
iter.tgid += 1;
goto retry;
}
get_task_struct(iter.task);
}
rcu_read_unlock();
return iter;
}
find_ge_pid
在next_tgid()中已经说明了,find_ge_pid()就是通过tgid和pid_namespace,找到对应进程的pid
/*
* Used by proc to find the first pid that is greater than or equal to nr.
*
* If there is a pid at nr this function is exactly the same as find_pid_ns.
*/
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
struct pid *pid;
do {
pid = find_pid_ns(nr, ns);
if (pid)
break;
nr = next_pidmap(ns, nr);
} while (nr > 0);
return pid;
}
find_pid_ns()
find_pid_ns()通过遍历哈希链表的方式查找并返回对应的struct pid.
/*
 * Find the struct pid that has number nr in namespace ns by walking the
 * hash chain selected by pid_hashfn(nr, ns). Returns NULL when no upid
 * on the chain matches both nr and ns.
 */
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct hlist_node *elem;
struct upid *pnr;
hlist_for_each_entry_rcu(pnr, elem,
&pid_hash[pid_hashfn(nr, ns)], pid_chain)
if (pnr->nr == nr && pnr->ns == ns)
// the upid is embedded in struct pid at numbers[ns->level]
return container_of(pnr, struct pid,
numbers[ns->level]);
return NULL;
}
由于内核实现了命名空间,大大增加了内核复杂性.为了查找效率,进程号nr和其struct pid结构变量的查找采用hash表实现:各个进程号nr对应的struct upid结构变量通过pid_chain链表挂接在链表头 &pid_hash[pid_hashfn(nr, ns)] 中.struct upid表示在当前pid命名空间中的局部进程号等信息,找到该nr对应的struct upid结构后,由于struct upid嵌入在该进程对应的struct pid中,所以很容易(通过container_of)找到进程在全局范围的struct pid结构.
next_pidmap()
next_pidmap用于循环遍历得到下一个全局进程号.
/*
 * Return the next pid number after last that is marked in pid_ns's pid
 * bitmap, or -1 when last is already at or beyond PID_MAX_LIMIT or no
 * further bit is set.
 */
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
{
int offset;
struct pidmap *map, *end;
/*
* A maximum of 4 million PIDs should be enough for a while.
* [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 :(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
*/
if (last >= PID_MAX_LIMIT)
return -1;
// per the article: with 4 KiB pages, BITS_PER_PAGE = 4096 * 8 bits
offset = (last + 1) & BITS_PER_PAGE_MASK;
map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
end = &pid_ns->pidmap[PIDMAP_ENTRIES];
// scan map by map; after the first map the search restarts at bit 0
for (; map < end; map++, offset = 0) {
if (unlikely(!map->page))
continue;
offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
if (offset < BITS_PER_PAGE)
return mk_pid(pid_ns, map, offset);
}
return -1;
}
proc_pid_fill_cache
在遍历得到pid所对应进程的task_struct信息之后,通过proc_pid_fill_cache()函数填充.
/*
 * Emit the directory entry for the thread group carried in iter. The
 * entry name is the decimal rendering of iter.tgid; the work itself is
 * delegated to proc_fill_cache() with proc_pid_instantiate as the
 * instantiate callback and iter.task as its task argument.
 */
static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
	struct tgid_iter iter)
{
	char pid_name[PROC_NUMBUF];
	int name_len;

	name_len = snprintf(pid_name, sizeof(pid_name), "%d", iter.tgid);

	return proc_fill_cache(filp, dirent, filldir, pid_name, name_len,
				proc_pid_instantiate, iter.task, NULL);
}
/*
 * Create the inode for a /proc/<pid> directory, wire up its inode and
 * file operations, and attach it to dentry. Returns NULL on success and
 * ERR_PTR(-ENOENT) when no inode could be made or when pid_revalidate()
 * reports that the dentry is no longer valid.
 */
static struct dentry *proc_pid_instantiate(struct inode *dir,
struct dentry * dentry,
struct task_struct *task, const void *ptr)
{
struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
goto out;
// fill in the new inode: mode, inode_operations and file_operations;
// the file_operations' readdir is what allows listing the per-process directory
inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
inode->i_op = &proc_tgid_base_inode_operations; // inode operations of the per-process directory
inode->i_fop = &proc_tgid_base_operations; // file operations of the per-process directory
inode->i_flags|=S_IMMUTABLE;
inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
ARRAY_SIZE(tgid_base_stuff));
d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
error = NULL;
out:
return error;
}
// Inode operations installed on each /proc/<pid> directory by proc_pid_instantiate().
static struct inode_operations proc_tgid_base_inode_operations = {
.lookup = proc_tgid_base_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
};
/* File operations installed on each /proc/<pid> directory by proc_pid_instantiate(). */
static struct file_operations proc_tgid_base_operations = {
.read = generic_read_dir,
.readdir = proc_tgid_base_readdir,
};
参考: proc_fs下进程信息形成原理、目录遍历方式、位图查找 proc文件系统分析(三)
proc_tgid_base_lookup
在proc_tgid_base_inode_operations结构体中的lookup函数proc_tgid_base_lookup()的定义如下:
/*
 * lookup handler of a /proc/<pid> directory: resolves dentry's name
 * against the tgid_base_stuff table, which lists the entries of a
 * per-process directory together with their handlers.
 */
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	struct dentry *found;

	found = proc_pident_lookup(dir, dentry, tgid_base_stuff,
				   ARRAY_SIZE(tgid_base_stuff));
	return found;
}
继续跟踪进入到proc_pident_lookup()中.
/*
 * Look up dentry's name in the table ents (nents entries) and instantiate
 * the matching entry for the task that owns dir. Returns
 * ERR_PTR(-ENOENT) when dir has no task or when no entry matches.
 */
static struct dentry *proc_pident_lookup(struct inode *dir,
struct dentry *dentry,
struct pid_entry *ents,
unsigned int nents)
{
struct inode *inode;
struct dentry *error;
struct task_struct *task = get_proc_task(dir);
struct pid_entry *p, *last;
error = ERR_PTR(-ENOENT);
inode = NULL;
if (!task)
goto out_no_task;
/*
* Yes, it does not scale. And it should not. Don't add
* new entries into /proc/<tgid>/ without very good reasons.
*/
last = &ents[nents - 1];
// linear scan: compare the length first, then the bytes
for (p = ents; p <= last; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len))
break;
}
if (p > last) // ran off the end of the table: no match
goto out;
// create the inode for the matched entry and attach it to the dentry
error = proc_pident_instantiate(dir, dentry, task, p);
out:
put_task_struct(task); // drop the reference taken by get_proc_task()
out_no_task:
return error;
}
proc_tgid_base_readdir
在proc_tgid_base_operations()结构体中的readdir函数proc_tgid_base_readdir()定义如下:
/*
 * readdir handler of a /proc/<pid> directory: lists the entries described
 * by the tgid_base_stuff table through proc_pident_readdir().
 */
static int proc_tgid_base_readdir(struct file * filp,
	void * dirent, filldir_t filldir)
{
	int status;

	status = proc_pident_readdir(filp, dirent, filldir,
				     tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
	return status;
}
继续跟踪进入到proc_pident_readdir()中.
/*
 * List a per-process directory described by the table ents (nents
 * entries). Position 0 yields ".", position 1 yields "..", and positions
 * from 2 on map to ents[f_pos - 2].
 *
 * filldir_t is defined as:
 *   typedef int (*filldir_t)(void*,const char*,int,loff_t,ino_t,unsigned);
 *
 * Returns -ENOENT when the inode has no task, 0 when a fill callback
 * reported failure, and 1 otherwise.
 */
static int proc_pident_readdir(struct file *filp,
void *dirent, filldir_t filldir,
struct pid_entry *ents, unsigned int nents)
{
int i;
int pid;
// dentry of the directory being read
struct dentry *dentry = filp->f_path.dentry;
// inode behind that dentry
struct inode *inode = dentry->d_inode;
// task that this directory belongs to
struct task_struct *task = get_proc_task(inode);
struct pid_entry *p, *last;
ino_t ino;
int ret;
ret = -ENOENT;
if (!task)
goto out_no_task;
ret = 0;
pid = task->pid;
// current directory position
i = filp->f_pos;
switch (i) {
// position 0: ".", the directory itself
case 0:
ino = inode->i_ino;
// hand the entry to the caller-supplied fill callback
if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
// position 1: "..", the parent directory
case 1:
ino = parent_ino(dentry);
if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
goto out;
i++;
filp->f_pos++;
/* fall through */
// positions >= 2: entries from the ents table
default:
i -= 2;
if (i >= nents) {
ret = 1;
goto out;
}
// table entry that corresponds to the current position
p = ents + i;
last = &ents[nents - 1];
while (p <= last) {
// emit this entry through proc_pident_fill_cache()
if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
goto out;
// advance to the next table entry
filp->f_pos++;
p++;
}
}
ret = 1;
out:
put_task_struct(task);
out_no_task:
return ret;
}
pid_entry
proc_dir_entry是对proc目录的说明.proc目录下面还存在pid_entry,是对应于每一个进程ID文件夹的说明.pid_entry的定义如下:
/* Describes one entry of a table-driven per-process (or base) directory. */
struct pid_entry {
char *name; // entry name, compared against the dentry name during lookup
int len; // length of name
mode_t mode; // file type and permission bits
const struct inode_operations *iop; // inode operations for the entry
const struct file_operations *fop; // file operations for the entry
union proc_op op; // NOTE(review): presumably the entry-specific handler — confirm
};
在/fs/proc/base.c
中定义了进程ID文件夹下的每一个文件的方法.
/*
 * Table of the entries that make up a /proc/<pid> directory. Each line
 * names the entry, its permission bits and the operations or handler
 * behind it; several entries only exist when the named CONFIG_ option
 * is enabled.
 */
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUGO, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
INF("syscall", S_IRUGO, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_maps_operations),
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, proc_numa_maps_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
LNK("root", proc_root_link),
LNK("exe", proc_exe_link),
REG("mounts", S_IRUGO, proc_mounts_operations),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
REG("mountstats", S_IRUSR, proc_mountstats_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_smaps_operations),
REG("pagemap", S_IRUGO, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUGO, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, proc_cpuset_operations),
#endif
#ifdef CONFIG_CGROUPS
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
#endif
#ifdef CONFIG_ELF_CORE
REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUSR, proc_tgid_io_accounting),
#endif
};
proc_pid_cmdline
根据上面的定义 INF("cmdline", S_IRUGO, proc_pid_cmdline)
获取cmdline文件的信息就是通过proc_pid_cmdline()方法获取的信息.
referer:fs/proc/base.c
/*
 * Copy the command line of task into buffer and return the number of
 * bytes produced (0 when the task has no mm or its arg_end is not set).
 * At most PAGE_SIZE bytes are read from the argument area; if that area
 * does not end in a NUL byte, the environment area may be appended as
 * described below.
 */
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
unsigned int len;
struct mm_struct *mm = get_task_mm(task);
if (!mm)
goto out;
if (!mm->arg_end)
goto out_mm; /* Shh! No looking before we're done */
// byte length of the argument area, capped at PAGE_SIZE
len = mm->arg_end - mm->arg_start;
if (len > PAGE_SIZE)
len = PAGE_SIZE;
// read the argument area from the task's address space into buffer
res = access_process_vm(task, mm->arg_start, buffer, len, 0);
// If the nul at the end of args has been overwritten, then
// assume application is using setproctitle(3).
if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
len = strnlen(buffer, res);
if (len < res) {
// a NUL exists inside the data: cut the result at that point
res = len;
} else {
// no NUL at all: continue into the environment area, still within PAGE_SIZE in total
len = mm->env_end - mm->env_start;
if (len > PAGE_SIZE - res)
len = PAGE_SIZE - res;
// append the environment bytes, then cut the result at the first NUL
res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
res = strnlen(buffer, res);
}
}
out_mm:
mmput(mm); // drop the reference taken by get_task_mm()
out:
return res;
}
其他的方法也是类似的,就不做分析了.
/proc操作
/proc虚拟文件系统也可以自行创建节点,实现用户空间与内核空间的交互.在 proc_fs.h
中定义了所有与/proc有关的操作.
/*
 * Inline stubs quoted from proc_fs.h: every function here returns NULL
 * and the remove_proc_entry macro expands to an empty statement.
 * NOTE(review): these look like the fallbacks used when procfs is not
 * configured in, not the real implementations — confirm against the header.
 */
static inline struct proc_dir_entry *create_proc_entry(const char *name,
mode_t mode, struct proc_dir_entry *parent) { return NULL; }
#define remove_proc_entry(name, parent) do {} while (0)
/*
 * NOTE(review): this extern declaration has the same name as the macro
 * above, so the two cannot coexist as written; presumably they come from
 * different #ifdef branches of the header — confirm.
 */
extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
static inline struct proc_dir_entry *proc_symlink(const char *name,
struct proc_dir_entry *parent,const char *dest) {return NULL;}
static inline struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent) {return NULL;}
static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
mode_t mode, struct proc_dir_entry *base,
read_proc_t *read_proc, void * data) { return NULL; }
static inline struct proc_dir_entry *create_proc_info_entry(const char *name,
mode_t mode, struct proc_dir_entry *base, get_info_t *get_info)
{ return NULL; }
同时在 fs/proc/root.c 中通过 EXPORT_SYMBOL 导出了可供内核模块使用的、能够操作/proc目录的符号(注意:这些接口面向内核模块,而不是用户态程序).
参考: EXPORT_SYMBOL(proc_*)
/* procfs helper functions and well-known directory entries exported with EXPORT_SYMBOL. */
EXPORT_SYMBOL(proc_symlink);
EXPORT_SYMBOL(proc_mkdir);
EXPORT_SYMBOL(create_proc_entry);
EXPORT_SYMBOL(remove_proc_entry);
EXPORT_SYMBOL(proc_root);
EXPORT_SYMBOL(proc_root_fs);
EXPORT_SYMBOL(proc_net);
EXPORT_SYMBOL(proc_net_stat);
EXPORT_SYMBOL(proc_bus);
EXPORT_SYMBOL(proc_root_driver);
参考: 创建proc节点之create_proc_entry
create_proc_entry
struct proc_dir_entry *create_proc_entry (const char *name, mode_t mode, struct proc_dir_entry *parent);
- name: /proc/下节点的文件名,也可以是路径;例:"driver/demo",在driver下创建demo节点
- mode: 访问权限,与普通文件相同;例:0666可读可写,0444只读,0222只写
- parent: 父目录,可以为 NULL(表示 /proc 根目录);parent的类型是proc_dir_entry的指针,不能直接填文件名字符串,最好是用路径初始化name参数
create_proc_entry 的返回值是一个proc_dir_entry 指针(或者为 NULL,说明在 create 时发生了错误)。 可以使用这个返回的指针来配置这个虚拟文件的其他参数,例如在对该文件执行读操作时应该调用的函数。
remove_proc_entry
void remove_proc_entry( const char *name, struct proc_dir_entry *parent );
- name: /proc/下节点的文件名,也可以是路径;例:"driver/demo",删除driver下的demo节点
- parent: 父目录,可以为 NULL(表示 /proc 根目录);parent的类型是proc_dir_entry的指针,不能直接填文件名字符串,最好是用路径初始化name参数
proc_mkdir
struct proc_dir_entry *proc_mkdir(const char *name,struct proc_dir_entry *parent);
- name: 需要创建的文件夹名称
- parent: 创建的文件夹路径,就是在哪个文件夹中创建,如果是proc根目录,此参数为NULL
参考: Linux proc_mkdir和proc_create的用法
还有其他的操作proc目录的方法,这里就不做介绍了
本文从内核启动一直到proc中的每一个目录的产生,pid中的每一个属性产生都做了说明,对于我们之后基于proc目录进行各种操作能够有所借鉴.
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK