Linux 下获取进程所在文件的路径

这篇具有很好参考价值的文章主要介绍了Linux 下获取进程所在文件的路径。希望对大家有所帮助。如果存在错误或未考虑完全的地方，请大家不吝赐教，您也可以点击"举报违法"按钮提交疑问。

一、应用层

1.1 /proc/pid/exe

以top进程为例：

[root@localhost ~]# ps -ef | grep top
root     31386 15859  0 14:58 pts/2    00:00:00 top

top进程的pid为31386 ，可以通过查看 /proc/pid/exe：

在Linux系统中，每个进程都有一个/proc/pid/exe文件，它是一个符号链接文件，指向当前进程的可执行文件。

更具体地说，/proc/pid/exe文件是一个符号链接文件，它的内容是一个指向当前进程可执行文件的绝对路径的符号链接。例如，如果当前进程的可执行文件是/usr/bin/myprogram，那么/proc/pid/exe文件的内容将是/usr/bin/myprogram的绝对路径。

通过访问/proc/pid/exe文件，可以快速获取当前进程的可执行文件路径。

需要注意的是，/proc/pid/exe文件是一个符号链接文件，它指向的路径可能会随着进程的可执行文件的变化而变化，例如在运行过程中升级可执行文件或链接库，因此在读取它的内容时，应该对返回值进行错误检查，以确保它确实指向当前进程的可执行文件。

[root@localhost ~]# ls -l /proc/31386/exe
lrwxrwxrwx. 1 root root 0 5月  18 14:59 /proc/31386/exe -> /usr/bin/top

其中/usr/bin/top为top进程可执行文件所在的绝对文件路径。

由于/proc/31386/exe是符号链接，直接调用 readlink 命令获取该进程可执行文件所在的绝对文件路径：

[root@localhost ~]# readlink /proc/31386/exe
/usr/bin/top

NAME
       readlink - print resolved symbolic links or canonical file names
       
        Print value of a symbolic link or canonical file name

#include <stdio.h>
#include <linux/limits.h>

int main()
{
    char task_absolute_path[PATH_MAX];
 
    int cnt = readlink( "/proc/self/exe", task_absolute_path, PATH_MAX);
    if (cnt < 0){
        printf("readlink is error\n");
        return -1;
    }
    task_absolute_path[cnt] = '\0';
    printf("task absolute path:%s\n", task_absolute_path);

    return 0;
}

readlink()函数用于读取符号链接文件的内容，符号链接文件的内容就是一个路径，即解析符号链接。使用 readlink 读取符号链接，获取的就是符号链接的内容，符号链接的内容的就是目标文件的路径。

当然对于top这种系统shell命令，可以用which和whereis查看：

[root@localhost ~]# which top
/usr/bin/top
[root@localhost ~]# whereis top
top: /usr/bin/top /usr/share/man/man1/top.1.gz

对于普通程序通常用 /proc/pid/exe进行查看。

1.2 /proc/pid/cwd

在Linux系统中，每个进程都有一个/proc/pid/cwd文件，它是一个符号链接文件，指向当前进程的工作目录。
如下所示，我在 /root/link/test1/ 目录下运行 top 命令。

[root@localhost ~]# ls -l /proc/31386/cwd
lrwxrwxrwx. 1 root root 0 5月  18 15:02 /proc/31386/cwd -> /root/link/test1
[root@localhost ~]# readlink /proc/31386/cwd
/root/link/test1

更具体地说，/proc/pid/cwd文件是一个符号链接文件，它的内容是一个指向进程号为 pid 工作目录的绝对路径的符号链接。例如，如果当前进程的工作目录是/home/user，那么/proc/pid/cwd文件的内容将是/home/user的绝对路径。

通过访问/proc/pid/cwd文件，可以快速获取当前进程的工作目录路径，而无需在程序中调用getcwd()函数等获取当前工作目录的系统调用。

需要注意的是，/proc/pid/cwd文件是一个符号链接文件，它指向的路径可能会随着进程的工作目录的变化而变化，因此在读取它的内容时，应该对返回值进行错误检查，以确保它确实指向当前进程的工作目录。

比如函数 getcwd()：


NAME
       getcwd, getwd, get_current_dir_name - get current working directory

SYNOPSIS
       #include <unistd.h>

       char *getcwd(char *buf, size_t size);

1.3 代码示例

接下来给出一段代码，根据输入的参数进程pid号，开获取该进程可执行程序的绝对路径和当前在哪个目录执行：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/limits.h>

#define PROC_PATH_LEN 64

int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <pid>\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    char proc_path[PROC_PATH_LEN] = {0};
    snprintf(proc_path, sizeof(proc_path), "/proc/%s/exe", argv[1]);

    char exe_path[PATH_MAX] = {0};
    ssize_t len = readlink(proc_path, exe_path, sizeof(exe_path));
    if (len == -1) {
        perror("readlink");
        exit(EXIT_FAILURE);
    }
    exe_path[len] = '\0';
    printf("Executable path: %s\n", exe_path);

    memset(proc_path, 0, PROC_PATH_LEN);
    snprintf(proc_path, sizeof(proc_path), "/proc/%s/cwd", argv[1]);
    char cwd_path[PATH_MAX] = {0};
    len = readlink(proc_path, cwd_path, sizeof(cwd_path));
    if (len == -1) {
        perror("readlink");
        exit(EXIT_FAILURE);
    }
    cwd_path[len] = '\0';
    printf("pid current path: %s\n", cwd_path);

    return 0;
}

二、内核态获取

2.1 相对应的函数与结构体

struct fs_struct *fs 描述了文件系统和进程相关的信息：

// linux-3.10/include/linux/sched.h

struct task_struct {
	......
	/* filesystem information */
	struct fs_struct *fs;
	......
}

// linux-3.10/include/linux/fs_struct.h

struct fs_struct {
	int users;
	spinlock_t lock;
	seqcount_t seq;
	int umask;
	int in_exec;
	struct path root, pwd;
};

其中 struct path root 表示根目录路径，通常都是 / 目录，但是通过chroot系统调用后，对于进程来说会将 / 目录变成了某个子目录，那么相应的进程就是使用该子目录而不是全局的根目录，该进程会将该子目录当作其根目录。

chroot - run command or interactive shell with special root directory

Run COMMAND with root directory set to NEWROOT.

struct path pwd就是当前工作目录。

// linux-3.10/include/linux/path.h

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};

// linux-3.10/include/linux/dcache.h

/*
 * "quick string" -- eases parameter passing, but more importantly
 * saves "metadata" about the string (ie length and the hash).
 *
 * hash comes first so it snuggles against d_parent in the
 * dentry.
 */
struct qstr {
	......
	const unsigned char *name;
};

struct dentry {
	......
	struct qstr d_name;
	......
}

// linux-3.10/include/linux/sched.h

struct task_struct {
	......
	struct mm_struct *mm;
	......
}

从task_struct获取路径基本通过mm_struct这个结构，从中可以获取进程全路径。

// 获取进程全路径
task_struct->mm->exe_file->f_path

将进程的所在的文件路径存储到 /proc//exe symlink中：

// linux-3.10/include/linux/mm_types.h

struct mm_struct {
	......
	/* store ref to file /proc/<pid>/exe symlink points to */
	struct file *exe_file;
	......
}

// linux-3.10/include/linux/fs.h

struct file {
	......
	struct path		f_path;
	......
}

(1) 通过dentry_path_raw获取文件的全路径，低版本比如2.6.32没有该API

// linux-3.10/fs/dcache.c

static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	*buflen -= namelen;
	if (*buflen < 0)
		return -ENAMETOOLONG;
	*buffer -= namelen;
	memcpy(*buffer, str, namelen);
	return 0;
}

static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	return prepend(buffer, buflen, name->name, name->len);
}

/*
 * Write full pathname from the root of the filesystem into the buffer.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	prepend(&end, &buflen, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *retval;

	write_seqlock(&rename_lock);
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);

	return retval;
}
EXPORT_SYMBOL(dentry_path_raw);

struct file *filp;
dentry_path_raw(filp->f_path.dentry,buf,buflen);

（2）通过d_path获取文件的全路径

// linux-3.10/fs/dcache.c

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted.  On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed.  They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/.  The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	get_fs_root(current->fs, &root);
	br_read_lock(&vfsmount_lock);
	write_seqlock(&rename_lock);
	error = path_with_deleted(path, &root, &res, &buflen);
	write_sequnlock(&rename_lock);
	br_read_unlock(&vfsmount_lock);
	if (error < 0)
		res = ERR_PTR(error);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);

调用d_path函数文件的路径时，应该使用返回的指针而不是转递进去的参数 buf 。
原因是该函数的实现通常从缓冲区的偏移量开始。

内核中用到d_path的例子：

// linux-3.10/include/linux/mm_types.h

/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	......
	struct file * vm_file;		/* File we map to (can be NULL). */
	......
}

// linux-3.10/include/linux/fs.h

struct file {
	......
	struct path		f_path;
	......
}

// linux-3.10/mm/memory.c

/*
 * Print the name of a VMA.
 */
void print_vma_addr(char *prefix, unsigned long ip)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	/*
	 * Do not print if we are in atomic
	 * contexts (in exception stacks, etc.):
	 */
	if (preempt_count())
		return;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, ip);
	if (vma && vma->vm_file) {
		struct file *f = vma->vm_file;
		//使用伙伴系统接口，分配一个物理页，返回一个内核虚拟地址
		char *buf = (char *)__get_free_page(GFP_KERNEL);
		if (buf) {
			char *p;

			p = d_path(&f->f_path, buf, PAGE_SIZE);
			if (IS_ERR(p))
				p = "?";
			printk("%s%s[%lx+%lx]", prefix, kbasename(p),
					vma->vm_start,
					vma->vm_end - vma->vm_start);
			free_page((unsigned long)buf);
		}
	}
	up_read(&mm->mmap_sem);
}

2.2 API演示

在这里只是简单的给出怎么在内核态获取进程所在文件的路径，详细的话请参考内核源码，在第三节给出内核源码获取进程所在文件的路径的方法。

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/path.h>

#define TASK_PATH_MAX_LENGTH 512

//内核模块初始化函数
static int __init lkm_init(void)
{
    struct qstr root_task_path;
    struct qstr current_task_path;

    char buf_1[TASK_PATH_MAX_LENGTH] = {0};
    char *task_path_1 = NULL;

    char buf_2[TASK_PATH_MAX_LENGTH] = {0};
    char *task_path_2 = NULL;

	//获取当前目录名
    current_task_path = current->fs->pwd.dentry->d_name;
    //获取根目录
    root_task_path = current->fs->root.dentry->d_name;

	//内核线程的 mm 成员为空，这里没做判断
	
    //2.6.32 没有dentry_path_raw API
    //获取文件全路径
    task_path_1 = dentry_path_raw(current->mm->exe_file->f_path.dentry, buf_1, TASK_PATH_MAX_LENGTH);

	//获取文件全路径
	//调用d_path函数文件的路径时，应该使用返回的指针：task_path_2 ，而不是转递进去的参数buf：buf_2
    task_path_2 = d_path(&current->mm->exe_file->f_path, buf_2, TASK_PATH_MAX_LENGTH);
    if (IS_ERR(task_path_2)) {
        printk("Get path failed\n");
        return -1;
    }

    printk("current path = %s\n", current_task_path.name);
    printk("root path = %s\n", root_task_path.name);
    printk("task_path_1 = %s\n", task_path_1);
    printk("task_path_2 = %s\n", task_path_2);

	return -1;
}

module_init(lkm_init);

MODULE_LICENSE("GPL");

结果展示：

[root@localhost task_path]# dmesg -c
[415299.952165] current path = task_path
[415299.952172] root path = /
[415299.952176] task_path_1 = /usr/bin/kmod
[415299.952179] task_path_2 = /usr/bin/kmod

三、内核源码实现

// linux-3.10/fs/proc/base.c

/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */

struct pid_entry {
	char *name;
	int len;
	umode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
	struct task_struct *task;
	struct mm_struct *mm;
	struct file *exe_file;

	task = get_proc_task(dentry->d_inode);
	if (!task)
		return -ENOENT;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		return -ENOENT;
	exe_file = get_mm_exe_file(mm);
	mmput(mm);
	if (exe_file) {
		*exe_path = exe_file->f_path;
		path_get(&exe_file->f_path);
		fput(exe_file);
		return 0;
	} else
		return -ENOENT;
}

/*
 * Tasks
 */
static const struct pid_entry tid_base_stuff[] = {
	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	......
	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
	......
	LNK("cwd",       proc_cwd_link),
	LNK("root",      proc_root_link),
	LNK("exe",       proc_exe_link),

// linux-3.10/fs/proc/base.c

static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
	//由于这里申请的是一个页大小，便没有使用 kmalloc接口，调用伙伴系统接口分配一个物理页，返回内核虚拟地址
	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
	char *pathname;
	int len;

	if (!tmp)
		return -ENOMEM;

	//获取进程所在文件的路径
	pathname = d_path(path, tmp, PAGE_SIZE);
	len = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;
	len = tmp + PAGE_SIZE - 1 - pathname;

	if (len > buflen)
		len = buflen;
		
	//把进程所在文件的路径拷贝到用户空间：char __user *buffer
	//用户空间调用 readlink /proc/pid/
	if (copy_to_user(buffer, pathname, len))
		len = -EFAULT;
 out:
	free_page((unsigned long)tmp);
	return len;
}

static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
{
	int error = -EACCES;
	struct inode *inode = dentry->d_inode;
	struct path path;

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
	if (error)
		goto out;

	error = do_proc_readlink(&path, buffer, buflen);
	path_put(&path);
out:
	return error;
}

const struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.follow_link	= proc_pid_follow_link,
	.setattr	= proc_setattr,
};


// linux-3.10/fs/proc/internal.h
union proc_op {
	int (*proc_get_link)(struct dentry *, struct path *);
	int (*proc_read)(struct task_struct *task, char *page);
	int (*proc_show)(struct seq_file *m,
		struct pid_namespace *ns, struct pid *pid,
		struct task_struct *task);
};

struct proc_inode {
	struct pid *pid;
	int fd;
	union proc_op op;
	struct proc_dir_entry *pde;
	struct ctl_table_header *sysctl;
	struct ctl_table *sysctl_entry;
	struct proc_ns ns;
	struct inode vfs_inode;
};

/*
 * General functions
 */
static inline struct proc_inode *PROC_I(const struct inode *inode)
{
	return container_of(inode, struct proc_inode, vfs_inode);
}

其中：

proc_pid_readlink()
	-->do_proc_readlink(){
			char __user *buffer;
			copy_to_user(buffer, pathname, len);
		}

比如：当用户调用 readlink /proc/pid/exe，将该文件内容拷贝到用户空间：char __user *buffer中。

参考资料

Linux 3.10

https://blog.csdn.net/qq_42931917/article/details/119803534
https://blog.csdn.net/cenziboy/article/details/8761621
https://blog.csdn.net/whatday/article/details/100638552文章来源地址https://www.toymoban.com/news/detail-487706.html

到了这里，关于Linux 下获取进程所在文件的路径的文章就介绍完了。如果您还想了解更多内容，请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章，希望大家以后多多支持TOY模板网！