标签 fuzzing 下的文章

开始之前

这段时间本来想研究下 Chrome V8,学了一段时间发现 V8 还是太吃操作了……感觉应该先了解下比较简单的 JS 引擎。于是想着先从适合嵌入式设备的轻量 JS 引擎 JerryScript 开始玩起。正好看到 JerryScript 的 Issues 有好多关于漏洞的报告(无人在意说是),那就复现一下 fuzzing 漏洞挖掘吧。

源码与编译

git clone https://github.com/jerryscript-project/jerryscript
cd jerryscript
python tools/build.py

编译 JerryScript 还是相当简单的,要想 fuzz 它,我们可以直接让 AFL 将文件作为参数传入然后等待崩溃。但是这样的 fuzz 是没有意义的,因为目标程序没有经过 AFL 的插桩(instrumentation),AFL 拿不到覆盖率反馈。我们需要使用 afl-clang-lto 作为编译器来完成插桩。有关 AFL 的用法和原理,前人之述备矣,我就不赘述了。

JerryScript 已经在 tools/build.py 为我们准备好了接入 libfuzzer 的编译选项,而 AFL 支持为 libfuzzer sanitized binary 启用 persistent mode。那么就用现成的就好。

CC=afl-clang-lto python tools/build.py --libfuzzer=ON --compile-flag='-Wno-enum-enum-conversion' --strip=OFF
CC=afl-clang-lto AFL_LLVM_CMPLOG=1 python tools/build.py --libfuzzer=ON --compile-flag='-Wno-enum-enum-conversion -fsanitize=address' --strip=OFF

我们需要添加 -Wno-enum-enum-conversion 编译参数来防止高版本 clang 编译不通过。(如果要用高版本 gcc 编译的话,还需要添加 -Wno-unterminated-string-initialization,因为 jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c 中的 day_names_p 和 month_names_p 没有考虑 C 风格字符串字面量末尾的 NUL 字节(trailing NUL byte)所占用的空间。)

准备初始 corpus

作为实验,我没有考虑太多,选用 test262 作为 JS 样本,去除其中的注释,就直接作为初始 corpus 了。我选用 AFL 作为 fuzzing 引擎。这对于 JS 引擎而言,效果不会好,但本来也只是实验性质的尝试。AFL 在 fuzz 过程中会根据这些文件不断通过各种策略构造新的输入,收集对于每个输入程序执行后的覆盖率,继续构造新的输入。

import os
import shutil
import subprocess

# Upstream repository the JS samples are taken from.
TEST262_REPO = "https://github.com/tc39/test262.git"
# Local checkout directory; gather_es5_js_files() walks this tree.
CLONE_DIR = "test262"
# Output directory that prepare_corpus() fills with the selected samples.
CORPUS_DIR = "corpus"
NUM_FILES = 100  # Upper bound on how many files are copied into the corpus

# Directories considered ES5 core tests. A file is selected when its directory
# path contains one of these fragments (forward-slash form).
# NOTE(review): upstream test262 appears to keep statement tests under
# "test/language/statements", so "test/statements" may never match - confirm.
ES5_TEST_DIRS = [
    "test/built-ins",
    "test/language",
    "test/statements",
    "test/annexB"
]

def clone_test262():
    """Clone the test262 repository into ``CLONE_DIR`` unless it already exists.

    The destination is passed to ``git clone`` explicitly so the checkout
    always lands where the rest of the script looks for it, even if
    ``CLONE_DIR`` is changed to something other than the repository's default
    directory name. Only the working tree is needed, so the clone is shallow.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with an error.
    """
    if os.path.exists(CLONE_DIR):
        print("test262 repo already cloned.")
        return

    print("Cloning test262 repo...")
    subprocess.run(
        ["git", "clone", "--depth", "1", TEST262_REPO, CLONE_DIR],
        check=True,
    )

def gather_es5_js_files():
    """Return the paths of all ``.js`` files below the ``ES5_TEST_DIRS`` trees.

    Each configured directory is walked directly (relative to ``CLONE_DIR``)
    instead of scanning the whole checkout and substring-matching paths, so
    ``.git`` and unrelated directories are never visited. Directories and
    files are visited in sorted order so that the result, and therefore the
    first ``NUM_FILES`` entries picked later, is reproducible across runs and
    file systems. Configured directories that do not exist are skipped.

    Returns:
        list[str]: file paths, grouped by ``ES5_TEST_DIRS`` entry.
    """
    js_files = []
    for es5_dir in ES5_TEST_DIRS:
        base = os.path.join(CLONE_DIR, *es5_dir.split("/"))
        # os.walk silently yields nothing for a missing directory.
        for root, dirs, files in os.walk(base):
            dirs.sort()  # in-place sort makes os.walk descend deterministically
            js_files.extend(
                os.path.join(root, file)
                for file in sorted(files)
                if file.endswith(".js")
            )
    return js_files

def prepare_corpus(js_files):
    """Copy the first ``NUM_FILES`` entries of *js_files* into ``CORPUS_DIR``.

    The corpus directory is flat, so files that share a base name are
    disambiguated with a numeric suffix (``name_1.js``, ``name_2.js``, ...).

    Args:
        js_files: sequence of source file paths.
    """
    os.makedirs(CORPUS_DIR, exist_ok=True)
    selected_files = js_files[:NUM_FILES]
    print(f"Copying {len(selected_files)} files to corpus directory...")
    existing_names = set()

    for path in selected_files:
        filename = os.path.basename(path)
        name, ext = os.path.splitext(filename)

        # Avoid duplicates by renaming with suffix if needed
        suffix = 1
        while filename in existing_names:
            filename = f"{name}_{suffix}{ext}"
            suffix += 1

        existing_names.add(filename)
        shutil.copy(path, os.path.join(CORPUS_DIR, filename))

    print("Corpus preparation complete.")

if __name__ == "__main__":
    # Script entry point: fetch test262, collect candidate samples, build corpus.
    clone_test262()
    all_js_files = gather_es5_js_files()
    if all_js_files:
        prepare_corpus(all_js_files)
    else:
        print("No ES5 JS files found in test262 repo!")

fuzzing

afl-fuzz -i input -o output -b 2 -a text -M master -- ./jerry-libfuzzer
AFL_USE_ASAN=1 afl-fuzz -i input -o output -b 4 -a text -S sanitizer -c 0 -l 2AT -P exploit -p exploit -- ./jerry-libfuzzer

很快就发生了 crash。可以看到 AFL 构造的 JS 输入和乱码真的没区别了。也就是说 JerryScript 在语法分析甚至词法分析阶段就可能崩溃,发生段错误。

结果处理

虽然听起来有点离谱,但是挂机一天后 AFL 收集到了 543 个 crashes。但其中大多数都是 null pointer deref。所以我决定简单筛选一下无效的 crashes。使用 Python gdb 模块批量调试 crash inputs,段错误后先提取产生段错误位置的汇编指令,找到解引用 [reg + offset](寄存器间接寻址)处使用的寄存器,然后再让 gdb 查询这个寄存器的值,如果值为很大的数则将这个 input 另存起来。

import gdb
import os
import shlex
import shutil
import re
from pathlib import Path

# ====== Configuration ======
# Directory whose regular files are replayed one by one under gdb.
CRASH_DIR = Path("./crashes")
# Directory that receives the inputs that pass the filter.
VALID_DIR = Path("./valid")
# Directory that receives one "<input name>.log" per replayed input.
LOG_DIR = Path("./logs")
MODE = "copy"   # "copy" or "link" (symlink, falling back to copy on OSError)
PATTERN = "cafebabe"   # if NOT found in crash bt/output -> save to VALID_DIR
USE_STDIN = False    # If True, run "run < file" to feed the file on stdin
# Note: timeouts are not enforced inside gdb-embedded script; if you need per-run
# timeouts, run gdb under an external timeout wrapper (e.g. GNU timeout) or use
# the external/python+subprocess approach.
# ===========================

# Work with absolute paths from here on (the input path is handed to the
# debugged program and symlink targets are built from it).
CRASH_DIR = CRASH_DIR.resolve()
VALID_DIR = VALID_DIR.resolve()
LOG_DIR = LOG_DIR.resolve()

# 64-bit general-purpose registers that are looked for inside the memory
# operand of the faulting instruction.
x86_64_registers = [
    "rax", "rbx", "rcx", "rdx",
    "rsp", "rbp", "rsi", "rdi",
    "r8", "r9", "r10", "r11",
    "r12", "r13", "r14", "r15"
]

# Create the output directories up front; existing ones are left untouched.
for d in (VALID_DIR, LOG_DIR):
    d.mkdir(parents=True, exist_ok=True)

# helper: unique destination path (avoid overwriting)
def unique_dest(dest: Path) -> Path:
    """Return *dest* if it is free, else the first free ``<dest>.<n>`` (n >= 1).

    Nothing is created on disk; the function only probes for existence.
    """
    candidate = dest
    counter = 0
    while candidate.exists():
        counter += 1
        candidate = dest.with_name(f"{dest.name}.{counter}")
    return candidate

def install_file(src: Path) -> Path:
    """Place *src* into ``VALID_DIR`` under a non-clashing name and return it.

    With ``MODE == "link"`` a symlink to the absolute source path is attempted
    first; if that raises ``OSError`` the file is copied instead. Any other
    mode copies the file (with metadata) directly.
    """
    dest = unique_dest(VALID_DIR / src.name)
    if MODE != "link":
        shutil.copy2(src, dest)
        return dest

    # try symlink to absolute path, fall back to a plain copy
    try:
        os.symlink(str(src.resolve()), str(dest))
    except OSError:
        shutil.copy2(src, dest)
    return dest

# Regex fragments; any one of them appearing in the captured gdb text is
# treated as evidence of a crash.
CRASH_PATTERNS = [
    r"Program received signal",
    r"SIGSEGV",
    r"SIGABRT",
    r"Segmentation fault",
    r"SIGILL",
    r"SIGFPE",
    r"^#0",            # backtrace frame 0
    r"AddressSanitizer",
    r"ASAN:",
    r"terminate called",
]

# Single alternation of all fragments, matched case-insensitively and with
# "^" anchoring at every line start.
_crash_re = re.compile(
    "|".join(f"(?:{fragment})" for fragment in CRASH_PATTERNS),
    flags=re.IGNORECASE | re.MULTILINE,
)


def detect_crash(text: str) -> bool:
    """Return True when *text* contains at least one crash indicator."""
    return _crash_re.search(text) is not None

# One-time gdb setup. Each setting is applied best-effort, but a failure is
# reported instead of being swallowed silently:
#   * pagination off      - so gdb.execute(..., to_string=True) returns full text
#   * confirm off         - so "run"/"kill" never wait for a y/n answer
#   * disassembly-flavor  - the crash filter looks for the "[reg+offset]" memory
#                           operand, which only exists in Intel syntax; gdb's
#                           default AT&T syntax would make every crash look
#                           uninteresting.
for _setting in (
    "set pagination off",
    "set confirm off",
    "set disassembly-flavor intel",
):
    try:
        gdb.execute(_setting)
    except Exception as exc:
        print(f"warning: gdb command '{_setting}' failed: {exc}")

# The program to run is the one passed with --args ./jerry when launching gdb.
# gdb already knows the executable from --args; we will just set program args each run.
files = sorted(p for p in CRASH_DIR.iterdir() if p.is_file())

# Counters reported once all inputs have been replayed.
summary = {"processed": 0, "crashes": 0, "saved": 0, "no_crash": 0}

# Base-register values up to this limit are treated as plain NULL dereferences.
NULL_DEREF_LIMIT = 8


def read_register(reg: str) -> int:
    """Return the current value of register *reg* as an unsigned integer.

    ``p/x`` forces hexadecimal output. A plain ``p $rax`` prints a signed
    decimal number, which must not be parsed as base 16 and which turns
    addresses with the top bit set into negative values.
    """
    text = gdb.execute(f"p/x ${reg}", to_string=True)
    return int(text.split()[-1], 16)


def faults_on_wild_pointer() -> bool:
    """Tell whether the faulting instruction dereferences a non-NULL-ish register.

    The instruction at ``$rip`` is disassembled and its first ``[...]`` memory
    operand (Intel syntax) is inspected. The result is True when any
    general-purpose register named inside the operand holds a value greater
    than ``NULL_DEREF_LIMIT``. Any failure to inspect the inferior yields
    False rather than aborting the whole batch.
    """
    try:
        crash_line = gdb.execute("x/i $rip", to_string=True)
    except gdb.error:
        return False

    start = crash_line.find("[")
    end = crash_line.find("]", start + 1)
    if start == -1 or end == -1:
        return False
    operand = crash_line[start:end]

    for reg in x86_64_registers:
        if reg not in operand:
            continue
        try:
            if read_register(reg) > NULL_DEREF_LIMIT:
                return True
        except (gdb.error, ValueError, IndexError):
            continue
    return False


for infile in files:
    summary["processed"] += 1
    name = infile.name
    logfile = LOG_DIR / (name + ".log")
    print("---- Processing:", name)

    # Set args or use stdin redirection
    if USE_STDIN:
        # clear any args (not necessary, but explicit)
        try:
            gdb.execute("set args")
        except gdb.error:
            pass
        run_cmd = "run < " + shlex.quote(str(infile))
    else:
        # set argv for the debugged program to the filename
        # (if your program accepts multiple args, adjust as needed)
        try:
            gdb.execute("set args " + shlex.quote(str(infile)))
        except gdb.error as e:
            # Running with stale args would attribute the result to the wrong
            # input, so skip this file instead.
            print("  -> Could not set program args, skipping:", e)
            continue
        run_cmd = "run"

    # Execute run and capture textual output
    try:
        out_run = gdb.execute(run_cmd, to_string=True)
    except gdb.error as e:
        # gdb.error may be thrown if the program exited in a way gdb treats specially;
        # capture the string representation and continue to collect bt below.
        out_run = str(e)

    # After run, collect a backtrace (best-effort)
    try:
        out_bt = gdb.execute("bt full", to_string=True)
    except Exception:
        try:
            out_bt = gdb.execute("bt", to_string=True)
        except Exception:
            out_bt = ""

    combined = out_run + "\n" + out_bt

    # Save log
    with logfile.open("w", encoding="utf-8", errors="replace") as f:
        f.write("COMMAND: " + run_cmd + "\n\n")
        f.write("=== RUN OUTPUT ===\n")
        f.write(out_run + "\n\n")
        f.write("=== BACKTRACE ===\n")
        f.write(out_bt + "\n")

    # Classify the run. No branch leaves the loop body early, so the inferior
    # is always cleaned up below before the next input is started.
    if not detect_crash(combined):
        summary["no_crash"] += 1
        print("  -> No crash detected. Log:", logfile)
    else:
        summary["crashes"] += 1
        if not faults_on_wild_pointer():
            print("  -> Crash looks like a NULL dereference (or has no "
                  "register memory operand). Skipped. Log:", logfile)
        else:
            print("  -> Valid crash detected. Log:", logfile)
            if PATTERN.lower() in combined.lower():
                print(f"     -> pattern '{PATTERN}' FOUND in backtrace/output. Not saving.")
            else:
                dest = install_file(infile)
                summary["saved"] += 1
                print(f"     -> pattern '{PATTERN}' NOT found. Saved to:", dest)

    # Attempt to kill inferior if still running so we can restart cleanly next time
    try:
        gdb.execute("kill", to_string=True)
    except gdb.error:
        # expected when the program already exited; keep going
        pass

# Report the counters and where the results were written.
print("\nDone.")
print("Summary:")
for key in summary:
    print(f"  {key}: {summary[key]}")
print("Logs:", LOG_DIR)
print("Valid candidates:", VALID_DIR)

# End of gdb_run.py

经过筛选后,我发现了一个很有意思的崩溃:

$ ./jerry-asan /storage/jsfuzz/valid/id:000005,sig:11,src:005743,time:469380,execs:12877861,op:havoc,rep:4
=================================================================
==1365920==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7b6b9b700098 at pc 0x558aff052c4d bp 0x7ffcb9f80e60 sp 0x7ffcb9f80e50
READ of size 1 at 0x7b6b9b700098 thread T0
    #0 0x558aff052c4c in scanner_create_variables (/storage/jsfuzz/jerry-asan+0x78c4c) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #1 0x558aff0551bc in parser_parse_function_arguments.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7b1bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #2 0x558aff0585c8 in parser_parse_function (/storage/jsfuzz/jerry-asan+0x7e5c8) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #3 0x558aff0a26bc in lexer_construct_function_object (/storage/jsfuzz/jerry-asan+0xc86bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #4 0x558aff0a6a77 in parser_parse_class (/storage/jsfuzz/jerry-asan+0xcca77) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #5 0x558aff0b6198 in parser_parse_statements (/storage/jsfuzz/jerry-asan+0xdc198) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #6 0x558aff057d49 in parser_parse_source.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7dd49) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #7 0x558aff008764 in jerry_parse_common.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x2e764) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #8 0x558aff0bf0bc in jerryx_source_parse_script (/storage/jsfuzz/jerry-asan+0xe50bc) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #9 0x558afeff6be3 in main (/storage/jsfuzz/jerry-asan+0x1cbe3) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)
    #10 0x7f6b9da27674  (/usr/lib/libc.so.6+0x27674) (BuildId: 4fe011c94a88e8aeb6f2201b9eb369f42b4a1e9e)
    #11 0x7f6b9da27728 in __libc_start_main (/usr/lib/libc.so.6+0x27728) (BuildId: 4fe011c94a88e8aeb6f2201b9eb369f42b4a1e9e)
    #12 0x558afeff72e4 in _start (/storage/jsfuzz/jerry-asan+0x1d2e4) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)

Address 0x7b6b9b700098 is located in stack of thread T0 at offset 152 in frame
    #0 0x558aff055ffe in parser_parse_source.lto_priv.0 (/storage/jsfuzz/jerry-asan+0x7bffe) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b)

  This frame has 6 object(s):
    [32, 33) 'flags' (line 2041)
    [48, 49) 'flags' (line 2063)
    [64, 80) 'branch' (line 2253)
    [96, 112) 'literal'
    [128, 152) 'scanner_info_end' (line 2115) <== Memory access at offset 152 overflows this variable
    [192, 792) 'context' (line 1988)
HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
      (longjmp and C++ exceptions *are* supported)
SUMMARY: AddressSanitizer: stack-buffer-overflow (/storage/jsfuzz/jerry-asan+0x78c4c) (BuildId: 85560800a62467c72ec57dc61008c1abe723d70b) in scanner_create_variables
Shadow bytes around the buggy address:
  0x7b6b9b6ffe00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6ffe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6fff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b6fff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700000: f1 f1 f1 f1 01 f2 01 f2 00 00 f2 f2 f8 f8 f2 f2
=>0x7b6b9b700080: 00 00 00[f2]f2 f2 f2 f2 00 00 00 00 00 00 00 00
  0x7b6b9b700100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x7b6b9b700300: 00 00 00 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3 f3
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==1365920==ABORTING

这个输入是:

class MyError extends Error {7667111111111111111;;;;;;;static
 { throwased = true;
  d = trsert.s}.defeuse(resourcd = true;
 new MyError(); });
stack.defer(function () {});
assert.throws(MyError, functction (# {
 Csu 12), .defer(function41024448kTtrspose()&
});

还有一个输入会使得用于寄存器间接寻址的寄存器 RDI 的值变为 0x646573610a20650a('\ne \nased'),也就是说 RDI 的内容就是输入本身的一部分。不过很有意思的是它并不会触发 Address Sanitizer。说明 ASAN 很可能会改变某些调用栈帧的内存布局。(我手动 trim 了一下,不然这个输入真的又长又难看。)

class MyE{7667;;667;;sta;7;;667;;s;;#;statTtra;sta;7;;667;;;;;;;;s;;#;statTtra;;';s;;#at;#;statTtra;;';s;;#atTtra;;#;;sta;;;
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
ase
6
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
ased6671;
6
e 
ased = 
class{76671;
6
;
s;;;;;;;;;;static
as}}}}|}}}Of(}}|}csleO}}}}|}}}Of}}|}02000(1167E0Y.u(3}}}}}}}}}PisleO}}}}|}}}Of}}|}02000(1167E000002000(11676cY.u(Pisle}}}}PisleO}}}}|}}}OfInfinityaa, new .u9PisleOaaaaa!pa}}}}}}PisleO}}}}|}}}Of

另外有很多与它相似的 crash inputs,可以很明显发现 JerryScript 对于 JS 类私有字段名的处理有很大问题。

总结

其实这是一次没什么意义的 fuzzing,fuzz 类似编译器的软件应该使用结构化的 fuzzer,而不是 AFL++ 这样基本依靠字节随机变异的 fuzzer,不然连语法检查都过不了很难进一步挖掘漏洞。之后我可能再尝试一下 fuzzilli,或者考虑自己手写一个 fuzzer(画大饼 ing)。

第一次尝试

使用 archlinux 的 p7zip PKGBUILD,修改编译器为 afl-clang-lto(++)

编译时发现一个函数指针类型转换的报错,用 -Wno-cast-function-type-strict 参数来抑制。

准备了三个随便的 7z 文件,放进 input 文件夹直接开始。(@@ 是输入文件路径的占位符,afl-fuzz 会把它替换成当前测试用例的文件路径,而不是从 stdin 输入)

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@

结果:我看也就娱乐 fuzz,效果一坨🔟。

第二次尝试

准备了很多只有一个或几个文件,大小仅 100+ 字节的 7z 文件作为输入。

AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y

结果:覆盖率很快达到和第一次同样水平。

第三次尝试

了解到 7z 格式有 CRC 校验,估计大多数 fuzz 输入都死在校验上了。patch 源码去除校验:(应该使用 FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 宏)

// Fuzzing-only patch: unconditionally reports success without looking at p,
// so inputs are no longer rejected by this check (presumably the start-header
// CRC test, judging by the name - confirm against the upstream source).
static inline bool TestStartCrc(const Byte *p)
{
  (void)p; // suppress -Wunused-parameter
  return true;
}

// Fuzzing-only patch: the CRC is still computed and compared, but a mismatch
// now does nothing because the branch body is an empty statement.
if (CrcCalc(buffer2, nextHeaderSize_t) != nextHeaderCRC)
    ;

// Same neutralisation for the per-folder CRC comparison.
if (CrcCalc(data, unpackSize) != folders.FolderCRCs.Vals[i])
    ;

添加 -fsanitize=address -g 编译选项。添加 -so 运行选项,这样不会在目录里拉💩。

本来想加上 -si 参数从 stdin 输入压缩文件,应该可以大幅提升性能。但是 7z 格式竟然不支持,原因是 7z 有一部分文件头在文件末尾,解压前必须先读取。(意义不明...)

把整个 fuzzing 项目文件夹放在 tmpfs 里应该会更快吧,虽然 afl 官网教程推荐 ext2 + noatime。因为现在基本是在实验,暂时懒得配置 ext2 环境了。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果:本来不抱什么希望,睡了个午觉起来竟然就真的收集到了很多 crashes,但都是 OOM,没什么实际意义。(用 KDE Ark 打开其中某个 input,直接 coredump 了。)

==589143==ERROR: AddressSanitizer: requested allocation size 0x207ffffffffffff (0x208000000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)
    #0 0x6048688567e2 in operator new[](unsigned long) (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)
    #1 0x604868c0e1e0 in CBuffer<unsigned char>::Alloc(unsigned long) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/../../Common/../../Common/MyBuffer.h:72:18
    #2 0x604868c1f5bd in NArchive::N7z::CInArchive::ReadAndDecodePackedStreams(unsigned long, unsigned long&, CObjectVector<CBuffer<unsigned char>>&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1187:10
    #3 0x604868c280ce in NArchive::N7z::CInArchive::ReadDatabase2(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1705:28
    #4 0x604868be4ddd in NArchive::N7z::CInArchive::ReadDatabase(NArchive::N7z::CDbEx&, ICryptoGetTextPassword*, bool&, bool&, UString&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zIn.cpp:1743:25
    #5 0x604868be4ddd in NArchive::N7z::CHandler::Open(IInStream*, unsigned long const*, IArchiveOpenCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../Archive/7z/7zHandler.cpp:708:30
    #6 0x604868dc6db4 in OpenArchiveSpec(IInArchive*, bool, IInStream*, unsigned long const*, IArchiveOpenCallback*, IArchiveExtractCallback*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:1599:3
    #7 0x604868dbc2ba in CArc::OpenStream2(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:2744:26
    #8 0x604868dc9229 in CArc::OpenStream(COpenOptions const&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3024:3
    #9 0x604868dcb6c1 in CArc::OpenStreamOrFile(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3119:17
    #10 0x604868dcda76 in CArchiveLink::Open(COpenOptions&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3295:28
    #11 0x604868dd277e in CArchiveLink::Open2(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3419:17
    #12 0x604868d49f6d in CArchiveLink::Open3(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/OpenArchive.cpp:3487:17
    #13 0x604868d49f6d in CArchiveLink::Open_Strict(COpenOptions&, IOpenCallbackUI*) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/../Common/OpenArchive.h:437:22
    #14 0x604868d49f6d in Extract(CCodecs*, CObjectVector<COpenType> const&, CRecordVector<int> const&, CObjectVector<UString>&, CObjectVector<UString>&, NWildcard::CCensorNode const&, CExtractOptions const&, IOpenCallbackUI*, IExtractCallbackUI*, IFolderArchiveExtractCallback*, IHashCalc*, UString&, CDecompressStat&) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Common/Extract.cpp:422:30
    #15 0x604868e55d25 in Main2(int, char**) /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/Main.cpp:1378:21
    #16 0x604868e68411 in main /usr/src/debug/7zip/CPP/7zip/Bundles/Alone7z/../../UI/Console/MainAr.cpp:132:11
    #17 0x7bc23b33f6b4 in __libc_start_call_main /usr/src/debug/glibc/glibc/csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #18 0x7bc23b33f768 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:360:3
    #19 0x604868717a24 in _start (/tmp/7zfuzz/7zr+0xbba24) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d)

==589143==HINT: if you don't care about these errors you may set allocator_may_return_null=1
SUMMARY: AddressSanitizer: allocation-size-too-big (/tmp/7zfuzz/7zr+0x1fa7e2) (BuildId: 8450a6b1d6712a80c42046efedb6d74eb798c38d) in operator new[](unsigned long)
==589143==ABORTING

这触发原理我还真没看明白,不过 7z 文件格式里有不少直接由用户控制长度的字段,出现这种情况也算正常吧。

第四次尝试

CPP/Common/MyBuffer.h 里内存分配相关的函数用 __attribute__((no_sanitize("address"))) 标记,这样就不会被 ASAN 追踪了。由于这些内存分配函数本来就是热点,所以性能提升了不少,也不会报无意义的 OOM 错误,而且应该不会错过什么漏洞(毕竟真的只是 new[] 而已)。

刚才发现 afl-llvm-cmplog 这个工具,通过插桩记录程序中的比较操作,帮助 afl++ 生成能触发关键路径的输入。要想启用,需要在编译时加上 AFL_LLVM_CMPLOG=1 环境变量,fuzz 时加上参数 -c 0。在面对需要特定文件格式输入(魔法头之类)的 fuzzing 时效果明显。

AFL_USE_ASAN=1 AFL_SKIP_CPUFREQ=1 afl-fuzz -i input -o output -- ./7zr x @@ -y -so

结果:map density 翻倍了!

wget https://www.gstatic.com/webp/gallery/1.webp -O input/1.webp
wget https://www.gstatic.com/webp/gallery/2.webp -O input/2.webp
wget https://www.gstatic.com/webp/gallery/3.webp -O input/3.webp
wget https://www.gstatic.com/webp/gallery/4.webp -O input/4.webp
wget https://www.gstatic.com/webp/gallery/5.webp -O input/5.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_01.webp -O input/test_01.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_02.webp -O input/test_02.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_03.webp -O input/test_03.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_04.webp -O input/test_04.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_05.webp -O input/test_05.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossless.webp -O input/test_06_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_06_lossy.webp -O input/test_06_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossless.webp -O input/test_07_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_07_lossy.webp -O input/test_07_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossless.webp -O input/test_08_lossless.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_08_lossy.webp -O input/test_08_lossy.webp
wget https://raw.githubusercontent.com/signalapp/Signal-Android/main/glide-webp/app/src/main/assets/test_09_large.webp -O input/test_09_large.webp

第五次尝试

AFL_QUIET=1

CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared
  • 2 Master
  • 4 AFL_USE_ASAN=1 AFL_USE_UBSAN=1 AFL_USE_CFISAN=1
  • 6 AFL_LLVM_CMPLOG=1 -l 2AT
  • 8 AFL_LLVM_LAF_ALL=1
  • 10
cd CPP/7zip/Bundles/Alone2/

export EXEPTOR_CONFIG=$(pwd)/libexeptor.yaml
export EXEPTOR_LOG=$(pwd)/exeptor.log

LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_QUIET=1 AFL_LLVM_CMPLOG=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

AFL_LLVM_LAF_ALL=1 LD_PRELOAD=/tmp/exeptor/build/libexeptor.so make -j -f ../../cmpl_clang_x64.mak CC=afl-clang-lto CXX=afl-clang-lto++ USE_ASM=1 MY_ASM="uasm"

-f

llvm-ar rcs 7z.a ./b/c_x64/*.o

docker build . -t fuzz-7zip
docker run --rm -it --tmpfs /ramdisk:exec fuzz-7zip
cp -r /root/fuzz/ /ramdisk/ && cd /ramdisk/fuzz
export AFL_TESTCACHE_SIZE=256
AFL_FINAL_SYNC=1 afl-fuzz -i input -o output -M master -a binary -G 1024 -b 2 -- ./7zz_normal x -so -tzip @@
AFL_USE_ASAN=1 afl-fuzz -i input -o output -S sanitizer -a binary -G 1024 -b 4 -- ./7zz_sanitizer x -so -tzip @@
afl-fuzz -i input -o output -S cmplog -a binary -G 1024 -b 6 -c 0 -l 2AT -- ./7zz_cmplog x -so -tzip @@
afl-fuzz -i input -o output -S compcov -a binary -G 1024 -b 8 -- ./7zz_cmpcov x -so -tzip @@
#define Z7_ST
#include "../CPP/7zip/Archive/Common/DummyOutStream.h"
#include "../CPP/7zip/Common/CWrappers.h"
#include "7zAlloc.h"
#include "7zTypes.h"
#include "Alloc.h"
#include "Xz.h"
#include <string.h>
#include <unistd.h>
// Allocator table handed to XzDecMt_Create() by the harness.
static const ISzAlloc alloc = {SzAlloc, SzFree};
// Passed by address to XzDecMt_Decode(); initialised to False.
static int isMT = False;
// Passed by address to XzDecMt_Decode(); the harness never reads it back.
static CXzStatInfo stat;
// Output sink for fuzzing: accepts every write and discards the data.
class FakeOutStream : public ISequentialOutStream {
  public:
    // IUnknown
    // The object lives on the harness stack, so reference counting is a no-op.
    // NOTE(review): QueryInterface reports success without storing an
    // interface pointer; fine only as long as nothing actually calls it.
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialOutStream
    // Reports the whole buffer as written. processedSize is optional, matching
    // the null check BufInStream::Read performs on its own out-parameter.
    STDMETHOD(Write)(const void *, UInt32 size, UInt32 *processedSize) {
        if (processedSize)
            *processedSize = size;
        return S_OK;
    }
};
// Dummy input stream for fuzzing: default-construct it, then point it at the
// fuzz buffer with SetData(const uint8_t *data, size_t size). The buffer is
// not copied and must outlive the stream.
#include <algorithm>
class BufInStream : public ISequentialInStream {
  private:
    const uint8_t *_data; // borrowed input buffer
    size_t _size;         // number of bytes in _data
    size_t _pos;          // current read offset; may lie past _size after Seek

  public:
    BufInStream() : _data(nullptr), _size(0), _pos(0) {}
    // Attach a new buffer and rewind to its start.
    void SetData(const uint8_t *data, size_t size) {
        _data = data;
        _size = size;
        _pos = 0;
    }
    // IUnknown
    // The object lives on the harness stack, so reference counting is a no-op.
    STDMETHOD(QueryInterface)(REFIID, void **) { return S_OK; }
    STDMETHOD_(ULONG, AddRef)() { return 1; }
    STDMETHOD_(ULONG, Release)() { return 1; }
    // ISequentialInStream
    // Copies up to `size` bytes from the current position. At or past the end
    // it reports 0 bytes with S_OK, which signals end of stream.
    STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize) {
        if (_pos >= _size) {
            if (processedSize)
                *processedSize = 0;
            return S_OK;
        }
        UInt32 toRead = (UInt32)std::min<size_t>(size, _size - _pos);
        memcpy(data, _data + _pos, toRead);
        _pos += toRead;
        if (processedSize)
            *processedSize = toRead;
        return S_OK;
    }
    // Moves the read position. The arithmetic is done on the full 64-bit
    // offset: casting it to UInt32 first would turn a negative offset into a
    // jump of almost 4 GiB forward and would truncate large offsets. A target
    // before the start of the buffer is clamped to 0; an unknown origin leaves
    // the position unchanged.
    STDMETHOD(Seek)(Int64 offset, UInt32 seekOrigin, UInt64 *newPosition) {
        UInt64 base = 0;
        bool knownOrigin = true;
        if (seekOrigin == STREAM_SEEK_SET)
            base = 0;
        else if (seekOrigin == STREAM_SEEK_CUR)
            base = _pos;
        else if (seekOrigin == STREAM_SEEK_END)
            base = _size;
        else
            knownOrigin = false;

        if (knownOrigin) {
            UInt64 target;
            if (offset < 0) {
                // Magnitude via unsigned negation: well defined even for the
                // most negative Int64.
                const UInt64 back = (UInt64)0 - (UInt64)offset;
                target = (back > base) ? 0 : base - back;
            } else {
                // Saturate instead of wrapping around.
                const UInt64 room = ~(UInt64)0 - base;
                target = ((UInt64)offset > room) ? ~(UInt64)0
                                                 : base + (UInt64)offset;
            }
            _pos = (size_t)target;
        }
        if (newPosition)
            *newPosition = _pos;
        return S_OK;
    }
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    CXzDecMtHandle p = XzDecMt_Create(&alloc, &g_AlignedAlloc);
    CXzDecMtProps props;
    XzDecMtProps_Init(&props);
    BufInStream inStream;
    inStream.SetData(Data, Size);
    FakeOutStream outStream;
    CSeqInStreamWrap inWrap;
    CSeqOutStreamWrap outWrap;
    CCompressProgressWrap progressWrap;
    inWrap.Init(&inStream);
    outWrap.Init(&outStream);
    SRes res = XzDecMt_Decode(p, &props, NULL, CODER_FINISH_ANY, &outWrap.vt,
                              &inWrap.vt, &stat, &isMT, NULL);
    XzDecMt_Destroy(p);
    return 0;
}