diff --git a/CMakeLists.txt b/CMakeLists.txt index 71c13fd4745..18c648233d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,9 @@ include (CMakeDependentOption) include(MongoC-Warnings) +# "Fuzzing" must be included before "Sanitizers," to enable fuzzer sanitizer +include (Fuzzing) + # Enable CCache, if possible include (CCache) diff --git a/build/cmake/CMakeLists.txt b/build/cmake/CMakeLists.txt index faf4fe295b5..7ff5468c78e 100644 --- a/build/cmake/CMakeLists.txt +++ b/build/cmake/CMakeLists.txt @@ -6,6 +6,7 @@ set (build_cmake_MODULES FindSASL2.cmake FindSnappy.cmake FindSphinx.cmake + Fuzzing.cmake LoadVersion.cmake MaintainerFlags.cmake MongoCPackage.cmake diff --git a/build/cmake/Fuzzing.cmake b/build/cmake/Fuzzing.cmake new file mode 100644 index 00000000000..c22eb77dc19 --- /dev/null +++ b/build/cmake/Fuzzing.cmake @@ -0,0 +1,131 @@ +option (ENABLE_FUZZING "Enable fuzzing using LLVM libFuzzer" OFF) + +if (ENABLE_FUZZING) + # This will add another sanitizer when we later include Sanitizers.cmake + list (APPEND MONGO_SANITIZE "fuzzer-no-link") +endif () + +include (ProcessorCount) +ProcessorCount (_FUZZER_PARALLELISM) + +set (_FUZZERS_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/fuzzers") + + +#[[ + Generate an executable target that links and runs with LLVM libFuzzer. + + Amongst the given source files there must be one definition of LLVMFuzzerTestOneInput. + Refer: https://www.llvm.org/docs/LibFuzzer.html#fuzz-target + + This will define an executable with the given name, and all additional + arguments will be given as source files to that executable. This executable + will be linked with the '-fsanitize=fuzzer' command-line option. + + This will additionally define a custom target "run-fuzzer-${name}," which, + when executed, will run the fuzzer executable with a set of pre-defined + libFuzzer command-line options. 
+ + The following target properties can be used to control the 'run-fuzzer' + target: + + FUZZER_FORK (integer) + Set the number of parallel fuzzer tasks to run. The default is the + parallelism of the host plus four. + + FUZZER_TIMEOUT (integer, seconds) + Set the maximum amount of time a single fuzzer task should run before the fuzzer + considers it to be "stuck" and generates a timeout report for the + given input. + + FUZZER_LEN_CONTROL (integer, 1-100) + Set the len_control option for the libFuzzer run. Lower values tend to + generate larger inputs. Default is 50. + + FUZZER_MAX_LEN (integer, bytes) + Set the maximum input size for a fuzzer input. The default is 4096. + + FUZZER_ONLY_ASCII (boolean) + If TRUE, only valid ASCII will be given as fuzzer input. + The default is FALSE. + + FUZZER_DICT (filepath) + Set to a filepath of a fuzzer dictionary. + Refer: https://www.llvm.org/docs/LibFuzzer.html#dictionaries + Default is to have no dictionary. + + Fuzzer executables are written to the fuzzers/ directory of the build tree. + + This will unconditionally define the target and the custom target that + executes it, but it will be EXCLUDE_FROM_ALL=TRUE if the CMake setting + ENABLE_FUZZING is not true. 
+]] +function (mongoc_add_fuzzer name) + add_executable ("${name}" ${ARGN}) + # Run with 4 more jobs than hardware parallelism + math (EXPR default_fork "${_FUZZER_PARALLELISM} + 4") + set_target_properties("${name}" PROPERTIES + # Qualify the filename with the build type: + DEBUG_POSTFIX ".debug" + RELEASE_POSTFIX ".opt" + RELWITHDEBINFO_POSTFIX ".opt-debug" + # Put them all in the fuzzers/ directory: + RUNTIME_OUTPUT_DIRECTORY "${_FUZZERS_OUT_DIR}" + RUNTIME_OUTPUT_DIRECTORY_RELEASE "${_FUZZERS_OUT_DIR}" + RUNTIME_OUTPUT_DIRECTORY_DEBUG "${_FUZZERS_OUT_DIR}" + RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${_FUZZERS_OUT_DIR}" + # Target options to control the fuzzer run: + FUZZER_FORK "${default_fork}" + FUZZER_TIMEOUT "10" + FUZZER_LEN_CONTROL "50" + FUZZER_MAX_LEN "4096" + FUZZER_ONLY_ASCII "FALSE" + ) + # Link with the libFuzzer runtime: + target_link_libraries ("${name}" PRIVATE -fsanitize=fuzzer) + + set (dict "$") + set (art_dir "$/${name}.out/") + add_custom_target(run-fuzzer-${name} + COMMAND "${CMAKE_COMMAND}" -E make_directory "${art_dir}/corpus" + # Print some useful info for the user: + COMMAND "${CMAKE_COMMAND}" -E echo + COMMAND "${CMAKE_COMMAND}" -E echo + " Running fuzzer program : $" + COMMAND "${CMAKE_COMMAND}" -E echo + " Corpus is stored in : ${art_dir}/corpus" + COMMAND "${CMAKE_COMMAND}" -E echo + " Crashes will appear in : ${art_dir}" + COMMAND "${CMAKE_COMMAND}" -E echo + # Run the fuzzer: + COMMAND + "${CMAKE_COMMAND}" -E chdir "${art_dir}" + "$" + -create_missing_dirs=1 + -collect_data_flow=1 + -shrink=1 # Try to shrink the test corpus + -use_value_profile=1 + -ignore_timeouts=0 # Do not ignore timeouts + -ignore_ooms=0 # Do not ignore OOMs + -reload=10 # Reload every ten seconds + "-artifact_prefix=${art_dir}" + # Target property options: + "-fork=$" + "-timeout=$" + "-max_len=$" + "-len_control=$" + "-only_ascii=$>" + "-analyze_dict=$" + "$,-dict=${dict},${art_dir}/corpus>" + "${art_dir}/corpus" + WORKING_DIRECTORY "${_FUZZERS_OUT_DIR}" + 
+ DEPENDS "${name}" + VERBATIM USES_TERMINAL + ) + + # We might not want to build by default: + if (NOT ENABLE_FUZZING) + # Fuzzing is not enabled. Exclude the target from being built by default, but still define + # it so that CMake can verify that it is used correctly. + set_property (TARGET "${name}" PROPERTY EXCLUDE_FROM_ALL TRUE) + endif () +endfunction () diff --git a/src/libbson/CMakeLists.txt b/src/libbson/CMakeLists.txt index d6df66f8657..ed1fe0e4248 100644 --- a/src/libbson/CMakeLists.txt +++ b/src/libbson/CMakeLists.txt @@ -473,7 +473,6 @@ endif () add_subdirectory (build) # sub-directory 'doc' was already included above add_subdirectory (examples) -add_subdirectory (fuzz) add_subdirectory (src) add_subdirectory (tests) @@ -481,14 +480,30 @@ set_local_dist (src_libbson_DIST_local CMakeLists.txt NEWS THIRD_PARTY_NOTICES + fuzz/bson.fuzz.c + fuzz/json.fuzz.c ) +mongoc_add_fuzzer (json-fuzz fuzz/json.fuzz.c) +target_link_libraries(json-fuzz PRIVATE bson_static) + +mongoc_add_fuzzer (bson-fuzz fuzz/bson.fuzz.c) +target_link_libraries(bson-fuzz PRIVATE bson_static) +set_property(TARGET bson-fuzz PROPERTY FUZZER_MAX_LEN 65536) + +add_executable (bson2json tools/bson2json.main.c) +add_executable (json2bson tools/json2bson.main.c) +target_link_libraries(bson2json PRIVATE bson_static) +target_link_libraries(json2bson PRIVATE bson_static) +set_target_properties(bson2json json2bson PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + ) + set (src_libbson_DIST ${src_libbson_DIST_local} ${src_libbson_build_DIST} ${src_libbson_doc_DIST} ${src_libbson_examples_DIST} - ${src_libbson_fuzz_DIST} ${src_libbson_src_DIST} ${src_libbson_tests_DIST} PARENT_SCOPE diff --git a/src/libbson/fuzz/CMakeLists.txt b/src/libbson/fuzz/CMakeLists.txt deleted file mode 100644 index 50aee866c98..00000000000 --- a/src/libbson/fuzz/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -set_dist_list (src_libbson_fuzz_DIST - CMakeLists.txt - fuzz_test_libbson.c -) diff --git 
a/src/libbson/fuzz/bson.fuzz.c b/src/libbson/fuzz/bson.fuzz.c new file mode 100644 index 00000000000..348afae588b --- /dev/null +++ b/src/libbson/fuzz/bson.fuzz.c @@ -0,0 +1,15 @@ +#include + +#include + +int +LLVMFuzzerTestOneInput (const uint8_t *data, size_t len) +{ + bson_t *b = bson_new_from_data (data, len); + if (!b) { + return 0; + } + bson_validate (b, 0xffffff, NULL); + bson_destroy (b); + return 0; +} diff --git a/src/libbson/fuzz/fuzz_test_libbson.c b/src/libbson/fuzz/fuzz_test_libbson.c deleted file mode 100644 index 9ad6f5c3449..00000000000 --- a/src/libbson/fuzz/fuzz_test_libbson.c +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include -#include -#include - -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - char *nt = malloc(size+1); - memcpy(nt, data, size); - nt[size] = '\0'; - bson_error_t error; - - bson_t b; - if (bson_init_from_json(&b, nt, -1, &error)) { - bson_destroy(&b); - } - - free(nt); - return 0; -} diff --git a/src/libbson/fuzz/json.fuzz.c b/src/libbson/fuzz/json.fuzz.c new file mode 100644 index 00000000000..4dc3244e9c3 --- /dev/null +++ b/src/libbson/fuzz/json.fuzz.c @@ -0,0 +1,11 @@ +#include + +#include + +int +LLVMFuzzerTestOneInput (const uint8_t *data, size_t len) +{ + bson_t *b = bson_new_from_json (data, (ssize_t) len, NULL); + bson_destroy (b); + return 0; +} diff --git a/src/libbson/tools/bson2json.main.c b/src/libbson/tools/bson2json.main.c new file mode 100644 index 00000000000..a41815cdd22 --- /dev/null +++ b/src/libbson/tools/bson2json.main.c @@ -0,0 +1,54 @@ +#include + +#include "./common.h" + + +int +main (int argc, char **argv) +{ + if (argc != 1) { + fputs ("Usage:\n" + " Pipe a BSON document through standard input, and this program\n" + " will write JSON data to standard output.\n", + stderr); + return 1; + } + + int retcode = 0; + + read_result read = read_stream (stdin); + if (read.error) { + fprintf (stderr, "Failed to read from stdin: %s", strerror (read.error)); + retcode = 2; + goto read_fail; + } 
+ + bson_t b; + if (!bson_init_static (&b, read.data, read.len)) { + fputs ("Failed to read BSON: Invalid header\n", stderr); + retcode = 3; + goto bson_init_fail; + } + + size_t len; + char *json = bson_as_canonical_extended_json (&b, &len); + if (!json) { + fputs ("Failed to create JSON data\n", stderr); + retcode = 4; + goto json_fail; + } + + const char *jptr = json; + for (size_t remain = len; remain;) { + size_t nwritten = fwrite (jptr, 1, remain, stdout); + remain -= nwritten; + jptr += nwritten; + } + +json_fail: + bson_free (json); +bson_init_fail: + free (read.data); +read_fail: + return retcode; +} diff --git a/src/libbson/tools/common.h b/src/libbson/tools/common.h new file mode 100644 index 00000000000..1bceb75df13 --- /dev/null +++ b/src/libbson/tools/common.h @@ -0,0 +1,58 @@ +#ifndef BSON_TOOLS_COMMON_H_INCLUDED +#define BSON_TOOLS_COMMON_H_INCLUDED + +#include +#include +#include + +enum { PRINT_TRACE = 0 }; +#define TRACE(S, ...) \ + if (PRINT_TRACE) { \ + fprintf (stderr, S "\n", __VA_ARGS__); \ + } else \ + ((void) (0)) + +typedef struct read_result { + uint8_t *data; + size_t len; + int error; +} read_result; + +static inline read_result +read_stream (FILE *strm) +{ + size_t buf_size = 0; + uint8_t *data = NULL; + size_t total_nread = 0; + while (true) { + // Calc how much is space is left in our buffer: + const size_t buf_remain = buf_size - total_nread; + if (buf_remain == 0) { + // Increase the buffer size: + buf_size += 1024; + TRACE ("Increase buffer size to %zu bytes", buf_size); + data = realloc (data, buf_size); + if (!data) { + fputs ("Failed to allocate a buffer for input\n", stderr); + free (data); + return (read_result){.error = ENOMEM}; + } + // Try again + continue; + } + // Set the output pointer to the beginning of the unread area: + uint8_t *const ptr = data + total_nread; + // Read some more + TRACE ("Try to read %zu bytes", buf_remain); + const size_t part_nread = fread (ptr, 1, buf_remain, strm); + TRACE ("Read %zu bytes", 
part_nread); + if (part_nread == 0) { + // EOF + break; + } + total_nread += part_nread; + } + return (read_result){.data = data, .len = total_nread}; +} + +#endif // BSON_TOOLS_COMMON_H_INCLUDED diff --git a/src/libbson/tools/json2bson.main.c b/src/libbson/tools/json2bson.main.c new file mode 100644 index 00000000000..367f6cb96c2 --- /dev/null +++ b/src/libbson/tools/json2bson.main.c @@ -0,0 +1,49 @@ +#include + +#include "./common.h" + +int +main (int argc, char **argv) +{ + if (argc != 1) { + fputs ("Usage:\n" + " Pipe a JSON document through standard input, and this program\n" + " will write bson data to standard output.\n", + stderr); + return 1; + } + + int retcode = 0; + + read_result read = read_stream (stdin); + if (read.error) { + fprintf ( + stderr, "Failed to read from stdin: %s\n", strerror (read.error)); + retcode = 2; + goto read_fail; + } + + bson_error_t error; + bson_t *b = bson_new_from_json (read.data, read.len, &error); + if (!b) { + fprintf (stderr, + "Failed to read JSON into BSON: %d:%d %s\n", + error.domain, + error.code, + error.message); + goto from_json_fail; + } + + const uint8_t *bdata = bson_get_data (b); + for (size_t remain = b->len; remain;) { + size_t nwritten = fwrite (bdata, 1, remain, stdout); + remain -= nwritten; + bdata += nwritten; + } + +from_json_fail: + bson_destroy (b); + free (read.data); +read_fail: + return retcode; +}