#!/usr/bin/env bash
########################################################################
# Generates an "amalgamation build" for simdjson. Inspired by a similar
# script used by whefs.
#
# Optional environment overrides (each falls back to a default when it
# is unset or empty):
#   AMALGAMATE_SOURCE_PATH   directory searched for the files listed in
#                            ALLCFILES (default: <project>/src)
#   AMALGAMATE_INCLUDE_PATH  directory searched for the files listed in
#                            ALLCHEADERS (default: <project>/include)
#   AMALGAMATE_OUTPUT_PATH   directory receiving the generated files
#                            (default: the directory of this script)
########################################################################
set -e

# Resolve the script directory physically (pwd -P). `&&` makes a failed
# `cd` abort the script instead of silently reporting the caller's cwd.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)"
PROJECTPATH="$(dirname "$SCRIPTPATH")"
echo "Project at $PROJECTPATH"

echo "We are about to amalgamate all simdjson files into one source file. "
echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "

# `:=` assigns the default only when the variable is unset or empty.
: "${AMALGAMATE_SOURCE_PATH:=$PROJECTPATH/src}"
: "${AMALGAMATE_INCLUDE_PATH:=$PROJECTPATH/include}"
: "${AMALGAMATE_OUTPUT_PATH:=$SCRIPTPATH}"

# this list excludes the "src/generic headers"
ALLCFILES="
simdjson.cpp
"

# order matters
ALLCHEADERS="
simdjson.h
"

# Names of the headers that have already been inlined once.
found_includes=()

# Fail early if any entry point of the amalgamation is missing.
# The lists are whitespace-separated strings, so they are expanded
# unquoted on purpose.
for file in ${ALLCFILES}; do
  if [ ! -e "$AMALGAMATE_SOURCE_PATH/$file" ]; then
    echo "FATAL: source file [$AMALGAMATE_SOURCE_PATH/$file] not found." >&2
    exit 127
  fi
done

for file in ${ALLCHEADERS}; do
  if [ ! -e "$AMALGAMATE_INCLUDE_PATH/$file" ]; then
    echo "FATAL: source file [$AMALGAMATE_INCLUDE_PATH/$file] not found." >&2
    exit 127
  fi
done
#######################################
# Emits the amalgamated replacement for one `#include "..."` directive.
# Globals:
#   AMALGAMATE_INCLUDE_PATH (read), AMALGAMATE_SOURCE_PATH (read),
#   found_includes (read and appended to)
# Arguments:
#   $1     - include path as written between the quotes of the directive
#   $2...  - original text of the directive; multiple words are joined
#            with single spaces
# Outputs:
#   stdout: the inlined file (through dofile), an "already included"
#   comment, nothing at all (public header seen before), or the original
#   directive when the file exists in neither directory.
#######################################
function doinclude()
{
  local inc="$1"
  local directive="${*:2}"

  if [ -f "$AMALGAMATE_INCLUDE_PATH/$inc" ]; then
    # Public headers are inlined once; later occurrences are dropped.
    if [[ " ${found_includes[*]} " != *" ${inc} "* ]]; then
      found_includes+=("$inc")
      dofile "$AMALGAMATE_INCLUDE_PATH" "$inc"
    fi
  elif [ -f "$AMALGAMATE_SOURCE_PATH/$inc" ]; then
    # generic includes are included multiple times
    if [[ "$inc" == *'generic/'*'.h' ]]; then
      dofile "$AMALGAMATE_SOURCE_PATH" "$inc"
    elif [[ " ${found_includes[*]} " != *" ${inc} "* ]]; then
      found_includes+=("$inc")
      dofile "$AMALGAMATE_SOURCE_PATH" "$inc"
    else
      echo "/* $inc already included: $directive */"
    fi
  else
    # If we don't recognize it, just emit the #include
    printf '%s\n' "$directive"
  fi
}
#######################################
# Copies one file to stdout, expanding its `#include "..."` directives.
# Globals:
#   PROJECTPATH (read) - prefix stripped from the path in the markers
# Arguments:
#   $1 - directory containing the file
#   $2 - file path relative to $1
# Outputs:
#   stdout: a "begin file" marker, the file contents with every quoted
#   include handed to doinclude, then an "end file" marker.
#######################################
function dofile()
{
  # All state is local: doinclude calls dofile again for nested headers,
  # and the "end file" marker must still name *this* file afterwards.
  local path="$1/$2"
  local relfile="${path#"$PROJECTPATH/"}"
  local line inc

  echo "/* begin file $relfile */"
  # echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    if [[ "$line" == '#include "'*'"'* ]]; then
      # Extract the text between the first pair of double quotes.
      inc="${line#*\"}"
      inc="${inc%%\"*}"
      # include all from simdjson.cpp except simdjson.h
      if [ "$inc" == "simdjson.h" ] && [ "$2" == "simdjson.cpp" ]; then
        printf '%s\n' "$line"
        continue
      fi
      # Drop the leading "../" component.
      if [[ "$inc" == '../'* ]]; then
        inc="${inc#*/}"
      fi
      # we explicitly include simdjson headers, one time each (unless they are generic, in which case multiple times is fine)
      # Quoted so the directive reaches doinclude verbatim (no word
      # splitting or glob expansion of e.g. a trailing /* comment */).
      doinclude "$inc" "$line"
    else
      # Otherwise we simply copy the line
      printf '%s\n' "$line"
    fi
  done < "$path"
  echo "/* end file $relfile */"
}
# Build the amalgamated header and source file in AMALGAMATE_OUTPUT_PATH.
timestamp=$(date)
mkdir -p "$AMALGAMATE_OUTPUT_PATH"

AMAL_H="${AMALGAMATE_OUTPUT_PATH}/simdjson.h"
AMAL_C="${AMALGAMATE_OUTPUT_PATH}/simdjson.cpp"
DEMOCPP="${AMALGAMATE_OUTPUT_PATH}/amalgamate_demo.cpp"
README="$AMALGAMATE_OUTPUT_PATH/README.md"

echo "Creating ${AMAL_H}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  # ALLCHEADERS is a whitespace-separated list; unquoted on purpose.
  for h in ${ALLCHEADERS}; do
    doinclude "$h" "ERROR $h not found"
  done
} > "${AMAL_H}"

echo "Creating ${AMAL_C}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  # ALLCFILES is a whitespace-separated list; unquoted on purpose.
  for file in ${ALLCFILES}; do
    dofile "$AMALGAMATE_SOURCE_PATH" "$file"
  done
} > "${AMAL_C}"
# Write a small C++ demo that exercises the amalgamated files: it loads
# argv[1] as a single JSON document and, when given, argv[2] as a stream
# of documents.
echo "Creating ${DEMOCPP}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  # Quoted delimiter: the C++ text is emitted literally, no expansion.
  cat <<'DEMO_EOF'

#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
int main(int argc, char *argv[]) {
  if(argc < 2) {
    std::cerr << "Please specify at least one file name. " << std::endl;
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  simdjson::dom::parser parser;
  UNUSED simdjson::dom::element elem;
  auto error = parser.load(filename).get(elem); // do the parsing
  if (error) {
    std::cout << "parse failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse valid" << std::endl;
  }
  if(argc == 2) {
    return EXIT_SUCCESS;
  }

  // parse_many
  const char * filename2 = argv[2];
  simdjson::dom::document_stream stream;
  error = parser.load_many(filename2).get(stream);
  if (!error) {
    for (auto result : stream) {
      error = result.error();
    }
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse_many valid" << std::endl;
  }
  return EXIT_SUCCESS;
}

DEMO_EOF
} > "${DEMOCPP}"
# Name of the demo binary: the demo source file name without ".cpp".
CPPBIN=$(basename "${DEMOCPP}" .cpp)

# The README holds the one-line build-and-run command for the demo.
{
  echo "Try :"
  echo "c++ -O3 -std=c++17 -pthread -o ${CPPBIN} ${DEMOCPP##*/} && ./${CPPBIN##*/} ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson"
} > "${README}"

echo "Done with all files generation."

echo "Files have been written to directory: ${AMALGAMATE_OUTPUT_PATH}/"
ls -la "${AMAL_C}" "${AMAL_H}" "${DEMOCPP}" "${README}"

#
# Instructions to create demo
#
echo ""
echo "Giving final instructions:"

cat "${README}"
#######################################
# Prints its first argument converted to lower case.
# Arguments: $1 - text to convert
# Outputs:   the lower-cased text followed by a newline, on stdout
#######################################
lowercase() {
  # printf instead of echo: echo would treat arguments such as "-n" as
  # options. Character classes instead of A-Z ranges, whose meaning
  # depends on the locale's collation order.
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}
# Lower-cased kernel name as reported by uname.
OS=$(lowercase "$(uname)")