#!/usr/bin/env bash
########################################################################
# Generates an "amalgamation build" for simdjson. Inspired by similar
# script used by whefs.
#
# Optional environment overrides (each is used only when set and non-empty):
#   AMALGAMATE_SOURCE_PATH   defaults to "$PROJECTPATH/src"
#   AMALGAMATE_INCLUDE_PATH  defaults to "$PROJECTPATH/include"
#   AMALGAMATE_OUTPUT_PATH   defaults to the directory holding this script
########################################################################
set -e

# Directory holding this script, with symlinks resolved. `&&` (rather than
# `;`) makes a failed cd abort instead of silently reporting the caller's cwd.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)"
# Parent of the script directory; quoted so paths with spaces survive.
PROJECTPATH="$(dirname "$SCRIPTPATH")"
echo "Project at $PROJECTPATH"

echo "We are about to amalgamate all simdjson files into one source file. "
echo "See https://www.sqlite.org/amalgamation.html and https://en.wikipedia.org/wiki/Single_Compilation_Unit for rationale. "

# `:=` assigns the default when the variable is unset or empty.
: "${AMALGAMATE_SOURCE_PATH:=$PROJECTPATH/src}"
: "${AMALGAMATE_INCLUDE_PATH:=$PROJECTPATH/include}"
: "${AMALGAMATE_OUTPUT_PATH:=$SCRIPTPATH}"
# Whitespace-separated list of source files to amalgamate, relative to
# AMALGAMATE_SOURCE_PATH.
# this list excludes the "src/generic headers"
ALLCFILES="
simdjson.cpp
"

# Whitespace-separated list of public headers to amalgamate, relative to
# AMALGAMATE_INCLUDE_PATH.
# order matters
ALLCHEADERS="
simdjson.h
"

# Names of the files that have already been inlined into the current output.
found_includes=()
#######################################
# Aborts the script unless every listed file exists.
# Arguments:
#   $1   - directory to look in
#   $2.. - file names relative to $1
# Outputs:
#   A "FATAL: ..." message on stderr for the first missing file.
# Returns:
#   Exits the shell with status 127 on the first missing file.
#######################################
require_amalgamation_inputs() {
  local dir=$1
  local name
  shift
  for name in "$@"; do
    if [ ! -e "$dir/$name" ]; then
      echo "FATAL: source file [$dir/$name] not found." >&2
      exit 127
    fi
  done
}

# The lists are whitespace-separated strings, so the unquoted expansions
# below are deliberate: each name must become its own argument.
require_amalgamation_inputs "$AMALGAMATE_SOURCE_PATH" ${ALLCFILES}
require_amalgamation_inputs "$AMALGAMATE_INCLUDE_PATH" ${ALLCHEADERS}
#######################################
# Resolves one `#include "..."` directive met while amalgamating.
# Arguments:
#   $1   - include path as written between the quotes
#   $2.. - the original source line (extra words are joined with spaces)
# Globals:
#   AMALGAMATE_INCLUDE_PATH, AMALGAMATE_SOURCE_PATH (read)
#   found_includes (read, appended to)
# Outputs (stdout):
#   - file exists under the include path: inlined through dofile the first
#     time; nothing is printed on later occurrences
#   - file exists under the source path: inlined through dofile the first
#     time, replaced by an "already included" comment afterwards; names
#     matching generic/*.h are inlined on every occurrence
#   - otherwise: the original line
#######################################
function doinclude()
{
  # Locals matter: this function and dofile call each other recursively, so
  # plain globals would be overwritten by nested includes.
  local file=$1
  local line="${*:2}"
  local seen=0
  local name

  # Exact-name membership test against the files inlined so far.
  for name in "${found_includes[@]}"; do
    if [[ "$name" == "$file" ]]; then
      seen=1
      break
    fi
  done

  if [ -f "$AMALGAMATE_INCLUDE_PATH/$file" ]; then
    if [ "$seen" -eq 0 ]; then
      found_includes+=("$file")
      dofile "$AMALGAMATE_INCLUDE_PATH" "$file"
    fi
  elif [ -f "$AMALGAMATE_SOURCE_PATH/$file" ]; then
    # generic includes are included multiple times
    if [[ "${file}" == *'generic/'*'.h' ]]; then
      dofile "$AMALGAMATE_SOURCE_PATH" "$file"
    elif [ "$seen" -eq 0 ]; then
      found_includes+=("$file")
      dofile "$AMALGAMATE_SOURCE_PATH" "$file"
    else
      echo "/* $file already included: $line */"
    fi
  else
    # If we don't recognize it, just emit the #include
    echo "$line"
  fi
}
#######################################
# Copies one source file to stdout, expanding its quoted #include lines.
# Arguments:
#   $1 - directory containing the file
#   $2 - file name relative to $1
# Globals:
#   PROJECTPATH (read) - prefix removed from the path shown in the banners
# Outputs (stdout):
#   "/* begin file ... */", the file's lines, then "/* end file ... */".
#   Every line starting with `#include "` is handed to doinclude instead of
#   being copied, except the include of simdjson.h found in simdjson.cpp,
#   which is copied as-is.
#######################################
function dofile()
{
  # All state is local: doinclude calls back into dofile for nested includes,
  # and a shared global would make the "end file" banner name the wrong file.
  local path="$1/$2"
  local relfile=${path#"$PROJECTPATH/"}
  local line
  local included

  echo "/* begin file $relfile */"
  # echo "#line 8 \"$1\"" ## redefining the line/file is not nearly as useful as it sounds for debugging. It breaks IDEs.
  # The `|| [ -n "$line" ]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    if [[ "${line}" == '#include "'*'"'* ]]; then
      # Text between the first pair of double quotes.
      included=${line#*\"}
      included=${included%%\"*}
      # include all from simdjson.cpp except simdjson.h
      if [ "${included}" == "simdjson.h" ] && [ "${2}" == "simdjson.cpp" ]; then
        echo "$line"
        continue
      fi

      # Drop one leading "../" so the name is relative to the search roots.
      included=${included#../}

      # we explicitly include simdjson headers, one time each (unless they are generic, in which case multiple times is fine)
      # Both arguments are quoted so the line keeps its spacing and is never
      # glob-expanded.
      doinclude "$included" "$line"
    else
      # Otherwise we simply copy the line
      echo "$line"
    fi
  done < "$path"
  echo "/* end file $relfile */"
}
# Timestamp placed in the banner comment of every generated file.
timestamp=$(date)
# Quoted so an output directory containing spaces is created as one path.
mkdir -p "$AMALGAMATE_OUTPUT_PATH"

# Files produced by this script, all inside the output directory.
AMAL_H="${AMALGAMATE_OUTPUT_PATH}/simdjson.h"
AMAL_C="${AMALGAMATE_OUTPUT_PATH}/simdjson.cpp"
DEMOCPP="${AMALGAMATE_OUTPUT_PATH}/amalgamate_demo.cpp"
README="$AMALGAMATE_OUTPUT_PATH/README.md"
# Write the amalgamated header: a banner line followed by every header in
# ALLCHEADERS, each expanded through doinclude.
echo "Creating ${AMAL_H}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  # ALLCHEADERS is a whitespace-separated string; splitting it is intended.
  for h in ${ALLCHEADERS}; do
    # The second argument is what doinclude prints if it cannot find the file.
    doinclude "$h" "ERROR $h not found"
  done
} > "${AMAL_H}"
# Write the amalgamated source: a banner line followed by every file in
# ALLCFILES, each expanded through dofile.
echo "Creating ${AMAL_C}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  # ALLCFILES is a whitespace-separated string; splitting it is intended.
  for file in ${ALLCFILES}; do
    dofile "$AMALGAMATE_SOURCE_PATH" "$file"
  done
} > "${AMAL_C}"
#######################################
# Prints the C++ source of the demo program to stdout.
# The heredoc delimiter is quoted, so the text is emitted literally with no
# shell expansion.
#######################################
emit_demo_source() {
  cat <<'DEMO_SOURCE'

#include <iostream>
#include "simdjson.h"
#include "simdjson.cpp"
int main(int argc, char *argv[]) {
  if(argc < 2) {
    std::cerr << "Please specify at least one file name. " << std::endl;
    return EXIT_FAILURE;
  }
  const char * filename = argv[1];
  simdjson::dom::parser parser;
  simdjson::error_code error;
  UNUSED simdjson::dom::element elem;
  parser.load(filename).tie(elem, error); // do the parsing
  if (error) {
    std::cout << "parse failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse valid" << std::endl;
  }
  if(argc == 2) {
    return EXIT_SUCCESS;
  }

  // parse_many
  const char * filename2 = argv[2];
  for (auto result : parser.load_many(filename2)) {
    error = result.error();
  }
  if (error) {
    std::cout << "parse_many failed" << std::endl;
    std::cout << "error code: " << error << std::endl;
    std::cout << error << std::endl;
    return EXIT_FAILURE;
  } else {
    std::cout << "parse_many valid" << std::endl;
  }
  return EXIT_SUCCESS;
}

DEMO_SOURCE
}

# Write the demo program: a banner line followed by the source above.
echo "Creating ${DEMOCPP}..."
{
  echo "/* auto-generated on ${timestamp}. Do not edit! */"
  emit_demo_source
} > "${DEMOCPP}"
# Name of the demo binary: the demo source's file name without ".cpp".
CPPBIN=$(basename "${DEMOCPP}" .cpp)

# The README holds the command line that builds and runs the demo.
{
  echo "Try :"
  echo "c++ -O3 -std=c++17 -pthread -o ${CPPBIN} ${DEMOCPP##*/} && ./${CPPBIN} ../jsonexamples/twitter.json ../jsonexamples/amazon_cellphones.ndjson"
} > "${README}"

echo "Done with all files generation."

echo "Files have been written to directory: ${AMALGAMATE_OUTPUT_PATH}/"
# Quoted so paths containing spaces are listed as single files.
ls -la "${AMAL_C}" "${AMAL_H}" "${DEMOCPP}" "${README}"

#
# Instructions to create demo
#
echo ""
echo "Giving final instructions:"

cat "${README}"
#######################################
# Prints its first argument converted to lower case.
# Arguments:
#   $1 - text to convert
# Outputs:
#   The lower-cased text followed by a newline, on stdout.
#######################################
lowercase(){
  # printf (not echo) so an argument such as "-n" is printed, not parsed as
  # an option; the POSIX classes avoid locale-dependent A-Z ranges.
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}
# Lower-cased output of `uname`.
OS=$(lowercase "$(uname)")