Moving tests to a separate file and directory.
This commit is contained in:
parent
01ea7996b2
commit
d204e54170
37
Makefile
37
Makefile
|
@ -11,8 +11,9 @@ CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Idependencies/d
|
|||
LIBFLAGS = -ldouble-conversion
|
||||
#CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Wno-implicit-function-declaration
|
||||
|
||||
EXECUTABLES=parse
|
||||
|
||||
EXECUTABLES=parse jsoncheck
|
||||
HEADERS=common_defs.h jsonioutil.h linux-perf-events.h simdjson_internal.h stage1_find_marks.h stage2_flatten.h stage3_ape_machine.h stage4_shovel_machine.h
|
||||
LIBFILES=stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp
|
||||
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
|
||||
|
||||
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
|
||||
|
@ -20,16 +21,22 @@ LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
|
|||
LIBS=$(LIDDOUBLE)
|
||||
|
||||
all: $(LIBS) $(EXECUTABLES)
|
||||
-./parse
|
||||
|
||||
test: jsoncheck
|
||||
./jsoncheck
|
||||
|
||||
$(LIDDOUBLE) : dependencies/double-conversion/README.md
|
||||
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
|
||||
|
||||
parse: main.cpp stage1_find_marks.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parse stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp main.cpp $(LIBFLAGS)
|
||||
parse: main.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) main.cpp $(LIBFLAGS)
|
||||
|
||||
parsehisto: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFLAGS) -DBUILDHISTOGRAM
|
||||
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
|
||||
|
||||
|
||||
parsehisto: main.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
|
||||
|
||||
testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
|
||||
for filename in jsonexamples/twitter.json jsonexamples/gsoc-2018.json jsonexamples/citm_catalog.json jsonexamples/canada.json ; do \
|
||||
|
@ -43,18 +50,18 @@ testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
|
|||
set +x; \
|
||||
done
|
||||
|
||||
parsenocheesy: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp -DSUPPRESS_CHEESY_FLATTEN
|
||||
parsenocheesy: main.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp $(LIBFILES) -DSUPPRESS_CHEESY_FLATTEN
|
||||
|
||||
parsenodep8: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
|
||||
parsenodep8: main.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
|
||||
|
||||
parsenodep10: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
|
||||
# Variant of the parser built with NO_PDEP_PLEASE and a PDEP width of 10.
# The output name must match the target name; it previously wrote to
# parsenodep12, so this target was never produced and the other binary was
# overwritten.
parsenodep10: main.cpp $(HEADERS) $(LIBFILES)
	$(CXX) $(CXXFLAGS) -o parsenodep10 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
|
||||
|
||||
|
||||
parsenodep12: main.cpp common_defs.h linux-perf-events.h
|
||||
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
|
||||
parsenodep12: main.cpp $(HEADERS) $(LIBFILES)
|
||||
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
|
||||
|
||||
|
||||
dependencies/double-conversion/README.md:
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
#ifndef JSONIOUTIL_H
|
||||
#define JSONIOUTIL_H
|
||||
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
// get a corpus; pad out to cache line so we can always use SIMD
|
||||
// throws exceptions in case of failure
|
||||
std::pair<u8 *, size_t> get_corpus(std::string filename) {
|
||||
std::ifstream is(filename, std::ios::binary);
|
||||
if (is) {
|
||||
std::stringstream buffer;
|
||||
buffer << is.rdbuf();
|
||||
size_t length = buffer.str().size();
|
||||
char *aligned_buffer;
|
||||
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
||||
throw std::runtime_error("Could not allocate sufficient memory");
|
||||
};
|
||||
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
||||
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
||||
is.close();
|
||||
return std::make_pair((u8 *)aligned_buffer, length);
|
||||
}
|
||||
throw std::runtime_error("could not load corpus");
|
||||
return std::make_pair((u8 *)0, (size_t)0);
|
||||
}
|
||||
|
||||
#endif
|
115
main.cpp
115
main.cpp
|
@ -39,29 +39,10 @@ using namespace double_conversion;
|
|||
#include "stage2_flatten.h"
|
||||
#include "stage3_ape_machine.h"
|
||||
#include "stage4_shovel_machine.h"
|
||||
|
||||
#include "jsonioutil.h"
|
||||
using namespace std;
|
||||
|
||||
// get a corpus; pad out to cache line so we can always use SIMD
|
||||
pair<u8 *, size_t> get_corpus(string filename) {
|
||||
ifstream is(filename, ios::binary);
|
||||
if (is) {
|
||||
stringstream buffer;
|
||||
buffer << is.rdbuf();
|
||||
size_t length = buffer.str().size();
|
||||
char * aligned_buffer;
|
||||
if (posix_memalign( (void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
|
||||
cerr << "Could not allocate memory\n";
|
||||
exit(1);
|
||||
};
|
||||
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
|
||||
memcpy(aligned_buffer, buffer.str().c_str(), length);
|
||||
is.close();
|
||||
return make_pair((u8 *)aligned_buffer, length);
|
||||
}
|
||||
throw "No corpus";
|
||||
return make_pair((u8 *)0, (size_t)0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
|
||||
|
@ -115,80 +96,12 @@ void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether the file name ends with the given extension.
|
||||
*/
|
||||
static bool hasExtension(const char *filename, const char *extension) {
|
||||
const char *ext = strrchr(filename, '.');
|
||||
return (ext && !strcmp(ext, extension));
|
||||
}
|
||||
|
||||
bool startsWith(const char *pre, const char *str) {
|
||||
size_t lenpre = strlen(pre),
|
||||
lenstr = strlen(str);
|
||||
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
|
||||
}
|
||||
|
||||
void validate() {
|
||||
init_state_machine();// to be safe
|
||||
const char *dirname = "jsonchecker/"; // ugly, hardcoded, brittle
|
||||
const char *extension = ".json";
|
||||
size_t dirlen = strlen(dirname);
|
||||
struct dirent **entry_list;
|
||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||
if (c < 0) {
|
||||
printf("error accessing %s \n", dirname);
|
||||
return;
|
||||
}
|
||||
if (c == 0) {
|
||||
printf("nothing in dir %s \n", dirname);
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < c; i++) {
|
||||
const char *name = entry_list[i]->d_name;
|
||||
if (hasExtension(name, extension)) {
|
||||
printf("validating: file %s \n",name);
|
||||
size_t filelen = strlen(name);
|
||||
char *fullpath = (char *)malloc(dirlen + filelen + 1);
|
||||
strcpy(fullpath, dirname);
|
||||
strcpy(fullpath + dirlen, name);
|
||||
pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||
// terrible hack but just to get it working
|
||||
ParsedJson * pj_ptr = new ParsedJson;
|
||||
ParsedJson & pj(*pj_ptr);
|
||||
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
|
||||
cerr << "Could not allocate memory\n";
|
||||
return;
|
||||
};
|
||||
pj.n_structural_indexes = 0;
|
||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
||||
pj.structural_indexes = new u32[max_structures];
|
||||
find_structural_bits(p.first, p.second, pj);
|
||||
flatten_indexes(p.second, pj);
|
||||
bool isok = ape_machine(p.first, p.second, pj);
|
||||
if(isok)
|
||||
isok = shovel_machine(p.first, p.second, pj);
|
||||
if(startsWith("pass",name)) {
|
||||
if(!isok) printf("warning: file %s should pass but it fails.\n",name);
|
||||
}
|
||||
if(startsWith("fail",name)) {
|
||||
if(isok) printf("warning: file %s should fail but it passes.\n",name);
|
||||
}
|
||||
free(pj.structurals);
|
||||
free(p.first);
|
||||
delete[] pj.structural_indexes;
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i) free(entry_list[i]);
|
||||
free(entry_list);
|
||||
}
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
if (argc != 2) {
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
|
||||
cout << "We are going to validate:\n" << std::endl;
|
||||
validate();
|
||||
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
pair<u8 *, size_t> p = get_corpus(argv[1]);
|
||||
|
@ -196,7 +109,7 @@ int main(int argc, char * argv[]) {
|
|||
ParsedJson & pj(*pj_ptr);
|
||||
|
||||
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
|
||||
cerr << "Could not allocate memory\n";
|
||||
cerr << "Could not allocate memory" << endl;
|
||||
exit(1);
|
||||
};
|
||||
|
||||
|
@ -237,38 +150,44 @@ int main(int argc, char * argv[]) {
|
|||
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
|
||||
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
|
||||
#endif
|
||||
bool isok = true;
|
||||
for (u32 i = 0; i < iterations; i++) {
|
||||
auto start = std::chrono::steady_clock::now();
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.start();
|
||||
#endif
|
||||
find_structural_bits(p.first, p.second, pj);
|
||||
isok = find_structural_bits(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy1 += results[0]; cl1 += results[1];
|
||||
if(! isok ) break;
|
||||
unified.start();
|
||||
#endif
|
||||
flatten_indexes(p.second, pj);
|
||||
isok = flatten_indexes(p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy2 += results[0]; cl2 += results[1];
|
||||
if(! isok ) break;
|
||||
unified.start();
|
||||
#endif
|
||||
ape_machine(p.first, p.second, pj);
|
||||
isok = ape_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy3 += results[0]; cl3 += results[1];
|
||||
if(! isok ) break;
|
||||
unified.start();
|
||||
#endif
|
||||
shovel_machine(p.first, p.second, pj);
|
||||
isok = shovel_machine(p.first, p.second, pj);
|
||||
#ifndef SQUASH_COUNTERS
|
||||
unified.end(results);
|
||||
cy4 += results[0]; cl4 += results[1];
|
||||
#endif
|
||||
if(! isok ) break;
|
||||
auto end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> secs = end - start;
|
||||
res[i] = secs.count();
|
||||
}
|
||||
|
||||
#ifndef SQUASH_COUNTERS
|
||||
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
|
||||
(double) pj.n_structural_indexes / p.second);
|
||||
|
@ -303,5 +222,9 @@ int main(int argc, char * argv[]) {
|
|||
free(p.first);
|
||||
delete[] pj.structural_indexes;
|
||||
delete pj_ptr;
|
||||
return 0;
|
||||
if(! isok ) {
|
||||
printf(" Parsing failed. \n ");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,10 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
|
|||
}
|
||||
|
||||
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
|
||||
if (len > 0xffffff) {
|
||||
cerr << "Currently only support JSON files < 16MB\n";
|
||||
return false;
|
||||
}
|
||||
// Useful constant masks
|
||||
const u64 even_bits = 0x5555555555555555ULL;
|
||||
const u64 odd_bits = ~even_bits;
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
#include <assert.h>
#include <dirent.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <utility>
|
||||
|
||||
#include "common_defs.h"
|
||||
#include "jsonioutil.h"
|
||||
#include "simdjson_internal.h"
|
||||
#include "stage1_find_marks.h"
|
||||
#include "stage2_flatten.h"
|
||||
#include "stage3_ape_machine.h"
|
||||
#include "stage4_shovel_machine.h"
|
||||
|
||||
/**
 * Checks whether the part of filename that starts at its last '.' is exactly
 * equal to extension. The extension therefore has to include the leading dot
 * (for example ".json"). A name that contains no '.' never matches.
 */
static bool hasExtension(const char *filename, const char *extension) {
  const char *lastdot = strrchr(filename, '.');
  if (lastdot == nullptr) {
    return false;
  }
  return strcmp(lastdot, extension) == 0;
}
|
||||
|
||||
/**
 * Returns true when the string str begins with the prefix pre.
 * An empty prefix matches every string.
 */
bool startsWith(const char *pre, const char *str) {
  const size_t prefixlen = strlen(pre);
  if (strlen(str) < prefixlen) {
    // str is too short to contain the whole prefix
    return false;
  }
  return strncmp(pre, str, prefixlen) == 0;
}
|
||||
|
||||
bool validate(const char *dirname) {
|
||||
bool everythingfine = true;
|
||||
init_state_machine(); // to be safe
|
||||
const char *extension = ".json";
|
||||
size_t dirlen = strlen(dirname);
|
||||
struct dirent **entry_list;
|
||||
int c = scandir(dirname, &entry_list, 0, alphasort);
|
||||
if (c < 0) {
|
||||
printf("error accessing %s \n", dirname);
|
||||
return false;
|
||||
}
|
||||
if (c == 0) {
|
||||
printf("nothing in dir %s \n", dirname);
|
||||
return false;
|
||||
}
|
||||
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
|
||||
for (int i = 0; i < c; i++) {
|
||||
const char *name = entry_list[i]->d_name;
|
||||
if (hasExtension(name, extension)) {
|
||||
printf("validating: file %s \n", name);
|
||||
size_t filelen = strlen(name);
|
||||
char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
|
||||
strcpy(fullpath, dirname);
|
||||
if (needsep) {
|
||||
fullpath[dirlen] = '/';
|
||||
strcpy(fullpath + dirlen + 1, name);
|
||||
} else {
|
||||
strcpy(fullpath + dirlen, name);
|
||||
}
|
||||
std::pair<u8 *, size_t> p = get_corpus(fullpath);
|
||||
// terrible hack but just to get it working
|
||||
ParsedJson *pj_ptr = new ParsedJson;
|
||||
ParsedJson &pj(*pj_ptr);
|
||||
if (posix_memalign((void **)&pj.structurals, 8,
|
||||
ROUNDUP_N(p.second, 64) / 8)) {
|
||||
std::cerr << "Could not allocate memory" << std::endl;
|
||||
return false;
|
||||
};
|
||||
pj.n_structural_indexes = 0;
|
||||
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
|
||||
pj.structural_indexes = new u32[max_structures];
|
||||
bool isok = find_structural_bits(p.first, p.second, pj);
|
||||
if (isok) {
|
||||
isok = flatten_indexes(p.second, pj);
|
||||
}
|
||||
if (isok) {
|
||||
isok = ape_machine(p.first, p.second, pj);
|
||||
}
|
||||
if (isok) {
|
||||
isok = shovel_machine(p.first, p.second, pj);
|
||||
}
|
||||
if (startsWith("pass", name)) {
|
||||
if (!isok) {
|
||||
printf("warning: file %s should pass but it fails.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
} else if (startsWith("fail", name)) {
|
||||
if (isok) {
|
||||
printf("warning: file %s should fail but it passes.\n", name);
|
||||
everythingfine = false;
|
||||
}
|
||||
} else {
|
||||
printf("File %s %s.\n", name,
|
||||
isok ? " is valid JSON " : " is not valid JSON");
|
||||
}
|
||||
free(pj.structurals);
|
||||
free(p.first);
|
||||
delete[] pj.structural_indexes;
|
||||
free(fullpath);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < c; ++i)
|
||||
free(entry_list[i]);
|
||||
free(entry_list);
|
||||
return everythingfine;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
|
||||
<< std::endl;
|
||||
std::cout
|
||||
<< "We are going to assume you mean to use the 'jsonchecker' directory."
|
||||
<< std::endl;
|
||||
return validate("jsonchecker/") ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
Loading…
Reference in New Issue