Moving tests to a separate file and directory.

This commit is contained in:
Daniel Lemire 2018-08-17 19:57:31 -04:00
parent 01ea7996b2
commit d204e54170
5 changed files with 194 additions and 112 deletions

View File

@ -11,8 +11,9 @@ CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Idependencies/d
LIBFLAGS = -ldouble-conversion
#CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Wno-implicit-function-declaration
EXECUTABLES=parse
EXECUTABLES=parse jsoncheck
HEADERS=common_defs.h jsonioutil.h linux-perf-events.h simdjson_internal.h stage1_find_marks.h stage2_flatten.h stage3_ape_machine.h stage4_shovel_machine.h
LIBFILES=stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
@ -20,16 +21,22 @@ LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
LIBS=$(LIDDOUBLE)
all: $(LIBS) $(EXECUTABLES)
-./parse
test: jsoncheck
./jsoncheck
$(LIDDOUBLE) : dependencies/double-conversion/README.md
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
parse: main.cpp stage1_find_marks.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parse stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp main.cpp $(LIBFLAGS)
parse: main.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) main.cpp $(LIBFLAGS)
parsehisto: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFLAGS) -DBUILDHISTOGRAM
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
parsehisto: main.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
for filename in jsonexamples/twitter.json jsonexamples/gsoc-2018.json jsonexamples/citm_catalog.json jsonexamples/canada.json ; do \
@ -43,18 +50,18 @@ testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
set +x; \
done
parsenocheesy: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp -DSUPPRESS_CHEESY_FLATTEN
parsenocheesy: main.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp $(LIBFILES) -DSUPPRESS_CHEESY_FLATTEN
parsenodep8: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
parsenodep8: main.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
parsenodep10: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
# Variant of the parser built with NO_PDEP_PLEASE and NO_PDEP_WIDTH=10.
# The output name must match the target name so that make can see the target
# as up to date and so the parsenodep12 binary is not overwritten.
parsenodep10: main.cpp $(HEADERS) $(LIBFILES)
	$(CXX) $(CXXFLAGS) -o parsenodep10 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
parsenodep12: main.cpp common_defs.h linux-perf-events.h
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
parsenodep12: main.cpp $(HEADERS) $(LIBFILES)
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
dependencies/double-conversion/README.md:

31
jsonioutil.h Normal file
View File

@ -0,0 +1,31 @@
#ifndef JSONIOUTIL_H
#define JSONIOUTIL_H
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
// Get a corpus: read the whole file `filename` into a freshly allocated
// buffer that is aligned on 64 bytes (one cache line) and whose size is
// ROUNDUP_N(length, 64), so that SIMD code can always work on whole blocks.
// Any bytes of the allocation past the file content are set to 0x20 (space).
//
// Returns (buffer, length) where length is the number of bytes read from the
// file (not the size of the allocation). The buffer comes from posix_memalign
// and must be released by the caller with free().
//
// Throws std::runtime_error if the file cannot be opened or if the aligned
// allocation fails.
//
// Declared inline because this function is defined in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition errors at link time.
inline std::pair<u8 *, size_t> get_corpus(std::string filename) {
  std::ifstream is(filename, std::ios::binary);
  if (!is) {
    throw std::runtime_error("could not load corpus");
  }
  std::stringstream buffer;
  buffer << is.rdbuf();
  // str() returns a copy of the stream content, so take it exactly once.
  const std::string content = buffer.str();
  const size_t length = content.size();
  const size_t padded_length = ROUNDUP_N(length, 64);
  char *aligned_buffer;
  if (posix_memalign((void **)&aligned_buffer, 64, padded_length)) {
    throw std::runtime_error("Could not allocate sufficient memory");
  }
  // Fill everything with spaces first, then overwrite the leading part with
  // the actual file content.
  memset(aligned_buffer, 0x20, padded_length);
  memcpy(aligned_buffer, content.data(), length);
  return std::make_pair((u8 *)aligned_buffer, length);
}
#endif

115
main.cpp
View File

@ -39,29 +39,10 @@ using namespace double_conversion;
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
#include "jsonioutil.h"
using namespace std;
// Load the file `filename` into a 64-byte aligned buffer whose allocated size
// is ROUNDUP_N(length, 64); the whole allocation is pre-filled with 0x20
// before the file content is copied in, so SIMD code can read whole blocks.
// Returns (buffer, number of bytes read); the buffer must be released with
// free(). Exits the process with status 1 if the allocation fails and throws
// the string literal "No corpus" if the file cannot be opened.
pair<u8 *, size_t> get_corpus(string filename) {
  ifstream input(filename, ios::binary);
  if (!input) {
    throw "No corpus";
  }
  stringstream contents;
  contents << input.rdbuf();
  const string data = contents.str();
  const size_t nbytes = data.size();
  const size_t padded = ROUNDUP_N(nbytes, 64);
  char *storage;
  const int rc = posix_memalign((void **)&storage, 64, padded);
  if (rc != 0) {
    cerr << "Could not allocate memory\n";
    exit(1);
  }
  memset(storage, 0x20, padded);
  memcpy(storage, data.c_str(), nbytes);
  input.close();
  return make_pair((u8 *)storage, nbytes);
}
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
@ -115,80 +96,12 @@ void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
}
/**
 * Tells whether `filename` ends with `extension`.
 * The extension is taken to be everything from the last '.' in `filename`
 * onwards (dot included), so `extension` must include its leading dot.
 * A name without any '.' never matches.
 */
static bool hasExtension(const char *filename, const char *extension) {
  const char *lastdot = strrchr(filename, '.');
  if (lastdot == nullptr) {
    return false;
  }
  return strcmp(lastdot, extension) == 0;
}
// Returns true when the string `str` begins with the prefix `pre`.
// Note the argument order: the prefix comes first.
bool startsWith(const char *pre, const char *str) {
  const size_t prefixlen = strlen(pre);
  if (strlen(str) < prefixlen) {
    // A string shorter than the prefix cannot start with it.
    return false;
  }
  return strncmp(pre, str, prefixlen) == 0;
}
void validate() {
init_state_machine();// to be safe
const char *dirname = "jsonchecker/"; // ugly, hardcoded, brittle
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
printf("error accessing %s \n", dirname);
return;
}
if (c == 0) {
printf("nothing in dir %s \n", dirname);
return;
}
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (hasExtension(name, extension)) {
printf("validating: file %s \n",name);
size_t filelen = strlen(name);
char *fullpath = (char *)malloc(dirlen + filelen + 1);
strcpy(fullpath, dirname);
strcpy(fullpath + dirlen, name);
pair<u8 *, size_t> p = get_corpus(fullpath);
// terrible hack but just to get it working
ParsedJson * pj_ptr = new ParsedJson;
ParsedJson & pj(*pj_ptr);
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
cerr << "Could not allocate memory\n";
return;
};
pj.n_structural_indexes = 0;
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
find_structural_bits(p.first, p.second, pj);
flatten_indexes(p.second, pj);
bool isok = ape_machine(p.first, p.second, pj);
if(isok)
isok = shovel_machine(p.first, p.second, pj);
if(startsWith("pass",name)) {
if(!isok) printf("warning: file %s should pass but it fails.\n",name);
}
if(startsWith("fail",name)) {
if(isok) printf("warning: file %s should fail but it passes.\n",name);
}
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
free(fullpath);
}
}
for (int i = 0; i < c; ++i) free(entry_list[i]);
free(entry_list);
}
int main(int argc, char * argv[]) {
if (argc != 2) {
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
cout << "We are going to validate:\n" << std::endl;
validate();
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
exit(1);
}
pair<u8 *, size_t> p = get_corpus(argv[1]);
@ -196,7 +109,7 @@ int main(int argc, char * argv[]) {
ParsedJson & pj(*pj_ptr);
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
cerr << "Could not allocate memory\n";
cerr << "Could not allocate memory" << endl;
exit(1);
};
@ -237,38 +150,44 @@ int main(int argc, char * argv[]) {
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
#endif
bool isok = true;
for (u32 i = 0; i < iterations; i++) {
auto start = std::chrono::steady_clock::now();
#ifndef SQUASH_COUNTERS
unified.start();
#endif
find_structural_bits(p.first, p.second, pj);
isok = find_structural_bits(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy1 += results[0]; cl1 += results[1];
if(! isok ) break;
unified.start();
#endif
flatten_indexes(p.second, pj);
isok = flatten_indexes(p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy2 += results[0]; cl2 += results[1];
if(! isok ) break;
unified.start();
#endif
ape_machine(p.first, p.second, pj);
isok = ape_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy3 += results[0]; cl3 += results[1];
if(! isok ) break;
unified.start();
#endif
shovel_machine(p.first, p.second, pj);
isok = shovel_machine(p.first, p.second, pj);
#ifndef SQUASH_COUNTERS
unified.end(results);
cy4 += results[0]; cl4 += results[1];
#endif
if(! isok ) break;
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> secs = end - start;
res[i] = secs.count();
}
#ifndef SQUASH_COUNTERS
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
(double) pj.n_structural_indexes / p.second);
@ -303,5 +222,9 @@ int main(int argc, char * argv[]) {
free(p.first);
delete[] pj.structural_indexes;
delete pj_ptr;
return 0;
if(! isok ) {
printf(" Parsing failed. \n ");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}

View File

@ -16,6 +16,10 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
}
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
if (len > 0xffffff) {
cerr << "Currently only support JSON files < 16MB\n";
return false;
}
// Useful constant masks
const u64 even_bits = 0x5555555555555555ULL;
const u64 odd_bits = ~even_bits;

117
tests/jsoncheck.cpp Normal file
View File

@ -0,0 +1,117 @@
#include <assert.h>
#include <cstring>
#include <dirent.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "common_defs.h"
#include "jsonioutil.h"
#include "simdjson_internal.h"
#include "stage1_find_marks.h"
#include "stage2_flatten.h"
#include "stage3_ape_machine.h"
#include "stage4_shovel_machine.h"
/**
 * Checks whether the name `filename` ends with `extension`.
 * Everything from the last '.' of `filename` onwards is compared against
 * `extension`, which therefore has to carry its leading dot (e.g. ".json").
 * Returns false when `filename` contains no '.' at all.
 */
static bool hasExtension(const char *filename, const char *extension) {
  const char *suffix = strrchr(filename, '.');
  if (!suffix) {
    return false;
  }
  const bool same = (strcmp(suffix, extension) == 0);
  return same;
}
// Tells whether `str` begins with `pre` (the prefix is the FIRST argument).
bool startsWith(const char *pre, const char *str) {
  const size_t npre = strlen(pre);
  const size_t nstr = strlen(str);
  // Only compare when the string is at least as long as the prefix.
  return (nstr >= npre) && (strncmp(pre, str, npre) == 0);
}
bool validate(const char *dirname) {
bool everythingfine = true;
init_state_machine(); // to be safe
const char *extension = ".json";
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, 0, alphasort);
if (c < 0) {
printf("error accessing %s \n", dirname);
return false;
}
if (c == 0) {
printf("nothing in dir %s \n", dirname);
return false;
}
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (hasExtension(name, extension)) {
printf("validating: file %s \n", name);
size_t filelen = strlen(name);
char *fullpath = (char *)malloc(dirlen + filelen + 1 + 1);
strcpy(fullpath, dirname);
if (needsep) {
fullpath[dirlen] = '/';
strcpy(fullpath + dirlen + 1, name);
} else {
strcpy(fullpath + dirlen, name);
}
std::pair<u8 *, size_t> p = get_corpus(fullpath);
// terrible hack but just to get it working
ParsedJson *pj_ptr = new ParsedJson;
ParsedJson &pj(*pj_ptr);
if (posix_memalign((void **)&pj.structurals, 8,
ROUNDUP_N(p.second, 64) / 8)) {
std::cerr << "Could not allocate memory" << std::endl;
return false;
};
pj.n_structural_indexes = 0;
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
pj.structural_indexes = new u32[max_structures];
bool isok = find_structural_bits(p.first, p.second, pj);
if (isok) {
isok = flatten_indexes(p.second, pj);
}
if (isok) {
isok = ape_machine(p.first, p.second, pj);
}
if (isok) {
isok = shovel_machine(p.first, p.second, pj);
}
if (startsWith("pass", name)) {
if (!isok) {
printf("warning: file %s should pass but it fails.\n", name);
everythingfine = false;
}
} else if (startsWith("fail", name)) {
if (isok) {
printf("warning: file %s should fail but it passes.\n", name);
everythingfine = false;
}
} else {
printf("File %s %s.\n", name,
isok ? " is valid JSON " : " is not valid JSON");
}
free(pj.structurals);
free(p.first);
delete[] pj.structural_indexes;
free(fullpath);
}
}
for (int i = 0; i < c; ++i)
free(entry_list[i]);
free(entry_list);
return everythingfine;
}
// Entry point of the checker: validates the JSON files of the directory given
// as the single command-line argument. With any other number of arguments a
// usage message is printed and the 'jsonchecker/' directory is used instead.
// Exits with EXIT_SUCCESS when validate() returns true, EXIT_FAILURE
// otherwise.
int main(int argc, char *argv[]) {
  const char *directory = "jsonchecker/";
  if (argc == 2) {
    directory = argv[1];
  } else {
    std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
              << std::endl;
    std::cout
        << "We are going to assume you mean to use the 'jsonchecker' directory."
        << std::endl;
  }
  const bool allgood = validate(directory);
  if (allgood) {
    return EXIT_SUCCESS;
  }
  return EXIT_FAILURE;
}