Simpler iteration code (#190)

* Adding convenience method to simplify code.

* Simplifying the iteration code.
This commit is contained in:
Daniel Lemire 2019-06-12 16:29:24 -04:00 committed by GitHub
parent b1e8990654
commit b0e6bfa84c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 83 deletions

View File

@ -378,46 +378,21 @@ void compute_dump(ParsedJson::iterator &pjh) {
The following function will find all user.id integers:
```C
void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
switch (i.get_type()) {
case '{':
if (i.down()) {
do {
bool founduser = equals(i.get_string(), "user");
i.next(); // move to value
if (i.is_object()) {
if (founduser && i.move_to_key("id")) {
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
while(i.move_forward()) {
if(i.get_scope_type() == '{') {
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
i.move_to_value();
if(founduser) {
if(i.is_object() && i.move_to_key("id",2)) {
if (i.is_integer()) {
answer.push_back(i.get_integer());
}
i.up();
}
simdjson_traverse(answer, i);
} else if (i.is_array()) {
simdjson_traverse(answer, i);
}
} while (i.next());
i.up();
}
break;
case '[':
if (i.down()) {
do {
if (i.is_object_or_array()) {
simdjson_traverse(answer, i);
}
} while (i.next());
i.up();
}
break;
case 'l':
case 'd':
case 'n':
case 't':
case 'f':
default:
break;
}
}
}
}
}
```

View File

@ -30,54 +30,28 @@ void print_vec(const std::vector<int64_t> &v) {
std::cout << std::endl;
}
void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
switch (i.get_type()) {
case '{':
if (i.down()) {
do {
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
i.move_to_value(); // move to value
if (i.is_object()) {
if (founduser && i.move_to_key("id")) {
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
while(i.move_forward()) {
if(i.get_scope_type() == '{') {
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
i.move_to_value();
if(founduser) {
if(i.is_object() && i.move_to_key("id",2)) {
if (i.is_integer()) {
answer.push_back(i.get_integer());
}
}
i.up();
}
simdjson_traverse(answer, i);
} else if (i.is_array()) {
simdjson_traverse(answer, i);
}
} while (i.next());
i.up();
}
break;
case '[':
if (i.down()) {
do {
if (i.is_object_or_array()) {
simdjson_traverse(answer, i);
}
} while (i.next());
i.up();
}
break;
case 'l':
case 'd':
case 'n':
case 't':
case 'f':
default:
break;
}
}
}
}
}
}
__attribute__ ((noinline))
std::vector<int64_t> simdjson_justdom(ParsedJson &pj) {
std::vector<int64_t> answer;
ParsedJson::iterator i(pj);
simdjson_traverse(answer, i);
simdjson_scan(answer,i);
remove_duplicates(answer);
return answer;
}
@ -90,8 +64,7 @@ std::vector<int64_t> simdjson_computestats(const padded_string &p) {
return answer;
}
ParsedJson::iterator i(pj);
simdjson_traverse(answer, i);
simdjson_scan(answer,i);
remove_duplicates(answer);
return answer;
}
@ -338,7 +311,6 @@ int main(int argc, char *argv[]) {
}
BEST_TIME("simdjson ", simdjson_computestats(p).size(), size, , repeat,
volume, !justdata);
BEST_TIME("rapid ", rapid_computestats(p).size(), size, , repeat, volume,
!justdata);
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,

View File

@ -207,11 +207,17 @@ public:
// when at {, go one level deep, looking for a given key
// if successful, we are left pointing at the value,
// if not, we are still pointing at the object ({)
// (in case of repeated keys, this only finds the first one)
// (in case of repeated keys, this only finds the first one).
// We seek the key using C's strcmp, so if your JSON strings contain
// null characters ('\0'), this can report a false positive: if you expect
// that to be the case, use the overload that takes an explicit length.
inline bool move_to_key(const char * key);
// when at {, go one level deep, looking for a given key
// if successful, we are left pointing at the value,
// if not, we are still pointing at the object ({)
// (in case of repeated keys, this only finds the first one).
// The string we search for can contain null characters ('\0').
inline bool move_to_key(const char * key, uint32_t length);
// when at a key location within an object, this moves to the accompanying value (located next to it).
// this is equivalent but much faster than calling "next()".
@ -355,10 +361,6 @@ bool ParsedJson::iterator::move_forward() {
} else if ((current_type == ']') || (current_type == '}')) {
// Leaving a scope.
depth--;
if(depth == 0) {
// Should not be necessary
return false;
}
} else if ((current_type == 'd') || (current_type == 'l')) {
// d and l types use 2 locations on the tape, not just one.
location += 1;
@ -393,6 +395,21 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
return false;
}
// When positioned at an object ('{'), go one level deep and look for the
// given key. The comparison uses the stored string length and memcmp, so
// the key may contain null characters.
//
// key:    pointer to the bytes of the key to look for
// length: number of bytes in key
//
// Returns true if the key was found; the iterator is then left pointing at
// the matching value. Returns false otherwise; the iterator is then moved
// back up to the enclosing object. In case of repeated keys, only the first
// match is found.
bool ParsedJson::iterator::move_to_key(const char * key, uint32_t length) {
  if(!down()) {
    // Could not descend: nothing to search.
    return false;
  }
  do {
    assert(is_string());
    const bool rightkey = (get_string_length() == length) &&
                          (memcmp(get_string(), key, length) == 0);
    // Each key is followed by its value; step onto it before deciding.
    move_to_value();
    if(rightkey) {
      return true;
    }
  } while(next());
  // Not found: return to the enclosing object. The call to up() must NOT be
  // placed inside assert(), otherwise it is compiled out when NDEBUG is
  // defined and the iterator would be left stranded inside the object.
  const bool went_up = up();
  assert(went_up);
  (void)went_up; // silence unused-variable warnings in NDEBUG builds
  return false;
}
bool ParsedJson::iterator::prev() {
if(location - 1 < depthindex[depth].start_of_scope) {
@ -456,7 +473,7 @@ void ParsedJson::iterator::to_start_scope() {
}
bool ParsedJson::iterator::next() {
size_t npos; // next position
size_t npos;
if ((current_type == '[') || (current_type == '{')){
// we need to jump
npos = ( current_val & JSONVALUEMASK);