Simpler iteration code (#190)
* Adding convenience method to simplify code. * Simplifying the iteration code.
This commit is contained in:
parent
b1e8990654
commit
b0e6bfa84c
45
README.md
45
README.md
|
@ -378,46 +378,21 @@ void compute_dump(ParsedJson::iterator &pjh) {
|
|||
The following function will find all user.id integers:
|
||||
|
||||
```C
|
||||
void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
||||
switch (i.get_type()) {
|
||||
case '{':
|
||||
if (i.down()) {
|
||||
do {
|
||||
bool founduser = equals(i.get_string(), "user");
|
||||
i.next(); // move to value
|
||||
if (i.is_object()) {
|
||||
if (founduser && i.move_to_key("id")) {
|
||||
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
||||
while(i.move_forward()) {
|
||||
if(i.get_scope_type() == '{') {
|
||||
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
|
||||
i.move_to_value();
|
||||
if(founduser) {
|
||||
if(i.is_object() && i.move_to_key("id",2)) {
|
||||
if (i.is_integer()) {
|
||||
answer.push_back(i.get_integer());
|
||||
}
|
||||
i.up();
|
||||
}
|
||||
simdjson_traverse(answer, i);
|
||||
} else if (i.is_array()) {
|
||||
simdjson_traverse(answer, i);
|
||||
}
|
||||
} while (i.next());
|
||||
i.up();
|
||||
}
|
||||
break;
|
||||
case '[':
|
||||
if (i.down()) {
|
||||
do {
|
||||
if (i.is_object_or_array()) {
|
||||
simdjson_traverse(answer, i);
|
||||
}
|
||||
} while (i.next());
|
||||
i.up();
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
case 'd':
|
||||
case 'n':
|
||||
case 't':
|
||||
case 'f':
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
@ -30,54 +30,28 @@ void print_vec(const std::vector<int64_t> &v) {
|
|||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
||||
switch (i.get_type()) {
|
||||
case '{':
|
||||
if (i.down()) {
|
||||
do {
|
||||
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
|
||||
i.move_to_value(); // move to value
|
||||
if (i.is_object()) {
|
||||
if (founduser && i.move_to_key("id")) {
|
||||
void simdjson_scan(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
|
||||
while(i.move_forward()) {
|
||||
if(i.get_scope_type() == '{') {
|
||||
bool founduser = (i.get_string_length() == 4) && (memcmp(i.get_string(), "user", 4) == 0);
|
||||
i.move_to_value();
|
||||
if(founduser) {
|
||||
if(i.is_object() && i.move_to_key("id",2)) {
|
||||
if (i.is_integer()) {
|
||||
answer.push_back(i.get_integer());
|
||||
}
|
||||
}
|
||||
i.up();
|
||||
}
|
||||
simdjson_traverse(answer, i);
|
||||
} else if (i.is_array()) {
|
||||
simdjson_traverse(answer, i);
|
||||
}
|
||||
} while (i.next());
|
||||
i.up();
|
||||
}
|
||||
break;
|
||||
case '[':
|
||||
if (i.down()) {
|
||||
do {
|
||||
if (i.is_object_or_array()) {
|
||||
simdjson_traverse(answer, i);
|
||||
}
|
||||
} while (i.next());
|
||||
i.up();
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
case 'd':
|
||||
case 'n':
|
||||
case 't':
|
||||
case 'f':
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
std::vector<int64_t> simdjson_justdom(ParsedJson &pj) {
|
||||
std::vector<int64_t> answer;
|
||||
ParsedJson::iterator i(pj);
|
||||
|
||||
simdjson_traverse(answer, i);
|
||||
simdjson_scan(answer,i);
|
||||
remove_duplicates(answer);
|
||||
return answer;
|
||||
}
|
||||
|
@ -90,8 +64,7 @@ std::vector<int64_t> simdjson_computestats(const padded_string &p) {
|
|||
return answer;
|
||||
}
|
||||
ParsedJson::iterator i(pj);
|
||||
|
||||
simdjson_traverse(answer, i);
|
||||
simdjson_scan(answer,i);
|
||||
remove_duplicates(answer);
|
||||
return answer;
|
||||
}
|
||||
|
@ -338,7 +311,6 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
BEST_TIME("simdjson ", simdjson_computestats(p).size(), size, , repeat,
|
||||
volume, !justdata);
|
||||
|
||||
BEST_TIME("rapid ", rapid_computestats(p).size(), size, , repeat, volume,
|
||||
!justdata);
|
||||
BEST_TIME("sasjon ", sasjon_computestats(p).size(), size, , repeat, volume,
|
||||
|
|
|
@ -207,11 +207,17 @@ public:
|
|||
// when at {, go one level deep, looking for a given key
|
||||
// if successful, we are left pointing at the value,
|
||||
// if not, we are still pointing at the object ({)
|
||||
// (in case of repeated keys, this only finds the first one)
|
||||
// (in case of repeated keys, this only finds the first one).
|
||||
// We seek the key using C's strcmp so if your JSON strings contain
|
||||
// NULL chars, this would trigger a false positive: if you expect that
|
||||
// to be the case, take extra precautions.
|
||||
inline bool move_to_key(const char * key);
|
||||
// when at {, go one level deep, looking for a given key
|
||||
// if successful, we are left pointing at the value,
|
||||
// if not, we are still pointing at the object ({)
|
||||
// (in case of repeated keys, this only finds the first one).
|
||||
// The key we search for may contain NUL characters: it is compared over exactly `length` bytes.
|
||||
inline bool move_to_key(const char * key, uint32_t length);
|
||||
|
||||
// when at a key location within an object, this moves to the accompanying value (located next to it).
|
||||
// this is equivalent but much faster than calling "next()".
|
||||
|
@ -355,10 +361,6 @@ bool ParsedJson::iterator::move_forward() {
|
|||
} else if ((current_type == ']') || (current_type == '}')) {
|
||||
// Leaving a scope.
|
||||
depth--;
|
||||
if(depth == 0) {
|
||||
// Should not be necessary
|
||||
return false;
|
||||
}
|
||||
} else if ((current_type == 'd') || (current_type == 'l')) {
|
||||
// d and l types use 2 locations on the tape, not just one.
|
||||
location += 1;
|
||||
|
@ -393,6 +395,21 @@ bool ParsedJson::iterator::move_to_key(const char * key) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParsedJson::iterator::move_to_key(const char * key, uint32_t length) {
|
||||
if(down()) {
|
||||
do {
|
||||
assert(is_string());
|
||||
bool rightkey = ((get_string_length() == length) && (memcmp(get_string(),key,length)==0));
|
||||
move_to_value();
|
||||
if(rightkey) {
|
||||
return true;
|
||||
}
|
||||
} while(next());
|
||||
assert(up());// not found
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool ParsedJson::iterator::prev() {
|
||||
if(location - 1 < depthindex[depth].start_of_scope) {
|
||||
|
@ -456,7 +473,7 @@ void ParsedJson::iterator::to_start_scope() {
|
|||
}
|
||||
|
||||
bool ParsedJson::iterator::next() {
|
||||
size_t npos; // next position
|
||||
size_t npos;
|
||||
if ((current_type == '[') || (current_type == '{')){
|
||||
// we need to jump
|
||||
npos = ( current_val & JSONVALUEMASK);
|
||||
|
|
Loading…
Reference in New Issue