Move stage2 classes into their own files

This commit is contained in:
John Keiser 2020-05-11 23:06:38 -07:00
parent a476531524
commit 4ea866f050
16 changed files with 117 additions and 110 deletions

View File

@ -29,7 +29,7 @@ simdjson's source structure, from the top level, looks like this:
```c++
namespace simdjson {
namespace haswell {
#include "generic/stage1_find_marks.h"
#include "generic/stage1.h"
}
}
```

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#define SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#ifndef SIMDJSON_ARM64_STAGE1_H
#define SIMDJSON_ARM64_STAGE1_H
#include "simdjson.h"
#include "arm64/bitmask.h"
@ -89,4 +89,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_STAGE1_FIND_MARKS_H
#endif // SIMDJSON_ARM64_STAGE1_H

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#define SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#ifndef SIMDJSON_ARM64_STAGE2_H
#define SIMDJSON_ARM64_STAGE2_H
#include "simdjson.h"
#include "arm64/implementation.h"
@ -10,10 +10,11 @@ namespace simdjson {
namespace arm64 {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
#include "generic/structural_iterator.h"
#include "generic/structural_parser.h"
#include "generic/streaming_structural_parser.h"
} // namespace arm64
} // namespace simdjson
#endif // SIMDJSON_ARM64_STAGE2_BUILD_TAPE_H
#endif // SIMDJSON_ARM64_STAGE2_H

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#define SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#ifndef SIMDJSON_FALLBACK_STAGE1_H
#define SIMDJSON_FALLBACK_STAGE1_H
#include "simdjson.h"
#include "fallback/implementation.h"
@ -211,4 +211,4 @@ WARN_UNUSED error_code implementation::minify(const uint8_t *buf, size_t len, ui
} // namespace fallback
} // namespace simdjson
#endif // SIMDJSON_FALLBACK_STAGE1_FIND_MARKS_H
#endif // SIMDJSON_FALLBACK_STAGE1_H

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#define SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#ifndef SIMDJSON_FALLBACK_STAGE2_H
#define SIMDJSON_FALLBACK_STAGE2_H
#include "simdjson.h"
@ -11,10 +11,11 @@ namespace simdjson {
namespace fallback {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
#include "generic/structural_iterator.h"
#include "generic/structural_parser.h"
#include "generic/streaming_structural_parser.h"
} // namespace fallback
} // namespace simdjson
#endif // SIMDJSON_FALLBACK_STAGE2_BUILD_TAPE_H
#endif // SIMDJSON_FALLBACK_STAGE2_H

View File

@ -1,7 +1,7 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
// "simdjson/stage1.h" (this simplifies amalgamation)
namespace stage1 {

View File

@ -1,7 +1,7 @@
// This file contains the common code every implementation uses in stage1
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
// "simdjson/stage1.h" (this simplifies amalgamation)
namespace stage1 {

View File

@ -0,0 +1,69 @@
namespace stage2 {

/**
 * Cursor over the structural characters of an input buffer.
 *
 * The iterator does not own anything: it holds the input buffer, its length,
 * and an array of offsets into that buffer (`structural_indexes`). Each call
 * to advance_char() loads the next offset, remembers it in `idx`, and caches
 * the byte found there in `c`.
 */
class structural_iterator {
public:
  /**
   * @param _buf                  Input bytes.
   * @param _len                  Number of input bytes in _buf.
   * @param _structural_indexes   Offsets into _buf, visited in array order.
   * @param next_structural_index Position in _structural_indexes of the first
   *                              offset that advance_char() will consume.
   */
  really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
    : buf{_buf},
      len{_len},
      structural_indexes{_structural_indexes},
      next_structural{next_structural_index}
  {}

  /**
   * Move to the next structural offset and return the byte stored there.
   * No bounds check is performed here; use past_end()/at_end() for that.
   */
  really_inline char advance_char() {
    idx = structural_indexes[next_structural];
    next_structural++;
    c = *current();
    return c;
  }

  /** Byte cached by the most recent advance_char() (0 before the first call). */
  really_inline char current_char() const {
    return c;
  }

  /** Pointer into buf at the current structural offset. */
  really_inline const uint8_t* current() const {
    return &buf[idx];
  }

  /** Number of bytes from the current structural offset to the end of the input. */
  really_inline size_t remaining_len() const {
    return len - idx;
  }

  /**
   * Invoke f on a temporary, space-padded copy of the whole input.
   *
   * f is called as f(copy, idx), where copy holds the len input bytes followed
   * by SIMDJSON_PADDING space characters. The copy is released before returning.
   *
   * @return whatever f returns, or true if the copy could not be allocated.
   */
  template<typename F>
  really_inline bool with_space_terminated_copy(const F& f) const {
    /**
    * We need to make a copy to make sure that the string is space terminated.
    * This is not about padding the input, which should already be padded up
    * to len + SIMDJSON_PADDING. However, we have no control at this stage
    * on how the padding was done. What if the input string was padded with nulls?
    * It is quite common for an input string to have an extra null character (C string).
    * We do not want to allow 9\0 (where \0 is the null character) inside a JSON
    * document, but the string "9\0" by itself is fine. So we make a copy and
    * pad the input with spaces when we know that there is just one input element.
    * This copy is relatively expensive, but it will almost never be called in
    * practice unless you are in the strange scenario where you have many JSON
    * documents made of single atoms.
    */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) {
      // NOTE(review): presumably `true` signals failure to the caller — confirm at call sites.
      return true;
    }
    memcpy(copy, buf, len);
    memset(copy + len, ' ', SIMDJSON_PADDING);
    bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
    free(copy);
    return result;
  }

  /** True when next_structural+1 exceeds n_structural_indexes. */
  really_inline bool past_end(uint32_t n_structural_indexes) const {
    return next_structural+1 > n_structural_indexes;
  }

  /** True when next_structural+1 equals n_structural_indexes. */
  really_inline bool at_end(uint32_t n_structural_indexes) const {
    return next_structural+1 == n_structural_indexes;
  }

  /** Position in structural_indexes that the next advance_char() will read. */
  really_inline size_t next_structural_index() const {
    return next_structural;
  }

  const uint8_t* const buf;
  const size_t len;
  const uint32_t* const structural_indexes;
  size_t next_structural; // next structural index
  size_t idx{0}; // location of the structural character in the input (buf)
  uint8_t c{0}; // used to track the (structural) character we are looking at
};

} // namespace stage2

View File

@ -1,7 +1,7 @@
// This file contains the common code every implementation uses for stage2
// It is intended to be included multiple times and compiled multiple times
// We assume the file in which it is included already includes
// "simdjson/stage2_build_tape.h" (this simplifies amalgamation)
// "simdjson/stage2.h" (this simplifies amalgamation)
namespace stage2 {
@ -47,72 +47,6 @@ struct unified_machine_addresses {
// (Re)define FAIL_IF: drop any earlier definition, then make FAIL_IF(EXPR)
// return `addresses.error` from the enclosing function when EXPR is true.
// The expansion is a bare braced block, so it relies on an `addresses`
// object being in scope wherever the macro is used.
#undef FAIL_IF
#define FAIL_IF(EXPR) { if (EXPR) { return addresses.error; } }
/**
 * Cursor over the structural characters of an input buffer.
 *
 * The iterator does not own anything: it holds the input buffer, its length,
 * and an array of offsets into that buffer (`structural_indexes`). Each call
 * to advance_char() loads the next offset, remembers it in `idx`, and caches
 * the byte found there in `c`.
 */
class structural_iterator {
public:
  /**
   * @param _buf                  Input bytes.
   * @param _len                  Number of input bytes in _buf.
   * @param _structural_indexes   Offsets into _buf, visited in array order.
   * @param next_structural_index Position in _structural_indexes of the first
   *                              offset that advance_char() will consume.
   */
  really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
    : buf{_buf},
      len{_len},
      structural_indexes{_structural_indexes},
      next_structural{next_structural_index}
  {}

  /**
   * Move to the next structural offset and return the byte stored there.
   * No bounds check is performed here; use past_end()/at_end() for that.
   */
  really_inline char advance_char() {
    idx = structural_indexes[next_structural];
    next_structural++;
    c = *current();
    return c;
  }

  /** Byte cached by the most recent advance_char() (0 before the first call). */
  really_inline char current_char() const {
    return c;
  }

  /** Pointer into buf at the current structural offset. */
  really_inline const uint8_t* current() const {
    return &buf[idx];
  }

  /** Number of bytes from the current structural offset to the end of the input. */
  really_inline size_t remaining_len() const {
    return len - idx;
  }

  /**
   * Invoke f on a temporary, space-padded copy of the whole input.
   *
   * f is called as f(copy, idx), where copy holds the len input bytes followed
   * by SIMDJSON_PADDING space characters. The copy is released before returning.
   *
   * @return whatever f returns, or true if the copy could not be allocated.
   */
  template<typename F>
  really_inline bool with_space_terminated_copy(const F& f) const {
    /**
    * We need to make a copy to make sure that the string is space terminated.
    * This is not about padding the input, which should already be padded up
    * to len + SIMDJSON_PADDING. However, we have no control at this stage
    * on how the padding was done. What if the input string was padded with nulls?
    * It is quite common for an input string to have an extra null character (C string).
    * We do not want to allow 9\0 (where \0 is the null character) inside a JSON
    * document, but the string "9\0" by itself is fine. So we make a copy and
    * pad the input with spaces when we know that there is just one input element.
    * This copy is relatively expensive, but it will almost never be called in
    * practice unless you are in the strange scenario where you have many JSON
    * documents made of single atoms.
    */
    char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
    if (copy == nullptr) {
      // NOTE(review): presumably `true` signals failure to the caller — confirm at call sites.
      return true;
    }
    memcpy(copy, buf, len);
    memset(copy + len, ' ', SIMDJSON_PADDING);
    bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
    free(copy);
    return result;
  }

  /** True when next_structural+1 exceeds n_structural_indexes. */
  really_inline bool past_end(uint32_t n_structural_indexes) const {
    return next_structural+1 > n_structural_indexes;
  }

  /** True when next_structural+1 equals n_structural_indexes. */
  really_inline bool at_end(uint32_t n_structural_indexes) const {
    return next_structural+1 == n_structural_indexes;
  }

  /** Position in structural_indexes that the next advance_char() will read. */
  really_inline size_t next_structural_index() const {
    return next_structural;
  }

  const uint8_t* const buf;
  const size_t len;
  const uint32_t* const structural_indexes;
  size_t next_structural; // next structural index
  size_t idx{0}; // location of the structural character in the input (buf)
  uint8_t c{0}; // used to track the (structural) character we are looking at
};
struct number_writer {
parser &doc_parser;

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#define SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#ifndef SIMDJSON_HASWELL_STAGE1_H
#define SIMDJSON_HASWELL_STAGE1_H
#include "simdjson.h"
@ -80,4 +80,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_HASWELL_STAGE1_FIND_MARKS_H
#endif // SIMDJSON_HASWELL_STAGE1_H

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#define SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#ifndef SIMDJSON_HASWELL_STAGE2_H
#define SIMDJSON_HASWELL_STAGE2_H
#include "simdjson.h"
#include "haswell/implementation.h"
@ -11,11 +11,12 @@ namespace simdjson {
namespace haswell {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
#include "generic/structural_iterator.h"
#include "generic/structural_parser.h"
#include "generic/streaming_structural_parser.h"
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_HASWELL_STAGE2_BUILD_TAPE_H
#endif // SIMDJSON_HASWELL_STAGE2_H

View File

@ -1,12 +1,12 @@
#if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage1_find_marks.h"
#include "arm64/stage1.h"
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage1_find_marks.h"
#include "fallback/stage1.h"
#endif
#if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage1_find_marks.h"
#include "haswell/stage1.h"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage1_find_marks.h"
#include "westmere/stage1.h"
#endif

View File

@ -12,14 +12,14 @@ void found_bad_string(const uint8_t *buf);
#endif
#if SIMDJSON_IMPLEMENTATION_ARM64
#include "arm64/stage2_build_tape.h"
#include "arm64/stage2.h"
#endif
#if SIMDJSON_IMPLEMENTATION_FALLBACK
#include "fallback/stage2_build_tape.h"
#include "fallback/stage2.h"
#endif
#if SIMDJSON_IMPLEMENTATION_HASWELL
#include "haswell/stage2_build_tape.h"
#include "haswell/stage2.h"
#endif
#if SIMDJSON_IMPLEMENTATION_WESTMERE
#include "westmere/stage2_build_tape.h"
#include "westmere/stage2.h"
#endif

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#define SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#ifndef SIMDJSON_WESTMERE_STAGE1_H
#define SIMDJSON_WESTMERE_STAGE1_H
#include "simdjson.h"
#include "westmere/bitmask.h"
@ -79,4 +79,4 @@ WARN_UNUSED error_code implementation::stage1(const uint8_t *buf, size_t len, pa
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_WESTMERE_STAGE1_FIND_MARKS_H
#endif // SIMDJSON_WESTMERE_STAGE1_H

View File

@ -1,5 +1,5 @@
#ifndef SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#define SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#ifndef SIMDJSON_WESTMERE_STAGE2_H
#define SIMDJSON_WESTMERE_STAGE2_H
#include "simdjson.h"
#include "westmere/implementation.h"
@ -11,10 +11,11 @@ namespace simdjson {
namespace westmere {
#include "generic/atomparsing.h"
#include "generic/stage2_build_tape.h"
#include "generic/stage2_streaming_build_tape.h"
#include "generic/structural_iterator.h"
#include "generic/structural_parser.h"
#include "generic/streaming_structural_parser.h"
} // namespace westmere
} // namespace simdjson
UNTARGET_REGION
#endif // SIMDJSON_WESTMERE_STAGE2_BUILD_TAPE_H
#endif // SIMDJSON_WESTMERE_STAGE2_H