improved string padded (#440)

* dirent portable latest version

* improved

std::string argument passed by const reference
ctor added with std::string_view  argument
`allocate_padded_buffer()`  moved here with **optional** check on `length < 1`

* allocate_padded_buffer moved to padded_string.h
This commit is contained in:
dbj 2020-01-10 16:15:48 +01:00 committed by Daniel Lemire
parent 833e5d8bf1
commit 85e84fc1fa
2 changed files with 65 additions and 24 deletions

View File

@ -5,39 +5,84 @@
#include <memory>
#include <string>
// padded buffer can be made with length < 1
#define SIMDJSON_OK_EMPTY_PADDED_BUFFER 1
namespace simdjson {
// low-level function to allocate memory with padding so we can read past the
// "length" bytes safely. If you must provide a pointer to some data, create it
// with this function. length is the max. size in bytes of the string. The
// caller is responsible for freeing the memory (free(...)).
char *allocate_padded_buffer(size_t length);
// Allocates `length + SIMDJSON_PADDING` bytes through aligned_malloc_char()
// (requested alignment: 64) and zeroes the bytes from offset `length` to the
// end of the buffer. The first `length` bytes are left uninitialized.
//
// Returns nullptr when the padded size would overflow size_t or when the
// allocation fails. The failure checks run in every build type; only the
// perror() diagnostics are limited to debug builds (NDEBUG not defined).
// When SIMDJSON_OK_EMPTY_PADDED_BUFFER is not defined, debug builds also
// reject `length < 1` with errno set to EINVAL.
inline char *allocate_padded_buffer(size_t length) noexcept {
#ifndef NDEBUG
#ifndef SIMDJSON_OK_EMPTY_PADDED_BUFFER
  if (length < 1) {
    errno = EINVAL;
    perror("simdjson::allocate_padded_buffer() length argument is less than 1");
    return nullptr;
  }
#endif // SIMDJSON_OK_EMPTY_PADDED_BUFFER
#endif // NDEBUG
  // we could do a simple malloc
  // return (char *) malloc(length + SIMDJSON_PADDING);
  // However, we might as well align to cache lines...
  const size_t totalpaddedlength = length + SIMDJSON_PADDING;
  if (totalpaddedlength < length) {
    // Unsigned wraparound: the requested size cannot be represented, and
    // allocating the wrapped (small) size would make the memset below write
    // far outside the buffer.
    return nullptr;
  }
  char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
  if (padded_buffer == nullptr) {
    // This check must not depend on NDEBUG: without it a failed allocation
    // reaches memset() with a null base pointer in release builds.
#ifndef NDEBUG
    errno = ENOMEM;
    perror("simdjson::allocate_padded_buffer() aligned_malloc_char() failed");
#endif // NDEBUG
    return nullptr;
  }
  // Zero only the padding tail; the caller fills the first `length` bytes.
  memset(padded_buffer + length, 0, totalpaddedlength - length);
  return padded_buffer;
} // allocate_padded_buffer
// Simple string with padded allocation.
// We deliberately forbid copies, users should rely on swap or move
// constructors.
class padded_string {
public:
struct padded_string final {
explicit padded_string() noexcept : viable_size(0), data_ptr(nullptr) {}
explicit padded_string(size_t length) noexcept
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
if (data_ptr != nullptr)
data_ptr[length] = '\0'; // easier when you need a c_str
}
explicit padded_string(char *data, size_t length) noexcept
: viable_size(length), data_ptr(allocate_padded_buffer(length)) {
if (data_ptr != nullptr) {
if (data != nullptr) {
memcpy(data_ptr, data, length);
data_ptr[length] = '\0'; // easier when you need a c_str
}
}
padded_string(std::string s) noexcept
: viable_size(s.size()), data_ptr(allocate_padded_buffer(s.size())) {
// note: do not pass std::string arguments by value
padded_string(const std::string & str_ ) noexcept
: viable_size(str_.size()), data_ptr(allocate_padded_buffer(str_.size())) {
if (data_ptr != nullptr) {
memcpy(data_ptr, s.data(), s.size());
data_ptr[s.size()] = '\0'; // easier when you need a c_str
memcpy(data_ptr, str_.data(), str_.size());
data_ptr[str_.size()] = '\0'; // easier when you need a c_str
}
}
// note: do pass std::string_view arguments by value
padded_string(std::string_view sv_) noexcept
: viable_size(sv_.size()), data_ptr(allocate_padded_buffer(sv_.size())) {
if (data_ptr != nullptr) {
memcpy(data_ptr, sv_.data(), sv_.size());
data_ptr[sv_.size()] = '\0'; // easier when you need a c_str
}
}
padded_string(padded_string &&o) noexcept
: viable_size(o.viable_size), data_ptr(o.data_ptr) {
o.data_ptr = nullptr; // we take ownership
@ -60,21 +105,26 @@ public:
o.viable_size = tmp_viable_size;
}
~padded_string() { aligned_free_char(data_ptr); }
~padded_string() {
aligned_free_char(data_ptr);
this->data_ptr = nullptr;
}
size_t size() const { return viable_size; }
size_t size() const { return viable_size; }
size_t length() const { return viable_size; }
size_t length() const { return viable_size; }
char *data() const { return data_ptr; }
char *data() const { return data_ptr; }
private:
padded_string &operator=(const padded_string &o) = delete;
padded_string(const padded_string &o) = delete;
size_t viable_size;
char *data_ptr;
};
size_t viable_size ;
char *data_ptr{nullptr};
}; // padded_string
} // namespace simdjson
#endif

View File

@ -4,15 +4,6 @@
#include <climits>
namespace simdjson {
// Allocates `length + SIMDJSON_PADDING` bytes through aligned_malloc_char()
// (requested alignment: 64) and zeroes the bytes from offset `length` to the
// end of the buffer. The first `length` bytes are left uninitialized.
// Returns nullptr when the padded size would overflow size_t or when the
// allocation fails.
char *allocate_padded_buffer(size_t length) {
  // we could do a simple malloc
  // return (char *) malloc(length + SIMDJSON_PADDING);
  // However, we might as well align to cache lines...
  const size_t totalpaddedlength = length + SIMDJSON_PADDING;
  if (totalpaddedlength < length) {
    // Unsigned wraparound: refuse rather than allocate a too-small buffer.
    return nullptr;
  }
  char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
  if (padded_buffer == nullptr) {
    // Do not memset through a null pointer when the allocation failed.
    return nullptr;
  }
  // Zero only the padding tail; the caller fills the first `length` bytes.
  memset(padded_buffer + length, 0, totalpaddedlength - length);
  return padded_buffer;
}
padded_string get_corpus(const std::string &filename) {
std::FILE *fp = std::fopen(filename.c_str(), "rb");