Cleaning up the code.
This commit is contained in:
parent
35381279c3
commit
8315f4c888
|
@ -5,6 +5,7 @@
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
|
#include <string.h>
|
||||||
/*
|
/*
|
||||||
* legal utf-8 byte sequence
|
* legal utf-8 byte sequence
|
||||||
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
||||||
|
@ -396,7 +397,6 @@ avxcheckUTF8Bytes_asciipath(__m256i current_bytes,
|
||||||
9, 9, 9, 9, 9, 9, 9, 1)),*has_error);
|
9, 9, 9, 9, 9, 9, 9, 1)),*has_error);
|
||||||
return *previous;
|
return *previous;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct avx_processed_utf_bytes pb;
|
struct avx_processed_utf_bytes pb;
|
||||||
avx_count_nibbles(current_bytes, &pb);
|
avx_count_nibbles(current_bytes, &pb);
|
||||||
|
|
||||||
|
|
|
@ -40,10 +40,6 @@ WARN_UNUSED
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#ifdef UTF8VALIDATE
|
#ifdef UTF8VALIDATE
|
||||||
#define TRYASCIIFIRST // we try parsing it as ASCII and fall over to UTF-8 only as needed.
|
|
||||||
#ifdef TRYASCIIFIRST
|
|
||||||
bool isasciisofar = true;
|
|
||||||
#endif
|
|
||||||
__m256i has_error = _mm256_setzero_si256();
|
__m256i has_error = _mm256_setzero_si256();
|
||||||
struct avx_processed_utf_bytes previous = {
|
struct avx_processed_utf_bytes previous = {
|
||||||
.rawbytes = _mm256_setzero_si256(),
|
.rawbytes = _mm256_setzero_si256(),
|
||||||
|
@ -84,24 +80,20 @@ WARN_UNUSED
|
||||||
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
|
m256 input_lo = _mm256_load_si256((const m256 *)(buf + idx + 0));
|
||||||
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
|
m256 input_hi = _mm256_load_si256((const m256 *)(buf + idx + 32));
|
||||||
#ifdef UTF8VALIDATE
|
#ifdef UTF8VALIDATE
|
||||||
#ifdef TRYASCIIFIRST
|
|
||||||
if(isasciisofar) {
|
|
||||||
m256 highbit = _mm256_set1_epi8(0x80);
|
m256 highbit = _mm256_set1_epi8(0x80);
|
||||||
if((_mm256_testz_si256(input_lo,highbit) & _mm256_testz_si256(input_hi,highbit)) != 1) {
|
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
|
||||||
isasciisofar = false;
|
// it is ascii, we just check continuation
|
||||||
|
has_error = _mm256_or_si256(
|
||||||
|
_mm256_cmpgt_epi8(previous.carried_continuations,
|
||||||
|
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||||
|
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||||
|
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// it is not ascii so we have to do heavy work
|
||||||
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
||||||
previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
|
previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
#endif // TRYASCIIFIRST
|
|
||||||
previous = avxcheckUTF8Bytes_asciipath(input_lo, &previous, &has_error);
|
|
||||||
previous = avxcheckUTF8Bytes_asciipath(input_hi, &previous, &has_error);
|
|
||||||
//previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
|
|
||||||
//previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
|
|
||||||
|
|
||||||
#ifdef TRYASCIIFIRST
|
|
||||||
}
|
|
||||||
#endif // TRYASCIIFIRST
|
|
||||||
#endif
|
#endif
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Step 1: detect odd sequences of backslashes
|
// Step 1: detect odd sequences of backslashes
|
||||||
|
@ -259,11 +251,7 @@ WARN_UNUSED
|
||||||
*(u64 *)(pj.structurals + idx / 8) = structurals;
|
*(u64 *)(pj.structurals + idx / 8) = structurals;
|
||||||
}
|
}
|
||||||
#ifdef UTF8VALIDATE
|
#ifdef UTF8VALIDATE
|
||||||
#ifdef TRYASCIIFIRST
|
|
||||||
return isasciisofar || ((!isasciisofar) && (_mm256_testz_si256(has_error, has_error)));
|
|
||||||
#else
|
|
||||||
return _mm256_testz_si256(has_error, has_error);
|
return _mm256_testz_si256(has_error, has_error);
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue