Better documentation.

This commit is contained in:
Daniel Lemire 2018-11-20 12:59:06 -05:00
parent bbeb64a70b
commit 1fcd2688f8
1 changed file with 8 additions and 3 deletions

View File

@ -79,6 +79,9 @@ uint32_t hex_to_u32_nocheck(const u8 *src) {
// and clz and table lookups, but JSON documents // and clz and table lookups, but JSON documents
// have few escaped code points, and the following // have few escaped code points, and the following
// function looks cheap. // function looks cheap.
//
// Note: we assume that surrogates are treated separately
//
inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) { inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) {
if (cp <= 0x7F) { if (cp <= 0x7F) {
c[0] = cp; c[0] = cp;
@ -87,8 +90,9 @@ inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) {
c[0] = (cp >> 6) + 192; c[0] = (cp >> 6) + 192;
c[1] = (cp & 63) + 128; c[1] = (cp & 63) + 128;
return 2; // universal plane return 2; // universal plane
} else if (0xd800 <= cp && cp <= 0xdfff) { // Surrogates are treated elsewhere...
return 0; // surrogates // could put assert here //} //else if (0xd800 <= cp && cp <= 0xdfff) {
// return 0; // surrogates // could put assert here
} else if (cp <= 0xFFFF) { } else if (cp <= 0xFFFF) {
c[0] = (cp >> 12) + 224; c[0] = (cp >> 12) + 224;
c[1] = ((cp >> 6) & 63) + 128; c[1] = ((cp >> 6) & 63) + 128;
@ -101,6 +105,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, u8 *c) {
c[3] = (cp & 63) + 128; c[3] = (cp & 63) + 128;
return 4; return 4;
} }
return 0; // bad // could put assert here // will return 0 when the code point was too large.
return 0; // bad r
} }