diff options
author | daurnimator <quae@daurnimator.com> | 2017-11-12 14:29:42 +1100 |
---|---|---|
committer | daurnimator <quae@daurnimator.com> | 2017-11-12 14:29:50 +1100 |
commit | ed7815d9a5da88e7c83a0596fb75249c3ce165ab (patch) | |
tree | b787a8ec918d7f27e9b70a641f39b3d76d5e3efa | |
parent | 3033341b741cfb256baf91acf1f257a7f1ed6f7b (diff) | |
download | fengari-ed7815d9a5da88e7c83a0596fb75249c3ce165ab.tar.gz fengari-ed7815d9a5da88e7c83a0596fb75249c3ce165ab.tar.bz2 fengari-ed7815d9a5da88e7c83a0596fb75249c3ce165ab.zip |
src/defs.js: Fix conversion of non-BMP unicode codepoints
Adds tests for to_luastring
-rw-r--r-- | src/defs.js | 2 | ||||
-rw-r--r-- | tests/defs.js | 50 |
2 files changed, 51 insertions, 1 deletion
diff --git a/src/defs.js b/src/defs.js index 4d5de3b..f76471a 100644 --- a/src/defs.js +++ b/src/defs.js @@ -193,7 +193,7 @@ const to_luastring = function(str, cache) { // See http://unicode.org/faq/utf_bom.html#utf16-3 // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629 let u = str.codePointAt(i); - if (u >= 0xD800) i++; // If it was a surrogate pair it used up two bytes + if (u >= 0x10000) i++; // It was a surrogate pair and hence used up two bytes if (u <= 0x7F) { outU8Array[outIdx++] = u; } else if (u <= 0x7FF) { diff --git a/tests/defs.js b/tests/defs.js new file mode 100644 index 0000000..8c86588 --- /dev/null +++ b/tests/defs.js @@ -0,0 +1,50 @@ +const test = require('tape'); + +global.WEB = false; +const defs = require('../src/defs.js'); + +test('to_luastring', function (t) { + t.deepEqual( + defs.to_luastring("foo"), + ["f".charCodeAt(0), "o".charCodeAt(0), "o".charCodeAt(0)], + "Convert normal ascii string" + ); + + t.deepEqual( + defs.to_luastring("fo\0o"), + ["f".charCodeAt(0), "o".charCodeAt(0), 0, "o".charCodeAt(0)], + "Convert ascii string containing null byte" + ); + + t.deepEqual( + defs.to_luastring("Café"), + [67, 97, 102, 195, 169], + "Convert string with BMP unicode chars" + ); + + t.deepEqual( + defs.to_luastring("\uF8FF"), + [239, 163, 191], + "Convert string with codepoint in PUA (U+E000 to U+F8FF)" + ); + + t.deepEqual( + defs.to_luastring("❤️🍾"), + [226, 157, 164, 239, 184, 143, 240, 159, 141, 190], + "Convert string with surrogate pair" + ); + + t.deepEqual( + defs.to_luastring("\uD800a"), + [237, 160, 128, 97], + "Convert string with broken surrogate pair" + ); + + t.deepEqual( + defs.to_luastring("\uD823"), + [237, 160, 163], + "Convert string with broken surrogate pair at end of string" + ); + + t.end(); +}); |