From ed7815d9a5da88e7c83a0596fb75249c3ce165ab Mon Sep 17 00:00:00 2001 From: daurnimator Date: Sun, 12 Nov 2017 14:29:42 +1100 Subject: src/defs.js: Fix conversion of non-BMP unicode codepoints Adds tests for to_luastring --- src/defs.js | 2 +- tests/defs.js | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 tests/defs.js diff --git a/src/defs.js b/src/defs.js index 4d5de3b..f76471a 100644 --- a/src/defs.js +++ b/src/defs.js @@ -193,7 +193,7 @@ const to_luastring = function(str, cache) { // See http://unicode.org/faq/utf_bom.html#utf16-3 // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629 let u = str.codePointAt(i); - if (u >= 0xD800) i++; // If it was a surrogate pair it used up two bytes + if (u >= 0x10000) i++; // It was a surrogate pair and hence used up two bytes if (u <= 0x7F) { outU8Array[outIdx++] = u; } else if (u <= 0x7FF) { diff --git a/tests/defs.js b/tests/defs.js new file mode 100644 index 0000000..8c86588 --- /dev/null +++ b/tests/defs.js @@ -0,0 +1,50 @@ +const test = require('tape'); + +global.WEB = false; +const defs = require('../src/defs.js'); + +test('to_luastring', function (t) { + t.deepEqual( + defs.to_luastring("foo"), + ["f".charCodeAt(0), "o".charCodeAt(0), "o".charCodeAt(0)], + "Convert normal ascii string" + ); + + t.deepEqual( + defs.to_luastring("fo\0o"), + ["f".charCodeAt(0), "o".charCodeAt(0), 0, "o".charCodeAt(0)], + "Convert ascii string containing null byte" + ); + + t.deepEqual( + defs.to_luastring("Café"), + [67, 97, 102, 195, 169], + "Convert string with BMP unicode chars" + ); + + t.deepEqual( + defs.to_luastring("\uF8FF"), + [239, 163, 191], + "Convert string with codepoint in PUA (U+E000 to U+F8FF)" + ); + + t.deepEqual( + defs.to_luastring("❤️🍾"), + [226, 157, 164, 239, 184, 143, 240, 159, 141, 190], + "Convert string with surrogate pair" + ); + + 
t.deepEqual( + defs.to_luastring("\uD800a"), + [237, 160, 128, 97], + "Convert string with broken surrogate pair" + ); + + t.deepEqual( + defs.to_luastring("\uD823"), + [237, 160, 163], + "Convert string with broken surrogate pair at end of string" + ); + + t.end(); +}); -- cgit v1.2.3-70-g09d2