summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordaurnimator <quae@daurnimator.com>2017-11-12 14:55:08 +1100
committerdaurnimator <quae@daurnimator.com>2017-11-12 15:22:22 +1100
commit2ec10cc9151920aa286a7ae376888d61cf9a3bca (patch)
tree09034ab8d9757e6d717dc81551ec08091880b9a0 /src
parent0b903fef1bfe08d0fe573f4bdb02a6c90fd39ab2 (diff)
downloadfengari-2ec10cc9151920aa286a7ae376888d61cf9a3bca.tar.gz
fengari-2ec10cc9151920aa286a7ae376888d61cf9a3bca.tar.bz2
fengari-2ec10cc9151920aa286a7ae376888d61cf9a3bca.zip
src/defs.js: Refactor to_jsstring
Now throws a RangeError on non-UTF-8 input. Previously it would convert such input to the equivalent Unicode code points as bytes, which did not round-trip.
Diffstat (limited to 'src')
-rw-r--r--src/defs.js60
1 files changed, 28 insertions, 32 deletions
diff --git a/src/defs.js b/src/defs.js
index 5ebaad9..e55e3dc 100644
--- a/src/defs.js
+++ b/src/defs.js
@@ -138,43 +138,39 @@ const is_luastring = function(s) {
const to_jsstring = function(value, from, to) {
assert(is_luastring(value), "jsstring expect a array of bytes");
- let u0, u1, u2, u3, u4, u5;
- let idx = 0;
value = value.slice(from ? from : 0, to);
- var str = '';
- while (1) {
- // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629
- u0 = value[idx++];
- if (u0 === 0) { str += "\0"; continue; } // Lua string embed '\0'
- if (!u0) return str;
- if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
- u1 = value[idx++] & 63;
- if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
- u2 = value[idx++] & 63;
- if ((u0 & 0xF0) == 0xE0) {
- u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
- } else {
- u3 = value[idx++] & 63;
- if ((u0 & 0xF8) == 0xF0) {
- u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | u3;
- } else {
- u4 = value[idx++] & 63;
- if ((u0 & 0xFC) == 0xF8) {
- u0 = ((u0 & 3) << 24) | (u1 << 18) | (u2 << 12) | (u3 << 6) | u4;
- } else {
- u5 = value[idx++] & 63;
- u0 = ((u0 & 1) << 30) | (u1 << 24) | (u2 << 18) | (u3 << 12) | (u4 << 6) | u5;
- }
- }
- }
- if (u0 < 0x10000) {
- str += String.fromCharCode(u0);
+ let str = "";
+ for (let i = 0; i < value.length;) {
+ let u;
+ let u0 = value[i++];
+ if (u0 < 0x80) {
+ /* single byte sequence */
+ u = u0;
+ } else if (u0 < 0xC2 || u0 > 0xF4) {
+ throw RangeError("cannot convert invalid utf8 to javascript string");
+ } else if (u0 <= 0xDF) {
+ /* two byte sequence */
+ if (i >= value.length) throw RangeError("cannot convert invalid utf8 to javascript string");
+ let u1 = value[i++];
+ u = ((u0 & 0x1F) << 6) + (u1 & 0x3F);
+ } else if (u0 <= 0xEF) {
+ /* three byte sequence */
+ if (i+1 >= value.length) throw RangeError("cannot convert invalid utf8 to javascript string");
+ let u1 = value[i++];
+ let u2 = value[i++];
+ u = ((u0 & 0x0F) << 12) + ((u1 & 0x3F) << 6) + (u2 & 0x3F);
} else {
- var ch = u0 - 0x10000;
- str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
+ /* four byte sequence */
+ if (i+2 >= value.length) throw RangeError("cannot convert invalid utf8 to javascript string");
+ let u1 = value[i++];
+ let u2 = value[i++];
+ let u3 = value[i++];
+ u = ((u0 & 0x07) << 18) + ((u1 & 0x3F) << 12) + ((u2 & 0x3F) << 6) + (u3 & 0x3F);
}
+ str += String.fromCodePoint(u);
}
+ return str;
};
const to_luastring_cache = {};