From 0d51854b27836c485d0bac7a1a28c19fc61496f1 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Tue, 14 Mar 2017 08:30:45 +0100 Subject: Use emscripten's utf8tojs string function --- logo.sketch | Bin 81920 -> 81920 bytes src/lobject.js | 39 ++++++++++++++++++++++++++++++++++++++- src/ltm.js | 2 +- src/lua.js | 2 +- src/lutf8lib.js | 12 +++++++----- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/logo.sketch b/logo.sketch index a98d6a7..bddadc1 100644 Binary files a/logo.sketch and b/logo.sketch differ diff --git a/src/lobject.js b/src/lobject.js index 739db50..556da08 100644 --- a/src/lobject.js +++ b/src/lobject.js @@ -111,7 +111,44 @@ class TValue { } jsstring() { - return this.ttisstring() ? String.fromCharCode(...this.value) : null; + //return this.ttisstring() ? String.fromCharCode(...this.value) : null; + let u0, u1, u2, u3, u4, u5; + let idx = 0; + + var str = ''; + while (1) { + // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629 + u0 = this.value[idx++]; + if (!u0) return str; + if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; } + u1 = this.value[idx++] & 63; + if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; } + u2 = this.value[idx++] & 63; + if ((u0 & 0xF0) == 0xE0) { + u0 = ((u0 & 15) << 12) | (u1 << 6) | u2; + } else { + u3 = this.value[idx++] & 63; + if ((u0 & 0xF8) == 0xF0) { + u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | u3; + } else { + u4 = this.value[idx++] & 63; + if ((u0 & 0xFC) == 0xF8) { + u0 = ((u0 & 3) << 24) | (u1 << 18) | (u2 << 12) | (u3 << 6) | u4; + } else { + u5 = this.value[idx++] & 63; + u0 = ((u0 & 1) << 30) | (u1 << 24) | (u2 << 18) | (u3 << 12) | (u4 << 6) | u5; + } + } + } + if (u0 < 0x10000) { + str += String.fromCharCode(u0); + } else { + var ch = u0 - 0x10000; + str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF)); + } + } + + return str; } } diff --git a/src/ltm.js b/src/ltm.js index 6377c93..e48ff23 100644 --- a/src/ltm.js +++ b/src/ltm.js @@ -89,7 +89,7 @@ const ttypename = function(t) { const luaT_init = function(L) { L.l_G.tmname = []; for (let event in TMS) { - let name = lua.to_luastring(TMS[event], TMS[event].length); + let name = lua.to_luastring(TMS[event]); L.l_G.tmname.push(L.l_G.intern(name)); // Strings are already interned by JS } }; diff --git a/src/lua.js b/src/lua.js index 08bad58..be6cc1e 100644 --- a/src/lua.js +++ b/src/lua.js @@ -133,7 +133,7 @@ class lua_Debug { } const to_luastring = function(str, maxBytesToWrite) { - maxBytesToWrite = maxBytesToWrite !== undefined ? maxBytesToWrite : str.length + 1; + maxBytesToWrite = maxBytesToWrite !== undefined ? maxBytesToWrite : Number.MAX_SAFE_INTEGER; let outU8Array = []; if (!(maxBytesToWrite > 0)) // Parameter maxBytesToWrite is not optional. Negative values, 0, null, undefined and false each don't write out any bytes. diff --git a/src/lutf8lib.js b/src/lutf8lib.js index bc43d54..db5f7ad 100644 --- a/src/lutf8lib.js +++ b/src/lutf8lib.js @@ -8,7 +8,8 @@ const lauxlib = require('./lauxlib.js'); const iscont = function(p) { - return p & 0xC0 === 0x80; + let c = p & 0xC0; + return c === 0x80; }; /* translate a relative string position: negative means back from end */ @@ -24,6 +25,7 @@ const u_posrelat = function(pos, len) { */ const byteoffset = function(L) { let s = lauxlib.luaL_checkstring(L, 1); + s = L.stack[lapi.index2addr_(L, 1)].value; let n = lauxlib.luaL_checkinteger(L, 2); let posi = n >= 0 ? 1 : s.length + 1; posi = u_posrelat(lauxlib.luaL_optinteger(L, 3, posi), s.length); @@ -31,16 +33,16 @@ const byteoffset = function(L) { if (n === 0) { /* find beginning of current byte sequence */ - while (posi > 0 && iscont(s.slice(posi))) posi--; + while (posi > 0 && iscont(s[posi])) posi--; } else { - if (iscont(s.slice(posi))) + if (iscont(s[posi])) lauxlib.luaL_error(L, "initial position is a continuation byte"); if (n < 0) { while (n < 0 && posi > 0) { /* move back */ do { /* find beginning of previous character */ posi--; - } while (posi > 0 && iscont(s.slice(posi))); + } while (posi > 0 && iscont(s[posi])); n++; } } else { @@ -48,7 +50,7 @@ const byteoffset = function(L) { while (n > 0 && posi < s.length) { do { /* find beginning of next character */ posi++; - } while (iscont(s.slice(posi))); /* (cannot pass final '\0') */ + } while (iscont(s[posi])); /* (cannot pass final '\0') */ n--; } } -- cgit v1.2.3-70-g09d2