From 36f3247d47c1ad854fa89aabf17f6d954a6a6657 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Tue, 28 Mar 2017 13:18:13 +0200 Subject: luaO_utf8esc --- src/lauxlib.js | 2 +- src/llex.js | 12 ++++++------ src/lobject.js | 22 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/lauxlib.js b/src/lauxlib.js index e3974cb..8bda842 100644 --- a/src/lauxlib.js +++ b/src/lauxlib.js @@ -437,7 +437,7 @@ if (typeof require === "function") { lf.pos += bytes; } if (bytes > 0) - return lf.binary ? toDataView(lf.buff) : new lobject.TValue(0, lf.buff).jsstring(0, bytes); // TODO: Here reading utf8 only + return lf.binary ? toDataView(lf.buff) : lobject.jsstring(lf.buff, 0, bytes); // TODO: Here reading utf8 only else return null; }; diff --git a/src/llex.js b/src/llex.js index 96e9e00..2d34b1f 100644 --- a/src/llex.js +++ b/src/llex.js @@ -152,7 +152,7 @@ const save = function(ls, c) { if (b.buffer.length >= Number.MAX_SAFE_INTEGER/2) lexerror(ls, "lexical element too long", 0); } - b.buffer[b.n++] = c; + b.buffer[b.n++] = c < 0 ? 255 + c + 1 : c; }; const luaX_token2str = function(ls, token) { @@ -407,9 +407,9 @@ const readutf8desc = function(ls) { }; const utf8esc = function(ls) { - let buff = new Array(lobject.UTF8BUFFSZ); - let n = lobject.luaO_utf8esc(buff, readutf8desc(ls)); - for (; n > 0; n--) /* add 'buff' to string */ + let u = lobject.luaO_utf8esc(readutf8desc(ls)); + let buff = u.buff; + for (let n = u.n; n > 0; n--) /* add 'buff' to string */ save(ls, buff[lobject.UTF8BUFFSZ - n]); }; @@ -450,12 +450,12 @@ const read_string = function(ls, del, seminfo) { case 't': c = '\t'; will = 'read_save'; break; case 'v': c = '\v'; will = 'read_save'; break; case 'x': c = readhexaesc(ls); will = 'read_save'; break; - case 'u': utf8esc(ls); will = 'read_save'; break; + case 'u': utf8esc(ls); will = 'no_save'; break; case '\n': case '\r': inclinenumber(ls); c = '\n'; will = 'only_save'; break; case '\\': case '\"': case '\'': c = ls.current; will = 'read_save'; break; - case -1: will = 'read_save'; break; /* will raise an error next loop */ + case -1: will = 'no_save'; break; /* will raise an error next loop */ case 'z': { /* zap following span of spaces */ ls.buff.n -= 1; /* remove '\\' */ next(ls); /* skip the 'z' */ diff --git a/src/lobject.js b/src/lobject.js index fca259d..966bdd8 100644 --- a/src/lobject.js +++ b/src/lobject.js @@ -499,6 +499,27 @@ const luaO_str2num = function(s) { } }; +const luaO_utf8esc = function(x) { + let buff = []; + let n = 1; /* number of bytes put in buffer (backwards) */ + assert(x <= 0x10ffff); + if (x < 0x80) /* ascii? */ + buff[UTF8BUFFSZ - 1] = x; + else { /* need continuation bytes */ + let mfb = 0x3f; /* maximum that fits in first byte */ + do { /* add continuation bytes */ + buff[UTF8BUFFSZ - (n++)] = 0x80 | (x & 0x3f); + x >>= 6; /* remove added bits */ + mfb >>= 1; /* now there is one less bit available in first byte */ + } while (x > mfb); /* still needs continuation byte? */ + buff[UTF8BUFFSZ - n] = (~mfb << 1) | x; /* add first byte */ + } + return { + buff: buff, + n: n + }; +}; + /* ** converts an integer to a "floating point byte", represented as ** (eeeeexxx), where the real value is (1xxx) * 2^(eeeee - 1) if @@ -564,4 +585,5 @@ module.exports.luaO_hexavalue = luaO_hexavalue; module.exports.luaO_int2fb = luaO_int2fb; module.exports.luaO_str2num = luaO_str2num; module.exports.luaO_utf8desc = luaO_utf8desc; +module.exports.luaO_utf8esc = luaO_utf8esc; module.exports.numarith = numarith; -- cgit v1.2.3-54-g00ecf