From 456ab7b69f88859683c60cc2261e70d6dbadd8e8 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Wed, 29 Mar 2017 14:39:57 +0200 Subject: 8-bit string internally tests Lexing/Parsing is done on byte rather than js strings --- src/llex.js | 113 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 59 insertions(+), 54 deletions(-) (limited to 'src/llex.js') diff --git a/src/llex.js b/src/llex.js index 9b81cd7..e043c17 100644 --- a/src/llex.js +++ b/src/llex.js @@ -15,6 +15,12 @@ const TS = lua.thread_status; const FIRST_RESERVED = 257; +// To avoid charCodeAt everywhere +const char = []; +for (let i = 0; i < 127; i++) + char[String.fromCharCode(i)] = i; +module.exports.char = char; + const RESERVED = { /* terminal symbols denoted by reserved words */ TK_AND: FIRST_RESERVED, @@ -157,7 +163,7 @@ const save = function(ls, c) { const luaX_token2str = function(ls, token) { if (typeof token === "string" || token < FIRST_RESERVED) { /* single-byte symbols? */ - return lua.to_luastring(`'${typeof token === "string" ? token : lobject.jsstring(token)}'`); + return lua.to_luastring(`'${typeof token === "string" ? token : lobject.jsstring([token])}'`); } else { let s = luaX_tokens[token - FIRST_RESERVED]; if (token < R.TK_EOS) /* fixed format (symbols and reserved words)? */ @@ -168,7 +174,7 @@ const luaX_token2str = function(ls, token) { }; const currIsNewline = function(ls) { - return ls.current === '\n'.charCodeAt(0) || ls.current === '\r'.charCodeAt(0); + return ls.current === char['\n'] || ls.current === char['\r']; }; const next = function(ls) { @@ -235,7 +241,7 @@ const check_next1 = function(ls, c) { ** saves it */ const check_next2 = function(ls, set) { - if (ls.current === set.charAt(0).charCodeAt(0) || ls.current === set.charAt(1).charCodeAt(0)) { + if (ls.current === set[0].charCodeAt(0) || ls.current === set[1].charCodeAt(0)) { save_and_next(ls); return true; } @@ -248,7 +254,7 @@ const read_numeral = function(ls, seminfo) { let first = ls.current; assert(ljstype.lisdigit(ls.current)); save_and_next(ls); - if (first === '0' && check_next2(ls, "xX")) /* hexadecimal? */ + if (first === char['0'] && check_next2(ls, "xX")) /* hexadecimal? */ expo = "Pp"; for (;;) { @@ -256,7 +262,7 @@ const read_numeral = function(ls, seminfo) { check_next2(ls, "-+"); /* optional exponent sign */ if (ljstype.lisxdigit(ls.current)) save_and_next(ls); - else if (ls.current === '.') + else if (ls.current === char['.']) save_and_next(ls); else break; } @@ -295,7 +301,6 @@ const lexerror = function(ls, msg, token) { }; const luaX_syntaxerror = function(ls, msg) { - msg = msg instanceof TValue ? msg.value : lua.to_luastring(msg); lexerror(ls, msg, ls.t.token); }; @@ -307,9 +312,9 @@ const luaX_syntaxerror = function(ls, msg) { const skip_sep = function(ls) { let count = 0; let s = ls.current; - assert(s === '['.charCodeAt(0) || s === ']'.charCodeAt(0)); + assert(s === char['['] || s === char[']']); save_and_next(ls); - while (ls.current === '='.charCodeAt(0)) { + while (ls.current === char['=']) { save_and_next(ls); count++; } @@ -332,15 +337,15 @@ const read_long_string = function(ls, seminfo, sep) { lexerror(ls, lua.to_luastring(msg), R.TK_EOS); break; } - case ']'.charCodeAt(0): { + case char[']']: { if (skip_sep(ls) === sep) { save_and_next(ls); /* skip 2nd ']' */ skip = true; } break; } - case '\n'.charCodeAt(0): case '\r'.charCodeAt(0): { - save(ls, '\n'.charCodeAt(0)); + case char['\n']: case char['\r']: { + save(ls, char['\n']); inclinenumber(ls); if (!seminfo) { ls.buff.n = 0; @@ -386,7 +391,7 @@ const readhexaesc = function(ls) { const readutf8desc = function(ls) { let i = 4; /* chars to be removed: '\', 'u', '{', and first digit */ save_and_next(ls); /* skip 'u' */ - esccheck(ls, ls.current === '{'.charCodeAt(0), lua.to_luastring("missing '{'")); + esccheck(ls, ls.current === char['{'], lua.to_luastring("missing '{'")); let r = gethexa(ls); /* must have at least one digit */ save_and_next(ls); @@ -396,7 +401,7 @@ const readutf8desc = function(ls) { esccheck(ls, r <= 0x10FFFF, lua.to_luastring("UTF-8 value too large")); save_and_next(ls); } - esccheck(ls, ls.current === '}'.charCodeAt(0), lua.to_luastring("missing '}'")); + esccheck(ls, ls.current === char['}'], lua.to_luastring("missing '}'")); next(ls); /* skip '}' */ ls.buff.n -= i; /* remove saved chars from buffer */ return r; @@ -429,30 +434,30 @@ const read_string = function(ls, del, seminfo) { case -1: lexerror(ls, lua.to_luastring("unfinished string"), R.TK_EOS); break; - case '\n'.charCodeAt(0): - case '\r'.charCodeAt(0): + case char['\n']: + case char['\r']: lexerror(ls, lua.to_luastring("unfinished string"), R.TK_STRING); break; - case '\\'.charCodeAt(0): { /* escape sequences */ + case char['\\']: { /* escape sequences */ save_and_next(ls); /* keep '\\' for error messages */ let will; let c; switch(ls.current) { - case 'a': c = '\a'.charCodeAt(0); will = 'read_save'; break; - case 'b': c = '\b'.charCodeAt(0); will = 'read_save'; break; - case 'f': c = '\f'.charCodeAt(0); will = 'read_save'; break; - case 'n': c = '\n'.charCodeAt(0); will = 'read_save'; break; - case 'r': c = '\r'.charCodeAt(0); will = 'read_save'; break; - case 't': c = '\t'.charCodeAt(0); will = 'read_save'; break; - case 'v': c = '\v'.charCodeAt(0); will = 'read_save'; break; - case 'x': c = readhexaesc(ls); will = 'read_save'; break; - case 'u': utf8esc(ls); will = 'no_save'; break; - case '\n'.charCodeAt(0): case '\r'.charCodeAt(0): - inclinenumber(ls); c = '\n'; will = 'only_save'; break; - case '\\'.charCodeAt(0): case '\"'.charCodeAt(0): case '\''.charCodeAt(0): + case char['a']: c = char['\a']; will = 'read_save'; break; + case char['b']: c = char['\b']; will = 'read_save'; break; + case char['f']: c = char['\f']; will = 'read_save'; break; + case char['n']: c = char['\n']; will = 'read_save'; break; + case char['r']: c = char['\r']; will = 'read_save'; break; + case char['t']: c = char['\t']; will = 'read_save'; break; + case char['v']: c = char['\v']; will = 'read_save'; break; + case char['x']: c = readhexaesc(ls); will = 'read_save'; break; + case char['u']: utf8esc(ls); will = 'no_save'; break; + case char['\n']: case char['\r']: + inclinenumber(ls); c = char['\n']; will = 'only_save'; break; + case char['\\']: case char['\"']: case char['\'']: c = ls.current; will = 'read_save'; break; case -1: will = 'no_save'; break; /* will raise an error next loop */ - case 'z': { /* zap following span of spaces */ + case char['z']: { /* zap following span of spaces */ ls.buff.n -= 1; /* remove '\\' */ next(ls); /* skip the 'z' */ while (ljstype.lisspace(ls.current)) { @@ -500,20 +505,20 @@ const llex = function(ls, seminfo) { for (;;) { switch (ls.current) { - case '\n'.charCodeAt(0): case '\r'.charCodeAt(0): { /* line breaks */ + case char['\n']: case char['\r']: { /* line breaks */ inclinenumber(ls); break; } - case ' '.charCodeAt(0): case '\f'.charCodeAt(0): case '\t'.charCodeAt(0): case '\v'.charCodeAt(0): { /* spaces */ + case char[' ']: case char['\f']: case char['\t']: case char['\v']: { /* spaces */ next(ls); break; } - case '-'.charCodeAt(0): { /* '-' or '--' (comment) */ + case char['-']: { /* '-' or '--' (comment) */ next(ls); - if (ls.current !== '-'.charCodeAt(0)) return '-'; + if (ls.current !== char['-']) return char['-']; /* else is a comment */ next(ls); - if (ls.current === '['.charCodeAt(0)) { /* long comment? */ + if (ls.current === char['[']) { /* long comment? */ let sep = skip_sep(ls); ls.buff.n = 0; /* 'skip_sep' may dirty the buffer */ ls.buff.buffer = []; @@ -530,63 +535,63 @@ const llex = function(ls, seminfo) { next(ls); /* skip until end of line (or end of file) */ break; } - case '['.charCodeAt(0): { /* long string or simply '[' */ + case char['[']: { /* long string or simply '[' */ let sep = skip_sep(ls); if (sep >= 0) { read_long_string(ls, seminfo, sep); return R.TK_STRING; } else if (sep !== -1) /* '[=...' missing second bracket */ lexerror(ls, lua.to_luastring("invalid long string delimiter"), R.TK_STRING); - return '['; + return char['[']; } - case '='.charCodeAt(0): { + case char['=']: { next(ls); if (check_next1(ls, '=')) return R.TK_EQ; - else return '='; + else return char['=']; } - case '<'.charCodeAt(0): { + case char['<']: { next(ls); if (check_next1(ls, '=')) return R.TK_LE; else if (check_next1(ls, '<')) return R.TK_SHL; - else return '<'; + else return char['<']; } - case '>'.charCodeAt(0): { + case char['>']: { next(ls); if (check_next1(ls, '=')) return R.TK_GE; else if (check_next1(ls, '>')) return R.TK_SHR; - else return '>'; + else return char['>']; } - case '/'.charCodeAt(0): { + case char['/']: { next(ls); if (check_next1(ls, '/')) return R.TK_IDIV; - else return '/'; + else return char['/']; } - case '~'.charCodeAt(0): { + case char['~']: { next(ls); if (check_next1(ls, '=')) return R.TK_NE; - else return '~'; + else return char['~']; } - case ':'.charCodeAt(0): { + case char[':']: { next(ls); if (check_next1(ls, ':')) return R.TK_DBCOLON; - else return ':'; + else return char[':']; } - case '"'.charCodeAt(0): case '\''.charCodeAt(0): { /* short literal strings */ + case char['"']: case char['\'']: { /* short literal strings */ read_string(ls, ls.current, seminfo); return R.TK_STRING; } - case '.'.charCodeAt(0): { /* '.', '..', '...', or number */ + case char['.']: { /* '.', '..', '...', or number */ save_and_next(ls); if (check_next1(ls, '.')) { if (check_next1(ls, '.')) return R.TK_DOTS; /* '...' */ else return R.TK_CONCAT; /* '..' */ } - else if (!ljstype.lisdigit(ls.current)) return '.'; + else if (!ljstype.lisdigit(ls.current)) return char['.']; else return read_numeral(ls, seminfo); } - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case char['0']: case char['1']: case char['2']: case char['3']: case char['4']: + case char['5']: case char['6']: case char['7']: case char['8']: case char['9']: { return read_numeral(ls, seminfo); } case -1: { @@ -598,7 +603,7 @@ const llex = function(ls, seminfo) { save_and_next(ls); } while (ljstype.lislalnum(ls.current)); - let ts = new TValue(CT.LUA_TLNGSTR, lua.to_luastring(ls.buff.buffer.join(''))); + let ts = new TValue(CT.LUA_TLNGSTR, ls.buff.buffer); seminfo.ts = ts; let kidx = luaX_tokens.slice(0, 22).indexOf(ts.jsstring()); if (kidx >= 0) /* reserved word? */ -- cgit v1.2.3-54-g00ecf