summaryrefslogtreecommitdiff
path: root/src/llex.js
diff options
context:
space:
mode:
authorBenoit Giannangeli <benoit.giannangeli@boursorama.fr>2017-02-27 08:46:56 +0100
committerBenoit Giannangeli <benoit.giannangeli@boursorama.fr>2017-02-27 14:19:05 +0100
commit5a47025d64f013975051473c1115ff70c0281785 (patch)
tree2a79b29beea6d4571993783cef523c38568c2742 /src/llex.js
parent94a301a27a8a75c4684790a99a898262b8354f68 (diff)
downloadfengari-5a47025d64f013975051473c1115ff70c0281785.tar.gz
fengari-5a47025d64f013975051473c1115ff70c0281785.tar.bz2
fengari-5a47025d64f013975051473c1115ff70c0281785.zip
lexing
Diffstat (limited to 'src/llex.js')
-rw-r--r--src/llex.js596
1 files changed, 596 insertions, 0 deletions
diff --git a/src/llex.js b/src/llex.js
new file mode 100644
index 0000000..4c85f65
--- /dev/null
+++ b/src/llex.js
@@ -0,0 +1,596 @@
+/* jshint esversion: 6 */
+"use strict";
+
+const assert = require('assert');
+
+const lapi = require('./lapi.js');
+const ldebug = require('./ldebug.js');
+const ldo = require('./ldo.js');
+const lua = require('./lua.js');
+const lobject = require('./lobject');
+const ljstype = require('./ljstype');
+const TValue = lobject.TValue;
+const CT = lua.constant_types;
+const TS = lua.thread_status;
+
+const FIRST_RESERVED = 257;
+
+const RESERVED = {
+ /* terminal symbols denoted by reserved words */
+ TK_AND: FIRST_RESERVED,
+ TK_BREAK: FIRST_RESERVED + 1,
+ TK_DO: FIRST_RESERVED + 2,
+ TK_ELSE: FIRST_RESERVED + 3,
+ TK_ELSEIF: FIRST_RESERVED + 4,
+ TK_END: FIRST_RESERVED + 5,
+ TK_FALSE: FIRST_RESERVED + 6,
+ TK_FOR: FIRST_RESERVED + 8,
+ TK_FUNCTION: FIRST_RESERVED + 10,
+ TK_GOTO: FIRST_RESERVED + 11,
+ TK_IF: FIRST_RESERVED + 12,
+ TK_IN: FIRST_RESERVED + 13,
+ TK_LOCAL: FIRST_RESERVED + 14,
+ TK_NIL: FIRST_RESERVED + 15,
+ TK_NOT: FIRST_RESERVED + 16,
+ TK_OR: FIRST_RESERVED + 17,
+ TK_REPEAT: FIRST_RESERVED + 18,
+ TK_RETURN: FIRST_RESERVED + 19,
+ TK_THEN: FIRST_RESERVED + 20,
+ TK_TRUE: FIRST_RESERVED + 21,
+ TK_UNTIL: FIRST_RESERVED + 22,
+ TK_WHILE: FIRST_RESERVED + 23,
+ /* other terminal symbols */
+ TK_IDIV: FIRST_RESERVED + 24,
+ TK_CONCAT: FIRST_RESERVED + 25,
+ TK_DOTS: FIRST_RESERVED + 26,
+ TK_EQ: FIRST_RESERVED + 27,
+ TK_GE: FIRST_RESERVED + 28,
+ TK_LE: FIRST_RESERVED + 29,
+ TK_NE: FIRST_RESERVED + 30,
+ TK_SHL: FIRST_RESERVED + 31,
+ TK_SHR: FIRST_RESERVED + 32,
+ TK_DBCOLON: FIRST_RESERVED + 33,
+ TK_EOS: FIRST_RESERVED + 34,
+ TK_FLT: FIRST_RESERVED + 35,
+ TK_INT: FIRST_RESERVED + 36,
+ TK_NAME: FIRST_RESERVED + 37,
+ TK_STRING: FIRST_RESERVED + 38
+};
+
+const R = RESERVED;
+
+const reserved_keywords = [
+ "and", "break", "do", "else", "elseif",
+ "end", "false", "for", "function", "goto", "if",
+ "in", "local", "nil", "not", "or", "repeat",
+ "return", "then", "true", "until", "while"
+];
+
+const reserved_keywords_tokens = [
+ R.TK_AND,
+ R.TK_BREAK,
+ R.TK_DO,
+ R.TK_ELSE,
+ R.TK_ELSEIF,
+ R.TK_END,
+ R.TK_FALSE,
+ R.TK_FOR,
+ R.TK_FUNCTION,
+ R.TK_GOTO,
+ R.TK_IF,
+ R.TK_IN,
+ R.TK_LOCAL,
+ R.TK_NIL,
+ R.TK_NOT,
+ R.TK_OR,
+ R.TK_REPEAT,
+ R.TK_RETURN,
+ R.TK_THEN,
+ R.TK_TRUE,
+ R.TK_UNTIL,
+ R.TK_WHILE,
+];
+
+const luaX_tokens = [
+ "and", "break", "do", "else", "elseif",
+ "end", "false", "for", "function", "goto", "if",
+ "in", "local", "nil", "not", "or", "repeat",
+ "return", "then", "true", "until", "while",
+ "//", "..", "...", "==", ">=", "<=", "~=",
+ "<<", ">>", "::", "<eof>",
+ "<number>", "<integer>", "<name>", "<string>"
+];
+
+const NUM_RESERVED = Object.keys(RESERVED).length;
+
+class Buffer {
+ constructor(string) {
+ this.buffer = string ? string.split('') : [];
+ this.n = this.buffer.length;
+ this.off = 0;
+ }
+}
+
+class SemInfo {
+ constructor() {
+ this.r = NaN;
+ this.i = NaN;
+ this.ts = null;
+ }
+}
+
+class Token {
+ constructor() {
+ this.token = NaN;
+ this.seminfo = null;
+ }
+}
+
+/* state of the lexer plus state of the parser when shared by all
+ functions */
+class LexState {
+ constructor() {
+ this.current = NaN; /* current character (charint) */
+ this.linenumber = NaN; /* input line counter */
+ this.lastline = NaN; /* line of last token 'consumed' */
+ this.t = null; /* current token */
+ this.lookahead = null; /* look ahead token */
+ this.fs = null; /* current function (parser) */
+ this.L = null;
+ this.z = new Buffer();
+ this.buff = new Buffer(); /* buffer for tokens */
+ this.h = null; /* to avoid collection/reuse strings */
+ this.dyd = null; /* dynamic structures used by the parser */
+ this.source = null; /* current source name */
+ this.envn = null; /* environment variable name */
+ }
+}
+
+const save = function(ls, c) {
+ let b = ls.buff;
+ if (b.n + 1 > b.buffer.length) {
+ if (b.buffer.length >= Number.MAX_SAFE_INTEGER/2)
+ lexerror(ls, "lexical element too long", 0);
+ }
+ b.buffer[b.n++] = c;
+};
+
+const luaX_token2str = function(ls, token) {
+ if (token < FIRST_RESERVED) { /* single-byte symbols? */
+ return lapi.lua_pushstring(ls.L, `'%{String.fromCharCode(token)}'`);
+ } else {
+ let s = luaX_tokens[token - FIRST_RESERVED];
+ if (token < R.TK_EOS) /* fixed format (symbols and reserved words)? */
+ return lapi.lua_pushstring(ls.L, `'${s}'`);
+ else /* names, strings, and numerals */
+ return s;
+ }
+};
+
+const currIsNewline = function(ls) {
+ return ls.current === '\n' || ls.current === '\r';
+};
+
+const next = function(ls) {
+ ls.current = ls.z.n-- > 0 ? ls.z.buffer[ls.z.off++] : -1;
+};
+
+const save_and_next = function(ls) {
+ save(ls, ls.current);
+ next(ls);
+};
+
+/*
+** increment line number and skips newline sequence (any of
+** \n, \r, \n\r, or \r\n)
+*/
+const inclinenumber = function(ls) {
+ let old = ls.current;
+ assert(currIsNewline(ls));
+ next(ls); /* skip '\n' or '\r' */
+ if (currIsNewline(ls) && ls.current !== old)
+ next(ls); /* skip '\n\r' or '\r\n' */
+ if (++ls.linenumber >= Number.MAX_SAFE_INTEGER)
+ lexerror(ls, "chunk has too many lines", 0);
+};
+
+const luaX_setinput = function(L, ls, z, source, firstchar) {
+ ls.t.token = 0;
+ ls.L = L;
+ ls.current = firstchar;
+ ls.lookahead.token = R.TK_EOS;
+ ls.z = z;
+ ls.fs = null;
+ ls.linenumber = 1;
+ ls.lastline = 1;
+ ls.source = source;
+ ls.envn = new TValue(CT.LUA_TLNGSTR, "_ENV");
+};
+
+const check_next1 = function(ls, c) {
+ if (ls.current === c) {
+ next(ls);
+ return true;
+ }
+
+ return false;
+};
+
+/*
+** Check whether current char is in set 'set' (with two chars) and
+** saves it
+*/
+const check_next2 = function(ls, set) {
+ if (ls.current === set.charAt(0) || ls.current === set.charAt(1)) {
+ save_and_next(ls);
+ return true;
+ }
+
+ return false;
+};
+
+const read_numeral = function(ls, seminfo) {
+ let expo = "Ee";
+ let first = ls.current;
+ assert(ljstype.lisdigit(ls.current));
+ save_and_next(ls);
+ if (first === '0' && check_next2(ls, "xX")) /* hexadecimal? */
+ expo = "Pp";
+
+ for (;;) {
+ if (check_next2(ls, expo)) /* exponent part? */
+ check_next2(ls, "-+"); /* optional exponent sign */
+ if (ljstype.lisxdigit(ls.current))
+ save_and_next(ls);
+ else if (ls.current === '.')
+ save_and_next(ls);
+ else break;
+ }
+
+ save(ls, '\0');
+
+ let obj = lobject.luaO_str2num(ls.buff.buffer);
+ if (obj === false) /* format error? */
+ lexerror(ls, "malformed number", R.TK_FLT);
+ if (obj.ttisinteger()) {
+ seminfo.i = obj.value;
+ return R.TK_INT;
+ } else {
+ assert(obj.ttisfloat());
+ seminfo.r = obj.value;
+ return R.TK_FLT;
+ }
+};
+
+const txtToken = function(ls, token) {
+ switch (token) {
+ case R.TK_NAME: case R.TK_STRING:
+ case R.TK_FLT: case R.TK_INT:
+ save(ls, '\0');
+ return lapi.lua_pushstring(ls.L, `'${ls.buff.buffer}'`);
+ default:
+ return luaX_token2str(ls, token);
+ }
+};
+
+const lexerror = function(ls, msg, token) {
+ msg = ldebug.luaG_addinfo(ls.L, msg, ls.source, ls.linenumber);
+ if (token)
+ lapi.lua_pushstring(ls.L, `${msg} near ${txtToken(ls, token)}`);
+ ldo.luaD_throw(ls.L, TS.LUA_ERRSYNTAX);
+};
+
+/*
+** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return
+** its number of '='s; otherwise, return a negative number (-1 iff there
+** are no '='s after initial bracket)
+*/
+const skip_sep = function(ls) {
+ let count = 0;
+ let s = ls.current;
+ assert(s === '[' || s === ']');
+ save_and_next(ls);
+ while (ls.current === '=') {
+ save_and_next(ls);
+ count++;
+ }
+ return ls.current === s ? count : (-count) - 1;
+};
+
+const read_long_string = function(ls, seminfo, sep) {
+ let line = ls.linenumber; /* initial line (for error message) */
+ save_and_next(ls); /* skip 2nd '[' */
+
+ if (currIsNewline(ls)) /* string starts with a newline? */
+ inclinenumber(ls); /* skip it */
+
+ let skip = false;
+ for (; !skip ;) {
+ switch (ls.current) {
+ case -1: { /* error */
+ let what = seminfo ? "string" : "comment";
+ let msg = lapi.lua_pushstring(ls.L, `unfinished long ${what} (starting at line ${line})`);
+ lexerror(ls, msg, R.TK_EOS);
+ break;
+ }
+ case ']': {
+ if (skip_sep(ls) === sep) {
+ save_and_next(ls); /* skip 2nd ']' */
+ skip = true;
+ }
+ break;
+ }
+ case '\n': case '\r': {
+ save(ls, '\n');
+ inclinenumber(ls);
+ if (!seminfo) ls.buff.n = 0;
+ break;
+ }
+ default: {
+ if (seminfo) save_and_next(ls);
+ else next(ls);
+ }
+ }
+ }
+
+ if (seminfo)
+ seminfo.ts = new TValue(CT.LUA_TLNGSTR, ls.buff.buffer.slice(2 + sep).join(''));
+};
+
+const esccheck = function(ls, c, msg) {
+ if (!c) {
+ if (ls.current !== -1)
+ save_and_next(ls); /* add current to buffer for error message */
+ lexerror(ls, msg, R.TK_STRING);
+ }
+};
+
+const gethexa = function(ls) {
+ save_and_next(ls);
+ esccheck(ls, ljstype.lisxdigit(ls.current), "hexadecimal digit expected");
+ return lobject.luaO_hexavalue(ls.current);
+};
+
+const readhexaesc = function(ls) {
+ let r = gethexa(ls);
+ r = (r << 4) + gethexa(ls);
+ ls.buff.n -= 2; /* remove saved chars from buffer */
+ return r;
+};
+
+const readutf8desc = function(ls) {
+ let i = 4; /* chars to be removed: '\', 'u', '{', and first digit */
+ save_and_next(ls); /* skip 'u' */
+ esccheck(ls, ls.current === '{', "missing '{'");
+ let r = gethexa(ls); /* must have at least one digit */
+
+ save_and_next(ls);
+ while (ljstype.lisxdigit(ls.current)) {
+ i++;
+ r = (r << 4) + lobject.luaO_hexavalue(ls.current);
+ esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
+ save_and_next(ls);
+ }
+ esccheck(ls, ls.current === '}', "missing '}'");
+ next(ls); /* skip '}' */
+ ls.buff.n -= i; /* remove saved chars from buffer */
+ return r;
+};
+
+const utf8esc = function(ls) {
+ let buff = new Array(lobject.UTF8BUFFSZ);
+ let n = lobject.luaO_utf8esc(buff, readutf8desc(ls));
+ for (; n > 0; n--) /* add 'buff' to string */
+ save(ls, buff[lobject.UTF8BUFFSZ - n]);
+};
+
+const readdecesc = function(ls) {
+ let r = 0; /* result accumulator */
+ let i;
+ for (i = 0; i < 3 && ljstype.lisdigit(ls.current); i++) { /* read up to 3 digits */
+ r = 10 * r + parseInt(ls.current);
+ save_and_next(ls);
+ }
+ esccheck(ls, r <= 255, "decimal escape too large");
+ ls.buff.n -= i; /* remove read digits from buffer */
+ return r;
+};
+
+const read_string = function(ls, del, seminfo) {
+ save_and_next(ls); /* keep delimiter (for error messages) */
+
+ while (ls.current !== del) {
+ switch (ls.current) {
+ case -1:
+ lexerror(ls, "unfinished string", R.TK_EOS);
+ break;
+ case '\n':
+ case '\r':
+ lexerror(ls, "unfinished string", R.TK_STRING);
+ break;
+ case '\\': { /* escape sequences */
+ save_and_next(ls); /* keep '\\' for error messages */
+ let will;
+ let c;
+ switch(ls.current) {
+ case 'a': c = '\a'; will = 'read_save'; break;
+ case 'b': c = '\b'; will = 'read_save'; break;
+ case 'f': c = '\f'; will = 'read_save'; break;
+ case 'n': c = '\n'; will = 'read_save'; break;
+ case 'r': c = '\r'; will = 'read_save'; break;
+ case 't': c = '\t'; will = 'read_save'; break;
+ case 'v': c = '\v'; will = 'read_save'; break;
+ case 'x': c = readhexaesc(ls); will = 'read_save'; break;
+ case 'u': utf8esc(ls); will = 'read_save'; break;
+ case '\n': case '\r':
+ inclinenumber(ls); c = '\n'; will = 'read_save'; break;
+ case '\\': case '\"': case '\'':
+ c = ls.current; will = 'read_save'; break;
+ case -1: will = 'read_save'; break; /* will raise an error next loop */
+ case 'z': { /* zap following span of spaces */
+ ls.buff.n -= 1; /* remove '\\' */
+ next(ls); /* skip the 'z' */
+ while (ljstype.lisspace(ls.current)) {
+ if (currIsNewline(ls)) inclinenumber(ls);
+ else next(ls);
+ }
+ will = 'no_save'; break;
+ }
+ default: {
+ esccheck(ls, ljstype.lisdigit(ls.current), "invalid escape sequence");
+ c = readdecesc(ls); /* digital escape '\ddd' */
+ will = 'only_save'; break;
+ }
+ }
+
+ if (will === 'read_save')
+ next(ls);
+ else if (will === 'only_save') {
+ ls.buff.n -= 1; /* remove '\\' */
+ save(ls, c);
+ } else if (will === 'no_save')
+ break;
+ }
+ default:
+ save_and_next(ls);
+ }
+ }
+ save_and_next(ls); /* skip delimiter */
+ seminfo.ts = new TValue(CT.LUA_TLNGSTR, ls.buff.buffer.slice(1).join(''));
+};
+
+const llex = function(ls, seminfo) {
+ ls.buff.n = 0;
+
+ for (;;) {
+ switch (ls.current) {
+ case '\n': case '\r': { /* line breaks */
+ inclinenumber(ls);
+ break;
+ }
+ case ' ': case '\f': case '\t': case '\v': { /* spaces */
+ next(ls);
+ break;
+ }
+ case '-': { /* '-' or '--' (comment) */
+ next(ls);
+ if (ls.current !== '-') return '-';
+ /* else is a comment */
+ next(ls);
+ if (ls.current === '[') { /* long comment? */
+ let sep = skip_sep(ls);
+ ls.buff.n = 0; /* 'skip_sep' may dirty the buffer */
+ if (sep >= 0) {
+ read_long_string(ls, null, sep); /* skip long comment */
+ ls.buff.n = 0; /* previous call may dirty the buff. */
+ break;
+ }
+ }
+
+ /* else short comment */
+ while (!currIsNewline(ls) && ls.current !== -1)
+ next(ls); /* skip until end of line (or end of file) */
+ break;
+ }
+ case '[': { /* long string or simply '[' */
+ let sep = skip_sep(ls);
+ if (sep.charCodeAt(0) >= 0) {
+ read_long_string(ls, seminfo, sep);
+ return R.TK_STRING;
+ } else if (sep !== -1) /* '[=...' missing second bracket */
+ lexerror(ls, "invalid long string delimiter", R.TK_STRING);
+ return '[';
+ }
+ case '=': {
+ next(ls);
+ if (check_next1(ls, '=')) return R.TK_EQ;
+ else return '=';
+ }
+ case '<': {
+ next(ls);
+ if (check_next1(ls, '=')) return R.TK_LE;
+ else if (check_next1(ls, '<')) return R.TK_SHL;
+ else return '<';
+ }
+ case '>': {
+ next(ls);
+ if (check_next1(ls, '=')) return R.TK_GE;
+ else if (check_next1(ls, '>')) return R.TK_SHR;
+ else return '>';
+ }
+ case '/': {
+ next(ls);
+ if (check_next1(ls, '/')) return R.TK_IDIV;
+ else return '/';
+ }
+ case '~': {
+ next(ls);
+ if (check_next1(ls, '=')) return R.TK_NE;
+ else return '~';
+ }
+ case ':': {
+ next(ls);
+ if (check_next1(ls, ':')) return R.TK_DBCOLON;
+ else return ':';
+ }
+ case '"': case '\'': { /* short literal strings */
+ read_string(ls, ls.current, seminfo);
+ return R.TK_STRING;
+ }
+ case '.': { /* '.', '..', '...', or number */
+ save_and_next(ls);
+ if (check_next1(ls, '.')) {
+ if (check_next1(ls, '.'))
+ return R.TK_DOTS; /* '...' */
+ else return R.TK_CONCAT; /* '..' */
+ }
+ else if (!ljstype.lisdigit(ls.current)) return '.';
+ else return read_numeral(ls, seminfo);
+ }
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ return read_numeral(ls, seminfo);
+ }
+ case -1: {
+ return R.TK_EOS;
+ }
+ default: {
+ if (ljstype.lislalpha(ls.current)) { /* identifier or reserved word? */
+ do {
+ save_and_next(ls);
+ } while (ljstype.lislalnum(ls.current));
+
+ let ts = new TValue(CT.LUA_TLNGSTR, ls.buff.buffer.join(''));
+ seminfo.ts = ts;
+ if (reserved_keywords.indexOf(ts.value) >= 0) /* reserved word? */
+ return reserved_keywords_tokens[reserved_keywords.indexOf(ts.value)];
+ else
+ return R.TK_NAME;
+ } else { /* single-char tokens (+ - / ...) */
+ let c = ls.current;
+ next(ls);
+ return c;
+ }
+ }
+ }
+ }
+};
+
+const luaX_next = function(ls) {
+ ls.lastline = ls.linenumber;
+ if (ls.lookahead.token !== R.TK_EOS) { /* is there a look-ahead token? */
+ ls.t = ls.lookahead; /* use this one */
+ ls.lookahead.token = R.TK_EOS; /* and discharge it */
+ } else
+ ls.t.token = llex(ls, ls.t.seminfo); /* read next token */
+};
+
+const luaX_lookahead = function(ls) {
+ assert(ls.lookahead.token === R.TK_EOS);
+ ls.lookahead.token = llex(ls. ls.lookahead.seminfo);
+ return ls.lookahead.token;
+};
+
+module.exports.luaX_lookahead = luaX_lookahead;
+module.exports.luaX_next = luaX_next;
+module.exports.luaX_setinput = luaX_setinput; \ No newline at end of file