From e96d75a87d879f7f455e4b9c6457bf6580743fa5 Mon Sep 17 00:00:00 2001 From: Benoit Giannangeli Date: Mon, 6 Mar 2017 15:58:11 +0100 Subject: String are represented by Array of 8-bit numbers --- src/lstate.js | 11 ++++++++++- src/lua.js | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/lundump.js | 56 ++------------------------------------------------------ 3 files changed, 69 insertions(+), 56 deletions(-) diff --git a/src/lstate.js b/src/lstate.js index 2e6ebbe..519abde 100644 --- a/src/lstate.js +++ b/src/lstate.js @@ -69,7 +69,7 @@ class global_State { constructor(L) { this.mainthread = L; - this.strt = null; // TODO: string hash table + this.strt = new Map(); this.l_registry = nil; this.panic = null; this.version = null; @@ -77,6 +77,15 @@ class global_State { this.mt = new Array(LUA_NUMTAGS); } + intern(stringArray) { + let key = stringArray.map(e => `${e}|`).join(''); + + if (!this.strt.has(key)) + this.strt.set(key, new lobject.TValue(CT.LUA_TLNGSTR, stringArray)); + + return this.strt.get(key); + } + } diff --git a/src/lua.js b/src/lua.js index 95d0ef8..971593d 100644 --- a/src/lua.js +++ b/src/lua.js @@ -132,6 +132,61 @@ class lua_Debug { } +const to_luastring = function(str, maxBytesToWrite) { + let outU8Array = new Array(maxBytesToWrite); + + if (!(maxBytesToWrite > 0)) // Parameter maxBytesToWrite is not optional. Negative values, 0, null, undefined and false each don't write out any bytes. + return 0; + + let outIdx = 0; + let startIdx = 0; + let endIdx = maxBytesToWrite - 1; // -1 for string null terminator. + for (let i = 0; i < str.length; ++i) { + // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8. + // See http://unicode.org/faq/utf_bom.html#utf16-3 + // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629 + let u = str.charCodeAt(i); // possibly a lead surrogate + if (u >= 0xD800 && u <= 0xDFFF) u = 0x10000 + ((u & 0x3FF) << 10) | (str.charCodeAt(++i) & 0x3FF); + if (u <= 0x7F) { + if (outIdx >= endIdx) break; + outU8Array[outIdx++] = u; + } else if (u <= 0x7FF) { + if (outIdx + 1 >= endIdx) break; + outU8Array[outIdx++] = 0xC0 | (u >> 6); + outU8Array[outIdx++] = 0x80 | (u & 63); + } else if (u <= 0xFFFF) { + if (outIdx + 2 >= endIdx) break; + outU8Array[outIdx++] = 0xE0 | (u >> 12); + outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63); + outU8Array[outIdx++] = 0x80 | (u & 63); + } else if (u <= 0x1FFFFF) { + if (outIdx + 3 >= endIdx) break; + outU8Array[outIdx++] = 0xF0 | (u >> 18); + outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63); + outU8Array[outIdx++] = 0x80 | (u & 63); + } else if (u <= 0x3FFFFFF) { + if (outIdx + 4 >= endIdx) break; + outU8Array[outIdx++] = 0xF8 | (u >> 24); + outU8Array[outIdx++] = 0x80 | ((u >> 18) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63); + outU8Array[outIdx++] = 0x80 | (u & 63); + } else { + if (outIdx + 5 >= endIdx) break; + outU8Array[outIdx++] = 0xFC | (u >> 30); + outU8Array[outIdx++] = 0x80 | ((u >> 24) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 18) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63); + outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63); + outU8Array[outIdx++] = 0x80 | (u & 63); + } + } + // Null-terminate the pointer to the buffer. + outU8Array[outIdx] = 0; + return outU8Array; +}; + module.exports.CT = CT; module.exports.FENGARI_AUTHORS = FENGARI_AUTHORS; module.exports.FENGARI_COPYRIGHT = FENGARI_COPYRIGHT; @@ -181,4 +236,5 @@ module.exports.constant_types = constant_types; module.exports.lua_Debug = lua_Debug; module.exports.lua_upvalueindex = lua_upvalueindex; module.exports.print_version = print_version; -module.exports.thread_status = thread_status; \ No newline at end of file +module.exports.thread_status = thread_status; +module.exports.to_luastring = to_luastring; \ No newline at end of file diff --git a/src/lundump.js b/src/lundump.js index 92063a2..aa66584 100644 --- a/src/lundump.js +++ b/src/lundump.js @@ -13,18 +13,8 @@ const OpCodes = require('./lopcodes.js'); const LUAI_MAXSHORTLEN = 40; -/** - * Parse Lua 5.3 bytecode - * @see {@link http://www.lua.org/source/5.3/lundump.c.html|lundump.c} - */ class BytecodeParser { - /** - * Initilialize bytecode parser - * @constructor - * @param {lua_State} Lua state object - * @param {DataView} dataView Contains the binary data - */ constructor(dataView) { this.intSize = 4; this.size_tSize = 8; @@ -79,7 +69,6 @@ class BytecodeParser { return number; } - // TODO: 8-bit clean strings readString(n) { let size = typeof n !== 'undefined' ? n : this.readByte() - 1; @@ -90,10 +79,10 @@ class BytecodeParser { return null; } - let string = ""; + let string = new Uint8Array(); for (let i = 0; i < size; i++) - string += String.fromCharCode(this.readByte()); + string.push(this.readByte()); return string; } @@ -133,8 +122,6 @@ class BytecodeParser { Ax: (ins >> o.POS_Ax) & p.MASK1(o.SIZE_Ax, 0), sBx: ((ins >> o.POS_Bx) & p.MASK1(o.SIZE_Bx, 0)) - o.MAXARG_sBx }; - - // console.log(` [${i}] Op: ${o.OpCodes[f.code[i].opcode]} A: ${f.code[i].A} B: ${f.code[i].B} C: ${f.code[i].C} Ax: ${f.code[i].Ax} Bx: ${f.code[i].Bx} sBx: ${f.code[i].sBx}`); } } @@ -147,12 +134,6 @@ class BytecodeParser { instack: this.readByte(), idx: this.readByte() }; - - // console.log(` - // f.upvalues[${i}].name = ${f.upvalues[i].name} - // f.upvalues[${i}].instack = ${f.upvalues[i].instack} - // f.upvalues[${i}].idx = ${f.upvalues[i].idx} - // `); } } @@ -165,24 +146,19 @@ class BytecodeParser { switch (t) { case constant_types.LUA_TNIL: f.k.push(new TValue(constant_types.LUA_TNIL, null)); - // console.log(` LUA_TNIL = ${f.k[f.k.length - 1].value}`); break; case constant_types.LUA_TBOOLEAN: f.k.push(new TValue(constant_types.LUA_TBOOLEAN, this.readByte())); - // console.log(` LUA_TBOOLEAN = ${f.k[f.k.length - 1].value}`); break; case constant_types.LUA_TNUMFLT: f.k.push(new TValue(constant_types.LUA_TNUMFLT, this.readNumber())); - // console.log(` LUA_TNUMFLT = ${f.k[f.k.length - 1].value}`); break; case constant_types.LUA_TNUMINT: f.k.push(new TValue(constant_types.LUA_TNUMINT, this.readInteger())); - // console.log(` LUA_TNUMINT = ${f.k[f.k.length - 1].value}`); break; case constant_types.LUA_TSHRSTR: case constant_types.LUA_TLNGSTR: f.k.push(new TValue(constant_types.LUA_TLNGSTR, this.readString())); - // console.log(` LUA_TLNGSTR = ${f.k[f.k.length - 1].value}`); break; default: throw new Error(`unrecognized constant '${t}'`); @@ -211,21 +187,11 @@ class BytecodeParser { startpc: this.readInt(), endpc: this.readInt() }; - - // console.log(` - // f.locvars[${i}].varname = ${f.locvars[i].varname} - // f.locvars[${i}].startpc = ${f.locvars[i].startpc} - // f.locvars[${i}].endpc = ${f.locvars[i].endpc} - // `); } n = this.readInt(); for (let i = 0; i < n; i++) { f.upvalues[i].name = this.readString(); - - // console.log(` - // f.upvalues[${i}].name = ${f.upvalues[i].name} - // `); } } @@ -238,16 +204,6 @@ class BytecodeParser { f.numparams = this.readByte(); f.is_vararg = this.readByte(); f.maxstacksize = this.readByte(); - - // console.log(` - // f.source = ${f.source} - // f.linedefined = ${f.linedefined} - // f.lastlinedefined = ${f.lastlinedefined} - // f.numparams = ${f.numparams} - // f.is_vararg = ${f.is_vararg} - // f.maxstacksize = ${f.maxstacksize} - // `); - this.readCode(f); this.readConstants(f); this.readUpvalues(f); @@ -274,14 +230,6 @@ class BytecodeParser { this.integerSize = this.readByte(); this.numberSize = this.readByte(); - // console.log(` - // intSize = ${this.intSize} - // size_tSize = ${this.size_tSize} - // instructionSize = ${this.instructionSize} - // integerSize = ${this.integerSize} - // numberSize = ${this.numberSize} - // `) - if (this.readInteger() !== 0x5678) throw new Error("endianness mismatch"); -- cgit v1.2.3-54-g00ecf