From 1b4551edce66763db780ad21fbbd85896e78b523 Mon Sep 17 00:00:00 2001 From: daurnimator Date: Sun, 4 Mar 2018 12:56:44 +1100 Subject: src/defs.js: to_jsstring now optionally replaces invalid utf8 sequences with the unicode replacement character Related to https://github.com/fengari-lua/fengari-interop/issues/30 --- src/defs.js | 59 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/src/defs.js b/src/defs.js index 8bdd23e..e761f86 100644 --- a/src/defs.js +++ b/src/defs.js @@ -58,7 +58,7 @@ const luastring_eq = function(a, b) { return true; }; -const to_jsstring = function(value, from, to) { +const to_jsstring = function(value, from, to, replacement_char) { if (!is_luastring(value)) throw new TypeError("to_jsstring expects a Uint8Array"); if (to === void 0) { @@ -74,20 +74,41 @@ const to_jsstring = function(value, from, to) { /* single byte sequence */ str += String.fromCharCode(u0); } else if (u0 < 0xC2 || u0 > 0xF4) { - throw RangeError("cannot convert invalid utf8 to javascript string"); + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; } else if (u0 <= 0xDF) { /* two byte sequence */ - if (i >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } str += String.fromCharCode(((u0 & 0x1F) << 6) + (u1 & 0x3F)); } else if (u0 <= 0xEF) { /* three byte sequence */ - if (i+1 >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i+1 >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u2 = value[i++]; - if ((u2&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u2&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u = ((u0 & 0x0F) << 12) + ((u1 & 0x3F) << 6) + (u2 & 0x3F); if (u <= 0xFFFF) { /* BMP codepoint */ str += String.fromCharCode(u); @@ -99,13 +120,29 @@ const to_jsstring = function(value, from, to) { } } else { /* four byte sequence */ - if (i+2 >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i+2 >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u2 = value[i++]; - if ((u2&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u2&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u3 = value[i++]; - if ((u3&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u3&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } /* Has to be astral codepoint */ let u = ((u0 & 0x07) << 18) + ((u1 & 0x3F) << 12) + ((u2 & 0x3F) << 6) + (u3 & 0x3F); u -= 0x10000; -- cgit v1.2.3-54-g00ecf