diff options
author | daurnimator <quae@daurnimator.com> | 2018-03-04 12:56:44 +1100 |
---|---|---|
committer | daurnimator <quae@daurnimator.com> | 2018-03-04 12:58:58 +1100 |
commit | 1b4551edce66763db780ad21fbbd85896e78b523 (patch) | |
tree | dc8b5a6e9b372fa1fc4c2b07f369f3f6bc5608ec /src/defs.js | |
parent | b2c7f18f2d3b70daf3a18fedf486cac71e16dc58 (diff) | |
download | fengari-1b4551edce66763db780ad21fbbd85896e78b523.tar.gz fengari-1b4551edce66763db780ad21fbbd85896e78b523.tar.bz2 fengari-1b4551edce66763db780ad21fbbd85896e78b523.zip |
src/defs.js: to_jsstring now optionally replaces invalid UTF-8 sequences with the Unicode replacement character (U+FFFD)
Related to https://github.com/fengari-lua/fengari-interop/issues/30
Diffstat (limited to 'src/defs.js')
-rw-r--r-- | src/defs.js | 59 |
1 file changed, 48 insertions, 11 deletions
diff --git a/src/defs.js b/src/defs.js index 8bdd23e..e761f86 100644 --- a/src/defs.js +++ b/src/defs.js @@ -58,7 +58,7 @@ const luastring_eq = function(a, b) { return true; }; -const to_jsstring = function(value, from, to) { +const to_jsstring = function(value, from, to, replacement_char) { if (!is_luastring(value)) throw new TypeError("to_jsstring expects a Uint8Array"); if (to === void 0) { @@ -74,20 +74,41 @@ const to_jsstring = function(value, from, to) { /* single byte sequence */ str += String.fromCharCode(u0); } else if (u0 < 0xC2 || u0 > 0xF4) { - throw RangeError("cannot convert invalid utf8 to javascript string"); + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; } else if (u0 <= 0xDF) { /* two byte sequence */ - if (i >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } str += String.fromCharCode(((u0 & 0x1F) << 6) + (u1 & 0x3F)); } else if (u0 <= 0xEF) { /* three byte sequence */ - if (i+1 >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i+1 >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u2 = value[i++]; - if ((u2&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to 
javascript string"); + if ((u2&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u = ((u0 & 0x0F) << 12) + ((u1 & 0x3F) << 6) + (u2 & 0x3F); if (u <= 0xFFFF) { /* BMP codepoint */ str += String.fromCharCode(u); @@ -99,13 +120,29 @@ const to_jsstring = function(value, from, to) { } } else { /* four byte sequence */ - if (i+2 >= to) throw RangeError("cannot convert invalid utf8 to javascript string"); + if (i+2 >= to) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u1 = value[i++]; - if ((u1&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u1&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u2 = value[i++]; - if ((u2&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u2&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } let u3 = value[i++]; - if ((u3&0xC0) !== 0x80) throw RangeError("cannot convert invalid utf8 to javascript string"); + if ((u3&0xC0) !== 0x80) { + if (!replacement_char) throw RangeError("cannot convert invalid utf8 to javascript string"); + str += "�"; + continue; + } /* Has to be astral codepoint */ let u = ((u0 & 0x07) << 18) + ((u1 & 0x3F) << 12) + ((u2 & 0x3F) << 6) + (u3 & 0x3F); u -= 0x10000; |