You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 lines
7.2 KiB
JavaScript

// Module exports
/**
 * Public API of the module: converts between JavaScript strings and Buffers
 * in a named character encoding.
 *
 * Encodings are described in `iconv.encodings`. Each entry is one of:
 *   - a String:   an alias pointing at another entry;
 *   - an Object:  options for the codec factory named by its `type` property;
 *   - a Function: a codec factory returning {toEncoding, fromEncoding, options}.
 */
var iconv = module.exports = {
    /**
     * Encode text into a Buffer.
     * @param {*} str - Text to encode; handed unchanged to the codec.
     * @param {String} [encoding] - Encoding name ("utf8" is used when falsy).
     * @returns {Buffer} Result of the codec's toEncoding().
     * @throws {Error} When the encoding cannot be resolved.
     */
    toEncoding: function(str, encoding) {
        return iconv.getCodec(encoding).toEncoding(str);
    },

    /**
     * Decode a Buffer into a string.
     * @param {*} buf - Bytes to decode; handed unchanged to the codec.
     * @param {String} [encoding] - Encoding name ("utf8" is used when falsy).
     * @returns {String} Result of the codec's fromEncoding().
     * @throws {Error} When the encoding cannot be resolved.
     */
    fromEncoding: function(buf, encoding) {
        return iconv.getCodec(encoding).fromEncoding(buf);
    },

    // Substitute for characters that cannot be converted. Only the first UTF-16
    // code unit is ever read (via charCodeAt(0)), so this must be a single
    // character: U+FFFD REPLACEMENT CHARACTER, written as an escape so that it
    // survives re-encoding of this source file.
    defaultCharUnicode: '\uFFFD',
    defaultCharSingleByte: '?',

    // Set to true once the encoding definition files have been merged in.
    encodingsLoaded: false,

    /**
     * Get correct codec for given encoding.
     *
     * On first use the definitions from ./encodings/singlebyte, ./encodings/gbk
     * and ./encodings/big5 are merged into `iconv.encodings`. String names are
     * normalised by removing "-" and " " and lower-casing, then aliases and
     * option objects are followed until a codec factory is reached.
     *
     * @param {String} [encoding] - Encoding name ("utf8" is used when falsy).
     * @returns {Object} Codec produced by the factory.
     * @throws {Error} When the name does not resolve to a codec factory.
     */
    getCodec: function(encoding) {
        if (!iconv.encodingsLoaded) {
            applyEncodings(require('./encodings/singlebyte'));
            applyEncodings(require('./encodings/gbk'));
            applyEncodings(require('./encodings/big5'));
            iconv.encodingsLoaded = true;
        }
        var hasOwn = Object.prototype.hasOwnProperty;
        var enc = encoding || "utf8";
        var codecOptions = undefined;
        var visited = [];
        while (true) {
            if (getType(enc) === "String")
                enc = enc.replace(/[- ]/g, "").toLowerCase();

            // Resolution is deterministic, so seeing the same name twice means
            // the aliases form a cycle that would otherwise never terminate.
            if (visited.indexOf(enc) !== -1)
                throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
            visited.push(enc);

            // Own properties only: a name such as "constructor" must not
            // resolve to something inherited from Object.prototype.
            var codec = hasOwn.call(iconv.encodings, enc) ? iconv.encodings[enc] : undefined;
            var type = getType(codec);
            if (type === "String") {
                // Link to other encoding.
                codecOptions = {originalEncoding: enc};
                enc = codec;
            }
            else if (type === "Object" && codec.type != undefined) {
                // Options for other encoding.
                codecOptions = codec;
                enc = codec.type;
            }
            else if (type === "Function")
                // Codec itself.
                return codec(codecOptions);
            else
                throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
        }
    },

    // Define basic encodings
    encodings: {
        /**
         * Codec for encodings handled by Buffer itself. `options` is the
         * {originalEncoding: name} object built while following the alias.
         */
        internal: function(options) {
            return {
                toEncoding: toInternalEncoding,
                fromEncoding: fromInternalEncoding,
                options: options
            };
        },
        utf8: "internal",
        ucs2: "internal",
        binary: "internal",
        ascii: "internal",
        base64: "internal",

        /**
         * Codepage single-byte encodings.
         *
         * `options.chars` holds either all 256 characters of the codepage or
         * only the upper 128 (the ASCII half is then prepended). The derived
         * lookup buffers are stored back on `options`, so they are built once
         * per encoding definition.
         *
         * @throws {Error} When `options.chars` is missing or has a wrong length.
         */
        singlebyte: function(options) {
            // Prepare chars if needed
            if (!options.charsBuf) {
                if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
                    throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");

                if (options.chars.length === 128)
                    options.chars = asciiString + options.chars;

                // byte value -> character, stored as 2 bytes per character.
                options.charsBuf = new Buffer(options.chars, 'ucs2');
            }

            if (!options.revCharsBuf) {
                // UTF-16 code unit -> byte value; unmapped units get the default char.
                options.revCharsBuf = new Buffer(65536);
                var defChar = iconv.defaultCharSingleByte.charCodeAt(0);
                for (var i = 0; i < options.revCharsBuf.length; i++)
                    options.revCharsBuf[i] = defChar;
                for (var j = 0; j < options.chars.length; j++)
                    options.revCharsBuf[options.chars.charCodeAt(j)] = j;
            }

            return {
                toEncoding: toSingleByteEncoding,
                fromEncoding: fromSingleByteEncoding,
                options: options,
            };
        },

        /**
         * Codepage double-byte encodings.
         *
         * `options.table` maps an encoded value to a Unicode value. The reverse
         * map is built once and stored on `options.revCharsTable`; every code
         * unit without a mapping keeps the value 0.
         *
         * @throws {Error} When `options.table` is missing.
         */
        table: function(options) {
            if (!options.table) {
                throw new Error("Encoding '" + options.type + "' has incorect 'table' option");
            }
            if (!options.revCharsTable) {
                var revCharsTable = options.revCharsTable = {};
                for (var i = 0; i <= 0xFFFF; i++) {
                    revCharsTable[i] = 0;
                }
                var table = options.table;
                for (var key in table) {
                    // Keys are property names (strings); unary plus stores them as numbers.
                    revCharsTable[table[key]] = +key;
                }
            }

            return {
                toEncoding: toTableEncoding,
                fromEncoding: fromTableEncoding,
                options: options,
            };
        }
    }
};
/**
 * Codec method for encodings that Buffer supports natively: text -> Buffer.
 * Called with `this` set to the codec object; the encoding name is read from
 * this.options.originalEncoding.
 */
function toInternalEncoding(str) {
    var text = ensureString(str);
    var encodingName = this.options.originalEncoding;
    return new Buffer(text, encodingName);
}
/**
 * Codec method for encodings that Buffer supports natively: Buffer -> text.
 * Called with `this` set to the codec object; the encoding name is read from
 * this.options.originalEncoding.
 */
function fromInternalEncoding(buf) {
    var bytes = ensureBuffer(buf);
    return bytes.toString(this.options.originalEncoding);
}
/**
 * Codec method for table-driven double-byte encodings: text -> Buffer.
 *
 * Code units below 0x80 are written as one byte. Any other code unit is looked
 * up in this.options.revCharsTable and written as two bytes, high byte first.
 * A code unit without a mapping is replaced by the table code of
 * iconv.defaultCharUnicode; if that has no mapping either, the single byte
 * iconv.defaultCharSingleByte is written instead of a pair of NUL bytes.
 *
 * @param {*} str - Text to encode (normalised with ensureString).
 * @returns {Buffer} Encoded bytes.
 */
function toTableEncoding(str) {
    str = ensureString(str);
    var strLen = str.length;
    var revCharsTable = this.options.revCharsTable;
    // Worst case: every code unit becomes two bytes.
    var newBuf = new Buffer(strLen * 2);
    var defaultChar = revCharsTable[iconv.defaultCharUnicode.charCodeAt(0)];
    var defaultByte = iconv.defaultCharSingleByte.charCodeAt(0);
    var unicode, gbkcode;

    for (var i = 0, j = 0; i < strLen; i++) {
        unicode = str.charCodeAt(i);
        if (unicode >> 7) {
            gbkcode = revCharsTable[unicode] || defaultChar;
            if (gbkcode) {
                newBuf[j++] = gbkcode >> 8;   // high byte
                newBuf[j++] = gbkcode & 0xFF; // low byte
            } else {
                // Neither the character nor the Unicode default is mapped.
                newBuf[j++] = defaultByte;
            }
        } else { // ascii
            newBuf[j++] = unicode;
        }
    }
    return newBuf.slice(0, j);
}
/**
 * Codec method for table-driven double-byte encodings: Buffer -> text.
 *
 * A byte with the high bit clear is taken as the character itself. A byte with
 * the high bit set is combined with the following byte and looked up in
 * this.options.table; a failed lookup yields the first code unit of
 * iconv.defaultCharUnicode. The result is assembled as little-endian 16-bit
 * units and decoded as 'ucs2'.
 */
function fromTableEncoding(buf) {
    var input = ensureBuffer(buf);
    var inputLen = input.length;
    var table = this.options.table;
    var output = new Buffer(inputLen * 2);
    var fallback = iconv.defaultCharUnicode.charCodeAt(0);
    var readPos = 0;
    var writePos = 0;

    while (readPos < inputLen) {
        var code = input[readPos++];
        if (code & 0x80) {
            // Two-byte sequence: lead byte in the high half, next byte in the low half.
            code = (code << 8) + input[readPos++];
            code = table[code] || fallback;
        }
        output[writePos++] = code & 0xFF; // low byte
        output[writePos++] = code >> 8;   // high byte
    }

    return output.slice(0, writePos).toString('ucs2');
}
/**
 * Codec method for single-byte codepages: text -> Buffer.
 * Every UTF-16 code unit is translated to one byte through
 * this.options.revCharsBuf.
 */
function toSingleByteEncoding(str) {
    var text = ensureString(str);
    var charCount = text.length;
    var encoded = new Buffer(charCount);
    var reverseLookup = this.options.revCharsBuf;
    var pos = 0;

    while (pos < charCount) {
        encoded[pos] = reverseLookup[text.charCodeAt(pos)];
        pos++;
    }
    return encoded;
}
/**
 * Codec method for single-byte codepages: Buffer -> text.
 * Each input byte selects a 2-byte entry in this.options.charsBuf; the entries
 * are copied into a buffer that is then decoded as 'ucs2'.
 */
function fromSingleByteEncoding(buf) {
    var input = ensureBuffer(buf);
    var lookup = this.options.charsBuf;
    var byteCount = input.length;
    // Building the result in a buffer avoids repeated string concatenation.
    var output = new Buffer(byteCount * 2);

    for (var pos = 0; pos < byteCount; pos++) {
        var src = input[pos] * 2;
        var dst = pos * 2;
        output[dst] = lookup[src];
        output[dst + 1] = lookup[src + 1];
    }
    return output.toString('ucs2');
}
// Add aliases to convert functions: `encode` and `decode` refer to the same
// function objects as `toEncoding` and `fromEncoding`.
iconv.encode = iconv.toEncoding;
iconv.decode = iconv.fromEncoding;
/**
 * Merge a set of encoding definitions into iconv.encodings.
 * Used to load the definitions kept in the files of the /encodings dir;
 * an existing entry with the same key is overwritten.
 */
function applyEncodings(encodings) {
    var name;
    for (name in encodings) {
        iconv.encodings[name] = encodings[name];
    }
}
// Utilities
// The 128 ASCII characters in code order (U+0000 .. U+007F); prepended to
// codepages that only define their upper half.
var asciiString = (function() {
    var codes = [];
    for (var code = 0; code < 128; code++) {
        codes.push(code);
    }
    return String.fromCharCode.apply(String, codes);
})();
/**
 * Coerce the argument to a Buffer.
 * Falsy values give an empty Buffer, a Buffer is returned as is, and anything
 * else is converted to a string and read with the "binary" encoding.
 */
var ensureBuffer = function(buf) {
    if (!buf) {
        return new Buffer(0);
    }
    if (buf instanceof Buffer) {
        return buf;
    }
    return new Buffer("" + buf, "binary");
};
/**
 * Coerce the argument to a string.
 * null and undefined give "", a Buffer is decoded as UTF-8, and anything else
 * is converted with string concatenation. Falsy values that have a meaningful
 * text form (0, false, NaN) are converted rather than dropped.
 *
 * @param {*} str - Value to convert.
 * @returns {String} The string form of the value.
 */
var ensureString = function(str) {
    // `== null` matches exactly null and undefined.
    if (str == null) {
        return "";
    }
    return (str instanceof Buffer) ? str.toString('utf8') : ("" + str);
};
/**
 * Return the internal class name of a value, e.g. "String", "Object",
 * "Function" or "Undefined".
 */
var getType = function(obj) {
    var tag = Object.prototype.toString.call(obj); // "[object Xxx]"
    return tag.substring(8, tag.length - 1);
};