// Generates encodings/singlebyte.js: builds character tables for single-byte
// encodings with the iconv module and records their aliases.
var fs = require("fs");
|
|
var Iconv = require("iconv").Iconv;
|
|
|
|
|
|
// Families of single-byte encodings to generate tables for.
// Each family has:
//   encodings - the family members (code page numbers or iconv names);
//   convert   - optional function mapping a member to {name, aliases}, where
//               `name` is the encoding name and `aliases` are additional
//               lookup keys (always strings).
// Families without `convert` list encoding names directly and define no aliases.
var encodingFamilies = [
    {
        // Windows code pages
        encodings: [1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258],
        convert: function(cp) {
            return {
                name: "windows-" + cp,
                aliases: ["win" + cp, "cp" + cp, "" + cp]
            };
        }
    },
    {
        // ISO-8859 code pages
        encodings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16],
        convert: function(i) {
            // Aliases are derived from the part number as 28590 + i.
            var codePage = 28590 + i;
            return {
                name: "iso-8859-" + i,
                // Both aliases are strings, matching the other families.
                aliases: ["cp" + codePage, "" + codePage]
            };
        }
    },
    {
        // IBM/DOS code pages
        encodings: [437, 737, 775, 850, 852, 855, 857, 858, 860, 861, 862, 863, 864, 865, 866, 869],
        convert: function(cp) {
            return {
                name: "CP" + cp,
                aliases: ["ibm" + cp, "" + cp]
            };
        }
    },
    {
        // Macintosh code pages
        encodings: ["macCroatian", "macCyrillic", "macGreek",
                    "macIceland", "macRoman", "macRomania",
                    "macThai", "macTurkish", "macUkraine"]
    },
    {
        // KOI8 code pages
        encodings: ["KOI8-R", "KOI8-U"]
    }
];
|
|
|
|
|
|
// Table of encodings that gets written to the output file.
// It starts with hand-written aliases only: each key is an alias and each value
// is the key of the encoding it refers to, in normalized form (lowercase, no
// dashes or underscores, e.g. "iso88591" for "iso-8859-1").
var encodings = {
    // Aliases.
    "ascii8bit": "ascii",
    "usascii": "ascii",

    "latin1": "iso88591",
    "latin2": "iso88592",
    "latin3": "iso88593",
    "latin4": "iso88594",
    "latin6": "iso885910",
    "latin7": "iso885913",
    "latin8": "iso885914",
    "latin9": "iso885915",
    "latin10": "iso885916",

    // Was "iso88951" (transposed digits), which matches no encoding key.
    "cp819": "iso88591",
    "arabic": "iso88596",
    "arabic8": "iso88596",
    "greek": "iso88597",
    "greek8": "iso88597",
    "hebrew": "iso88598",
    "hebrew8": "iso88598",
    "turkish": "iso88599",
    "turkish8": "iso88599",
    "thai": "iso885911",
    "thai8": "iso885911",
    "tis620": "iso885911",
    "windows874": "iso885911",
    "win874": "iso885911",
    "cp874": "iso885911",
    "874": "iso885911",
    "celtic": "iso885914",
    "celtic8": "iso885914",

    "cp20866": "koi8r",
    "20866": "koi8r",
    "ibm878": "koi8r",
    "cp21866": "koi8u",
    "21866": "koi8u",
    "ibm1168": "koi8u"
};
|
|
|
|
// Add all encodings from encodingFamilies.
// Each family member is registered under its normalized name (dashes and
// underscores removed, lowercased) with a generated character table; every
// alias of the member is then pointed at that normalized name.
encodingFamilies.forEach(function(family) {
    family.encodings.forEach(function(member) {
        // Families with `convert` describe a member as {name, aliases};
        // the others list encoding names directly.
        var encoding = family.convert ? family.convert(member) : member;

        var encodingIconvName = encoding.name ? encoding.name : encoding;
        var encodingName = encodingIconvName.replace(/[-_]/g, "").toLowerCase();

        encodings[encodingName] = {
            type: "singlebyte",
            chars: generateCharsString(encodingIconvName)
        };

        if (encoding.aliases) {
            encoding.aliases.forEach(function(alias) {
                encodings[alias] = encodingName;
            });
        }
    });
});
|
|
|
|
// Write encodings.
// The table is serialized as a CommonJS module ("module.exports = {...};").
// The output path is relative, so it is resolved against the current working
// directory of the process.
var singleByteModuleSource = "module.exports = " + JSON.stringify(encodings, undefined, " ") + ";";
fs.writeFileSync("encodings/singlebyte.js", singleByteModuleSource);
|
|
|
|
|
|
/**
 * Builds the string of characters that the bytes 0x80..0xFF decode to in the
 * given single-byte encoding, by converting each byte to UTF-8 with Iconv.
 *
 * @param {string} encoding - Encoding name passed to the Iconv constructor.
 * @returns {string} 128 characters, one per byte value from 0x80 to 0xFF in
 *     order; a byte whose conversion fails with code "EILSEQ" is represented
 *     by "\ufffd".
 * @throws {Error} If a byte converts to anything other than exactly one
 *     character, or if the conversion fails with any error other than EILSEQ.
 */
function generateCharsString(encoding) {
    console.log("Generate encoding for " + encoding);
    var iconvToUtf8 = new Iconv(encoding, "UTF-8");
    var chars = "";

    for (var b = 0x80; b < 0x100; b++) {
        var convertedChar;

        try {
            // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
            convertedChar = iconvToUtf8.convert(Buffer.from([b])).toString();
        } catch (exception) {
            // Only EILSEQ is treated as an unmapped byte; anything else is a real failure.
            if (exception.code !== "EILSEQ") {
                throw exception;
            }
            convertedChar = "\ufffd";
        }

        if (convertedChar.length !== 1) {
            throw new Error("Single-byte encoding error: Must return single char.");
        }

        chars += convertedChar;
    }

    return chars;
}
|