You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
375 lines
6.4 KiB
375 lines
6.4 KiB
/*************************************************************************** |
|
* Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com * |
|
* Please do not use email address above for bug reports; see * |
|
* the README file * |
|
* * |
|
* This program is free software; you can redistribute it and/or modify * |
|
* it under the terms of the GNU General Public License as published by * |
|
* the Free Software Foundation; either version 2 of the License, or * |
|
* (at your option) any later version. * |
|
* * |
|
* This program is distributed in the hope that it will be useful, * |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
|
* GNU General Public License for more details. * |
|
* * |
|
* You should have received a copy of the GNU General Public License * |
|
* along with this program; if not, write to the * |
|
* Free Software Foundation, Inc., * |
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * |
|
***************************************************************************/ |
|
|
|
#include "libchmfileimpl.h" |
|
#include "libchmtextencoding.h" |
|
|
|
// Do not try to embed these arrays directly in text_encoding_table; it does not work - at least with gcc.
|
// LCIDs referenced by the "Arabic" / "CP1256" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_arabic[] =
{
    0x1401, 0x3C01, 0x0C01, 0x0801, 0x2C01, 0x3401,
    0x3001, 0x1001, 0x1801, 0x2001, 0x4001, 0x0401,
    0x2801, 0x1C01, 0x3801, 0x2401, 0x0429, 0x0420,
    0
};
|
|
|
// LCIDs referenced by the "Baltic" / "CP1257" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_baltic[] = { 0x0425, 0x0426, 0x0427, 0 };
|
|
|
// LCIDs referenced by the "Central European" / "CP1250" entry of
// text_encoding_table. The list is terminated by 0.
// NOTE(review): 0x081A is also listed in lcid_Western; this entry comes
// earlier in the table, so lookupByLCID() resolves 0x081A here.
static short lcid_centralEuropean[] =
{
    0x041C, 0x041A, 0x0405,
    0x040E, 0x0415, 0x0418,
    0x081A, 0x041B, 0x0424,
    0
};
|
|
|
// LCIDs referenced by the "Chinese Simplified" / "GB18030" entry of
// text_encoding_table. The list is terminated by 0.
// NOTE(review): 0x0404 is also listed by lcid_ChineseTraditionalBig5 and
// lcid_ChineseTraditionalBigHKSCS. lookupByLCID() returns the first matching
// table entry and this one comes first, so 0x0404 resolves to GB18030 -
// confirm that this is intended.
static short lcid_ChineseSimplifiedGB18030[] = { 0x0404, 0 };
|
|
|
|
|
// LCIDs referenced by the "Chinese Simplified" / "GBK" entry of
// text_encoding_table. The list is terminated by 0.
static short lcid_ChineseSimplifiedGBK[] = { 0x0804, 0 };
|
|
|
|
|
// LCIDs referenced by the third "Chinese Simplified" entry of
// text_encoding_table. The list is terminated by 0.
// The identifier's spelling is kept as-is because the table refers to it.
static short lcid_ChineseSimplifiedGB2313[] = { 0x1004, 0 };
|
|
|
// LCIDs referenced by the "Chinese Traditional" / "Big5" entry of
// text_encoding_table. The list is terminated by 0.
// NOTE(review): 0x0404 also appears in lcid_ChineseSimplifiedGB18030, whose
// table entry precedes this one, so lookupByLCID() never returns this entry
// for 0x0404.
static short lcid_ChineseTraditionalBig5[] = { 0x0C04, 0x1404, 0x0404, 0 };
|
|
|
// LCIDs referenced by the "Chinese Traditional" / "Big5-HKSCS" entry of
// text_encoding_table. The list is terminated by 0.
// NOTE(review): the only LCID here, 0x0404, is also listed by earlier table
// entries (GB18030, Big5), so lookupByLCID() never returns this entry.
static short lcid_ChineseTraditionalBigHKSCS[] = { 0x0404, 0 };
|
|
|
// LCIDs referenced by the "Cyrillic" / "CP1251" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_CyrillicCP1251[] =
{
    0x082C, 0x0423, 0x0402,
    0x042F, 0x0419, 0x0C1A,
    0x0444, 0x0422, 0x0843,
    0
};
|
|
|
// LCID list for the "Cyrillic" / "KOI8-R" entry of text_encoding_table.
// 0x7001 is an artificial LCID; the list is terminated by 0.
static short lcid_CyrillicKOI8R[] = { 0x7001, 0 };
|
|
|
// LCID list for the "Cyrillic Broken" / "CP1251/KOI8-R" entry of
// text_encoding_table. 0x7002 is an artificial LCID; the list is
// terminated by 0.
static short lcid_CyrillicBrokenCPKOI[] = { 0x7002, 0 };
|
|
|
// LCID list for the "Cyrillic Broken" / "KOI8-R/CP1251" entry of
// text_encoding_table. 0x7003 is an artificial LCID; the list is
// terminated by 0.
static short lcid_CyrillicBrokenKOICP[] = { 0x7003, 0 };
|
|
|
|
|
// LCIDs referenced by the "Greek" / "CP1253" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_Greek[] = { 0x0408, 0 };
|
|
|
// LCIDs referenced by the "Hebrew" / "CP1255" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_Hebrew[] = { 0x040D, 0 };
|
|
|
// LCIDs referenced by the "Japanese" / "eucJP" entry of text_encoding_table.
// The list is terminated by 0.
// NOTE(review): 0x0411 is shared with lcid_Japanese_JIS7 and
// lcid_Japanese_ShiftJIS. This entry is the first of the three in the table,
// so lookupByLCID() resolves 0x0411 to eucJP - confirm that this is intended.
static short lcid_Japanese_eucJP[] = { 0x0411, 0 };
|
|
|
|
|
// LCIDs referenced by the "Japanese" / "JIS7" entry of text_encoding_table.
// The list is terminated by 0.
// NOTE(review): 0x0411 is also listed by lcid_Japanese_eucJP, whose table
// entry comes first, so lookupByLCID() never returns this entry.
static short lcid_Japanese_JIS7[] = { 0x0411, 0 };
|
|
|
// LCIDs referenced by the "Japanese" / "Shift-JIS" entry of
// text_encoding_table. The list is terminated by 0.
// NOTE(review): 0x0411 is also listed by lcid_Japanese_eucJP, whose table
// entry comes first, so lookupByLCID() never returns this entry.
static short lcid_Japanese_ShiftJIS[] = { 0x0411, 0 };
|
|
|
// LCIDs referenced by the "Korean" / "eucKR" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_Korean_eucKR[] = { 0x0412, 0 };
|
|
|
// LCIDs referenced by the "Tamil" / "TSCII" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_TamilTSCII[] = { 0x0449, 0 };
|
|
|
// LCIDs referenced by the "Thai" / "TIS-620" entry of text_encoding_table.
// The list is terminated by 0.
static short lcid_ThaiTIS[] = { 0x041E, 0 };
|
|
|
// LCID list for the "Ukrainian" / "KOI8-U" entry of text_encoding_table.
// The list is terminated by 0.
// NOTE(review): 0x7006 presumably is another artificial LCID (it sits in the
// same 0x70xx range as the ones marked artificial in this file) - confirm.
static short lcid_UkrainianKOI[] = { 0x7006, 0 };
|
|
|
// LCIDs referenced by the "Turkish" / "CP1254" entry of text_encoding_table.
// The list is terminated by 0.
// NOTE(review): 0x042C and 0x0443 are also listed in lcid_Western; this entry
// comes earlier in the table, so lookupByLCID() resolves both of them here.
static short lcid_Turkish[] = { 0x042C, 0x041F, 0x0443, 0 };
|
|
|
// LCIDs referenced by the "Vietnamese" / "CP1258" entry of
// text_encoding_table. The list is terminated by 0.
static short lcid_Vietnamese[] = { 0x042A, 0 };
|
|
|
// LCID list for the "Unicode" / "UTF-8" entry of text_encoding_table.
// 0x7004 is an artificial LCID; the list is terminated by 0.
static short lcid_UnicodeUTF8[] = { 0x7004, 0 };
|
|
|
// LCID list for the "Unicode" / "UTF-16" entry of text_encoding_table.
// 0x7005 is an artificial LCID; the list is terminated by 0.
static short lcid_UnicodeUTF16[] = { 0x7005, 0 };
|
|
|
// LCIDs referenced by the "Western" / "CP1252" entry of text_encoding_table.
// The list is terminated by 0.
// NOTE(review): 0x042C and 0x0443 (also in lcid_Turkish) and 0x081A (also in
// lcid_centralEuropean) belong to entries that precede "Western" in the
// table, so lookupByLCID() never returns this entry for those three values.
static short lcid_Western[] =
{
    0x0436, 0x042D, 0x0403, 0x0406, 0x0813, 0x0413,
    0x0C09, 0x2809, 0x1009, 0x2409, 0x1809, 0x2009,
    0x1409, 0x3409, 0x1C09, 0x2C09, 0x0809, 0x0409,
    0x0438, 0x040B, 0x080C, 0x0C0C, 0x040C, 0x140C,
    0x100C, 0x0C07, 0x0407, 0x1407, 0x1007, 0x0807,
    0x040F, 0x0421, 0x0410, 0x0810, 0x083E, 0x043E,
    0x0414, 0x0814, 0x0416, 0x0816, 0x0432, 0x2C0A,
    0x400A, 0x340A, 0x240A, 0x140A, 0x1C0A, 0x300A,
    0x440A, 0x100A, 0x480A, 0x080A, 0x4C0A, 0x180A,
    0x3C0A, 0x280A, 0x500A, 0x0C0A, 0x380A, 0x200A,
    0x0441, 0x081D, 0x041D, 0x0434, 0x0435, 0x042B,
    0x042C, 0x0439, 0x043A, 0x044E, 0x044F, 0x081A,
    0x0443,
    0
};
|
|
|
|
|
// Table of the known text encodings. Each entry gives, in order, the
// encoding family name, the codec name, and a zero-terminated LCID list.
// The table itself ends with an all-zero entry, which the lookup functions
// detect through a null `family`.
//
// Entry order matters: lookupByLCID() returns the first entry whose LCID list
// contains the requested value, so when an LCID is listed by several entries
// the earliest one wins.
static const LCHMTextEncoding text_encoding_table [] =
{
    { "Arabic",              "CP1256",        lcid_arabic },
    { "Baltic",              "CP1257",        lcid_baltic },
    { "Central European",    "CP1250",        lcid_centralEuropean },

    { "Chinese Simplified",  "GB18030",       lcid_ChineseSimplifiedGB18030 },
    { "Chinese Simplified",  "GBK",           lcid_ChineseSimplifiedGBK },
    // The codec name used to be spelled "GB2313", which is not the name of
    // any charset; the intended one is GB2312. The array keeps its old
    // (misspelled) identifier.
    { "Chinese Simplified",  "GB2312",        lcid_ChineseSimplifiedGB2313 },
    { "Chinese Traditional", "Big5",          lcid_ChineseTraditionalBig5 },
    { "Chinese Traditional", "Big5-HKSCS",    lcid_ChineseTraditionalBigHKSCS },

    { "Cyrillic",            "CP1251",        lcid_CyrillicCP1251 },
    { "Cyrillic",            "KOI8-R",        lcid_CyrillicKOI8R },
    { "Cyrillic Broken",     "CP1251/KOI8-R", lcid_CyrillicBrokenCPKOI },
    { "Cyrillic Broken",     "KOI8-R/CP1251", lcid_CyrillicBrokenKOICP },

    { "Greek",               "CP1253",        lcid_Greek },
    { "Hebrew",              "CP1255",        lcid_Hebrew },

    { "Japanese",            "eucJP",         lcid_Japanese_eucJP },
    { "Japanese",            "JIS7",          lcid_Japanese_JIS7 },
    { "Japanese",            "Shift-JIS",     lcid_Japanese_ShiftJIS },

    { "Korean",              "eucKR",         lcid_Korean_eucKR },
    { "Tamil",               "TSCII",         lcid_TamilTSCII },
    { "Thai",                "TIS-620",       lcid_ThaiTIS },
    { "Ukrainian",           "KOI8-U",        lcid_UkrainianKOI },
    { "Turkish",             "CP1254",        lcid_Turkish },
    { "Vietnamese",          "CP1258",        lcid_Vietnamese },

    { "Unicode",             "UTF-8",         lcid_UnicodeUTF8 },
    { "Unicode",             "UTF-16",        lcid_UnicodeUTF16 },

    { "Western",             "CP1252",        lcid_Western },

    // Terminator entry.
    { 0, 0, 0 }
};
|
|
|
|
|
// Returns a pointer to the first entry of the static encoding table.
// Callers can walk the entries until they reach one whose `family` is null.
const LCHMTextEncoding * LCHMFileImpl::getTextEncodingTable()
{
    return &text_encoding_table[0];
}
|
|
|
// Finds the encoding whose LCID list contains `lcid`.
//
// Entries are scanned in table order and the first match is returned, so an
// LCID listed by several entries always resolves to the earliest one.
// Returns 0 when no entry lists `lcid`. Because 0 terminates every LCID
// list, a request for lcid 0 never matches.
const LCHMTextEncoding * LCHMFileImpl::lookupByLCID( short lcid )
{
    const LCHMTextEncoding * entry = text_encoding_table;

    while ( entry->family )
    {
        const short * candidate = entry->lcids;

        while ( *candidate )
        {
            if ( *candidate == lcid )
                return entry;

            candidate++;
        }

        entry++;
    }

    return 0;
}
|
|
|
|
|
int LCHMFileImpl::getEncodingIndex( const LCHMTextEncoding * enc) |
|
{ |
|
for ( int i = 0; text_encoding_table[i].family; i++ ) |
|
if ( (text_encoding_table + i) == enc ) |
|
return i; |
|
|
|
return -1; |
|
} |
|
|
|
// Finds the first table entry whose `qtcodec` name compares equal to `codec`.
// Returns 0 when no entry matches.
const LCHMTextEncoding * LCHMFileImpl::lookupByQtCodec(const QString & codec)
{
    for ( int i = 0; text_encoding_table[i].family; i++ )
    {
        const LCHMTextEncoding * entry = text_encoding_table + i;

        if ( codec == entry->qtcodec )
            return entry;
    }

    return 0;
}
|
|
|