aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-charset-map.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r--camel/camel-charset-map.c322
1 files changed, 0 insertions, 322 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
deleted file mode 100644
index d5d7665dac..0000000000
--- a/camel/camel-charset-map.c
+++ /dev/null
@@ -1,322 +0,0 @@
-
-#include <stdio.h>
-
-/*
- if you want to build the charset map, add the root directory of
- libunicode to the include path and define BUILD_MAP,
- then run it as
- ./a.out > camel-charset-map-private.h
-
- The generated tables work like this:
-
- An indirect array for each page of unicode character
- Each array element has an indirect pointer to one of the bytes of
- the generated bitmask.
-*/
-
-#ifdef BUILD_MAP
-#include "iso/iso8859-2.h"
-#include "iso/iso8859-3.h"
-#include "iso/iso8859-4.h"
-#include "iso/iso8859-5.h"
-#include "iso/iso8859-6.h"
-#include "iso/iso8859-7.h"
-#include "iso/iso8859-8.h"
-#include "iso/iso8859-9.h"
-#include "iso/iso8859-10.h"
-#include "iso/iso8859-13.h"
-#include "iso/iso8859-14.h"
-#include "iso/iso8859-15.h"
-#include "iso/windows-1250.h"
-#include "iso/windows-1252.h"
-#include "iso/windows-1257.h"
-#include "iso/koi8-r.h"
-#include "iso/koi8-u.h"
-#include "iso/tis620.2533-1.h"
-#include "iso/armscii-8.h"
-#include "iso/georgian-academy.h"
-#include "iso/georgian-ps.h"
-#include "msft/cp932.h"
-#include "jis/shiftjis.h"
-
-/*
- * The source conversion tables that get folded into the charset map.
- * main() walks this list up to the all-zero terminator entry and hands
- * each real entry its own mask bit.
- */
-static struct {
-	unsigned short *table;	/* conversion table; layout depends on type */
-	char *name;		/* charset name written into camel_charinfo */
-	int type;		/* 0: flat table for bytes 128-255, 1: sparse table of pages */
-	unsigned int bit;	/* mask bit, filled in by main() */
-} tables[] = {
-	{ iso8859_2_table,        "iso-8859-2",       0, 0 },
-	{ iso8859_3_table,        "iso-8859-3",       0, 0 },
-	{ iso8859_4_table,        "iso-8859-4",       0, 0 },
-	{ iso8859_5_table,        "iso-8859-5",       0, 0 },
-	/* apparently -6 has special digits? */
-	{ iso8859_6_table,        "iso-8859-6",       0, 0 },
-	{ iso8859_7_table,        "iso-8859-7",       0, 0 },
-	{ iso8859_8_table,        "iso-8859-8",       0, 0 },
-	{ iso8859_9_table,        "iso-8859-9",       0, 0 },
-	{ iso8859_10_table,       "iso-8859-10",      0, 0 },
-	{ iso8859_13_table,       "iso-8859-13",      0, 0 },
-	{ iso8859_14_table,       "iso-8859-14",      0, 0 },
-	{ iso8859_15_table,       "iso-8859-15",      0, 0 },
-	{ windows_1250_table,     "windows-1250",     0, 0 },
-	{ windows_1252_table,     "windows-1252",     0, 0 },
-	{ windows_1257_table,     "windows-1257",     0, 0 },
-	{ koi8_r_table,           "koi8-r",           0, 0 },
-	{ koi8_u_table,           "koi8-u",           0, 0 },
-	{ tis_620_table,          "tis620.2533-1",    0, 0 },
-	{ armscii_8_table,        "armscii-8",        0, 0 },
-	{ georgian_academy_table, "georgian-academy", 0, 0 },
-	{ georgian_ps_table,      "georgian-ps",      0, 0 },
-	{ cp932_table,            "CP932",            1, 0 },
-	{ sjis_table,             "Shift-JIS",        1, 0 },
-	/* terminator: a zero table pointer ends the list */
-	{ 0, 0, 0, 0 }
-};
-
-/*
- * One mask word per 16-bit code point, indexed by the code point itself.
- * main() and add_bigmap() OR in the bit of every table that contains
- * the character.
- */
-unsigned int encoding_map[256 * 256];
-
-/*
- * Fold a sparse (two-level) table into encoding_map.
- *
- * table: 256 row pointers; a zero pointer means the row is absent.
- *        Each present row holds 256 entries, and a zero entry means
- *        "no mapping".
- * bit:   mask bit to OR into encoding_map[] for every mapped value.
- */
-static void
-add_bigmap(unsigned short **table, int bit)
-{
-	int row_idx;
-	int col_idx;
-
-	for (row_idx = 0; row_idx < 256; row_idx++) {
-		unsigned short *row = table[row_idx];
-
-		/* absent row: nothing to record */
-		if (!row)
-			continue;
-
-		for (col_idx = 0; col_idx < 256; col_idx++) {
-			unsigned short value = row[col_idx];
-
-			if (value)
-				encoding_map[value] |= bit;
-		}
-	}
-}
-
-/*
- * Returns nonzero if any character on 256-entry page `page` of
- * encoding_map has a bit set inside byte number `k` of its mask.
- */
-static int
-page_uses_byte(int page, int k)
-{
-	/* unsigned constant so the shift never touches a sign bit */
-	unsigned int byte_mask = 0xffU << (k*8);
-	int j;
-
-	for (j=0;j<256;j++) {
-		if ((encoding_map[page*256 + j] & byte_mask) != 0)
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Generator entry point: builds encoding_map from tables[] and writes the
- * resulting C source (per-page byte arrays, camel_charmap, camel_charinfo
- * and the charset_mask() macro) to stdout.
- *
- * Returns 0.
- */
-int main(void)
-{
-	int i, j;
-	unsigned short *tab;
-	unsigned int bit = 0x01;
-	int k;
-	int bytes;
-
-#if 0
-	/* iso-latin-1 (not needed-detected in code) */
-	for (i=0;i<256;i++) {
-		encoding_map[i] |= bit;
-	}
-	bit <<= 1;
-#endif
-
-	/* number of mask bytes needed at one bit per table; the array
-	   count includes the terminator entry, hence the extra -1 */
-	bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;
-
-	/* the other latin charsets */
-	for (j=0;tables[j].table;j++) {
-		switch (tables[j].type) {
-		case 0: /* table from 128-256 */
-			tab = tables[j].table;
-			for (i=0;i<128;i++) {
-				/* 0-127 is the common */
-				encoding_map[i] |= bit;
-				encoding_map[tab[i]] |= bit;
-			}
-			break;
-		case 1: /* sparse table */
-			/* type 1 entries store a table of row pointers in the
-			   same field, so say so explicitly */
-			add_bigmap((unsigned short **)tables[j].table, bit);
-			break;
-		default:
-			/* unknown type: the table still gets a bit, but no characters */
-			break;
-		}
-		tables[j].bit = bit;
-		bit <<= 1;
-	}
-
-	printf("/* This file is automatically generated: DO NOT EDIT */\n\n");
-
-	/* one 256-byte array for every (page, mask byte) pair that is in use */
-	for (i=0;i<256;i++) {
-		for (k=0;k<bytes;k++) {
-			if (!page_uses_byte(i, k))
-				continue;
-
-			printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
-			for (j=0;j<256;j++) {
-				printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
-				if (((j+1)&7) == 0 && j<255)
-					printf("\n\t");
-			}
-			printf("\n};\n\n");
-		}
-	}
-
-	/* page index: one pointer per mask byte, 0 where no array was emitted */
-	printf("struct {\n");
-	for (k=0;k<bytes;k++) {
-		printf("\tunsigned char *bits%d;\n", k);
-	}
-	printf("} camel_charmap[256] = {\n\t");
-	for (i=0;i<256;i++) {
-		printf("{ ");
-		for (k=0;k<bytes;k++) {
-			if (page_uses_byte(i, k))
-				printf("m%02x%x, ", i, k);
-			else
-				printf("0, ");
-		}
-		printf("}, ");
-		if (((i+1)&7) == 0 && i<255)
-			printf("\n\t");
-	}
-	printf("\n};\n\n");
-
-	/* charset name -> mask bit, in tables[] order */
-	printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
-	for (j=0;tables[j].table;j++) {
-		printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
-	}
-	printf("};\n\n");
-
-	/* lookup macro: reassembles the full mask from the per-byte arrays */
-	printf("#define charset_mask(x) \\\n");
-	for (k=0;k<bytes;k++) {
-		if (k!=0)
-			printf("\t| ");
-		else
-			printf("\t");
-		printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
-		if (k<bytes-1)
-			printf("\t\\\n");
-	}
-	printf("\n\n");
-
-	return 0;
-}
-
-#else
-
-#include "camel-charset-map.h"
-#include "camel-charset-map-private.h"
-#include <unicode.h>
-#include <locale.h>
-#include <glib.h>
-
-/*
- * Reset a charset-detection state: level 0 and every mask bit set, so
- * that camel_charset_step() can only narrow the mask from here.
- */
-void camel_charset_init(CamelCharset *c)
-{
-	c->level = 0;
-	c->mask = ~0;
-}
-
-/*
- * Feed another chunk of UTF-8 text into the detection state.
- *
- * c:   state to update; c->mask is narrowed and c->level raised.
- * in:  start of the text.
- * len: number of bytes at `in` to examine.
- *
- * For each decoded character up to 0xffff the mask is ANDed with
- * charset_mask() of that character; anything above 0xffff clears the
- * mask.  The level becomes at least 1 once a character in 128..255 is
- * seen and at least 2 once a character >= 256 is seen.
- */
-void
-camel_charset_step(CamelCharset *c, const char *in, int len)
-{
-	unsigned int mask = c->mask;
-	int level = c->level;
-	const char *inptr = in, *inend = in+len;
-
-	/* check what charset a given string will fit in */
-	while (inptr < inend) {
-		/* named `ch` so it does not shadow the state parameter `c` */
-		unicode_char_t ch;
-		const char *next;
-
-		next = unicode_get_utf8(inptr, &ch);
-		if (next == NULL) {
-			/* decoder rejected this position: skip one byte and retry */
-			inptr++;
-			continue;
-		}
-		inptr = next;
-
-		if (ch<=0xffff) {
-			mask &= charset_mask(ch);
-
-			if (ch>=128 && ch<256)
-				level = MAX(level, 1);
-			else if (ch>=256)
-				level = MAX(level, 2);
-		} else {
-			/* the generated map only covers 16-bit code points */
-			mask = 0;
-			level = MAX(level, 2);
-		}
-	}
-
-	c->mask = mask;
-	c->level = level;
-}
-
-/*
- * Pick a charset name for a mask of candidate charsets: the first
- * camel_charinfo entry whose bit is present in `mask` wins.  Falls back
- * to "UTF-8" when no entry matches.
- */
-static const char *
-camel_charset_best_mask(unsigned int mask)
-{
-	int idx = 0;
-
-	while (idx < sizeof(camel_charinfo)/sizeof(camel_charinfo[0])) {
-		if ((camel_charinfo[idx].bit & mask) != 0)
-			return camel_charinfo[idx].name;
-		idx++;
-	}
-
-	return "UTF-8";
-}
-
-/*
- * Turn an accumulated detection state into a charset name.
- *
- * Level 1 yields "ISO-8859-1", level 2 defers to the mask lookup, and
- * any other level yields NULL.
- */
-const char *camel_charset_best_name(CamelCharset *charset)
-{
-	switch (charset->level) {
-	case 1:
-		return "ISO-8859-1";
-	case 2:
-		return camel_charset_best_mask(charset->mask);
-	default:
-		return NULL;
-	}
-}
-
-/*
- * One-shot helper: runs init, a single step over `in` (len bytes) and
- * the name lookup.  A NULL result means US-ASCII is sufficient.
- */
-const char *
-camel_charset_best(const char *in, int len)
-{
-	CamelCharset state;
-
-	camel_charset_init(&state);
-	camel_charset_step(&state, in, len);
-
-	return camel_charset_best_name(&state);
-}
-
-/*
- * Extract the codeset part of the current locale name.
- *
- * Returns a newly allocated, lower-cased copy of the codeset (allocated
- * with g_strndup(), so the caller owns it), or NULL when the locale is
- * unset, is "C" or "POSIX", or carries no codeset.
- */
-char *
-camel_charset_locale_name (void)
-{
-	const char *locale, *codeset, *end;
-	char *charset;
-
-	/* NOTE(review): LC_ALL may yield a composite string when categories
-	 * differ; LC_CTYPE might be the better query - confirm before changing.
-	 */
-	locale = setlocale (LC_ALL, NULL);
-
-	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
-		/* The locale "C" or "POSIX" is a portable locale; its
-		 * LC_CTYPE part corresponds to the 7-bit ASCII character
-		 * set.
-		 */
-		return NULL;
-	}
-
-	/* A locale name is typically of the form language[_terri-
-	 * tory][.codeset][@modifier], where language is an ISO 639
-	 * language code, territory is an ISO 3166 country code, and
-	 * codeset is a character set or encoding identifier like
-	 * ISO-8859-1 or UTF-8.
-	 */
-
-	/* the codeset ends at the modifier, or at the end of the string */
-	end = strchr (locale, '@');
-	if (!end)
-		end = locale + strlen (locale);
-
-	codeset = strchr (locale, '.');
-	if (!codeset)
-		return NULL;
-
-	/* step past the '.'; reject an empty codeset or a '.' inside the modifier */
-	codeset++;
-	if (codeset >= end)
-		return NULL;
-
-	charset = g_strndup (codeset, end - codeset);
-	g_strdown (charset);
-
-	return charset;
-}
-
-#endif /* !BUILD_MAP */
-