aboutsummaryrefslogtreecommitdiffstats
path: root/e-util/e-iconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'e-util/e-iconv.c')
-rw-r--r--e-util/e-iconv.c610
1 files changed, 0 insertions, 610 deletions
diff --git a/e-util/e-iconv.c b/e-util/e-iconv.c
deleted file mode 100644
index 9c33209012..0000000000
--- a/e-util/e-iconv.c
+++ /dev/null
@@ -1,610 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * e-iconv.c
- * Copyright 2000, 2001, Ximian, Inc.
- *
- * Authors:
- * Michael Zucchi <notzed@ximian.com>
- * Jeffery Stedfast <fejj@ximian.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License, version 2, as published by the Free Software Foundation.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <glib.h>
-#include "e-iconv.h"
-
-#include <locale.h>
-
-#ifdef HAVE_CODESET
-#include <langinfo.h>
-#endif
-
-#include "iconv-detect.h"
-
-#define cd(x)
-
-#ifdef G_THREADS_ENABLED
-static GStaticMutex lock = G_STATIC_MUTEX_INIT;
-#define LOCK() g_static_mutex_lock(&lock)
-#define UNLOCK() g_static_mutex_unlock(&lock)
-#else
-#define LOCK()
-#define UNLOCK()
-#endif
-
-typedef struct _EDListNode {
- struct _EDListNode *next;
- struct _EDListNode *prev;
-} EDListNode;
-
-typedef struct _EDList {
- struct _EDListNode *head;
- struct _EDListNode *tail;
- struct _EDListNode *tailpred;
-} EDList;
-
-#define E_DLIST_INITIALISER(l) { (EDListNode *)&l.tail, 0, (EDListNode *)&l.head }
-
-struct _iconv_cache_node {
- struct _iconv_cache_node *next;
- struct _iconv_cache_node *prev;
-
- struct _iconv_cache *parent;
-
- int busy;
- iconv_t ip;
-};
-
-struct _iconv_cache {
- struct _iconv_cache *next;
- struct _iconv_cache *prev;
-
- char *conv;
-
- EDList open; /* stores iconv_cache_nodes, busy ones up front */
-};
-
-#define E_ICONV_CACHE_SIZE (16)
-
-static EDList iconv_cache_list;
-static GHashTable *iconv_cache;
-static GHashTable *iconv_cache_open;
-static unsigned int iconv_cache_size = 0;
-
-static GHashTable *iconv_charsets = NULL;
-static char *locale_charset = NULL;
-static char *locale_lang = NULL;
-
-struct {
- char *charset;
- char *iconv_name;
-} known_iconv_charsets[] = {
-#if 0
- /* charset name, iconv-friendly charset name */
- { "iso-8859-1", "iso-8859-1" },
- { "iso8859-1", "iso-8859-1" },
- /* the above mostly serves as an example for iso-style charsets,
- but we have code that will populate the iso-*'s if/when they
- show up in e_iconv_charset_name() so I'm
- not going to bother putting them all in here... */
- { "windows-cp1251", "cp1251" },
- { "windows-1251", "cp1251" },
- { "cp1251", "cp1251" },
- /* the above mostly serves as an example for windows-style
- charsets, but we have code that will parse and convert them
- to their cp#### equivalents if/when they show up in
- e_iconv_charset_name() so I'm not going to bother
- putting them all in here either... */
-#endif
- /* charset name (lowercase!), iconv-friendly name (sometimes case sensitive) */
- { "utf-8", "UTF-8" },
-
- /* 10646 is a special case, its usually UCS-2 big endian */
- /* This might need some checking but should be ok for solaris/linux */
- { "iso-10646-1", "UCS-2BE" },
- { "iso_10646-1", "UCS-2BE" },
- { "iso10646-1", "UCS-2BE" },
- { "iso-10646", "UCS-2BE" },
- { "iso_10646", "UCS-2BE" },
- { "iso10646", "UCS-2BE" },
-
- { "ks_c_5601-1987", "EUC-KR" },
-
- /* FIXME: Japanese/Korean/Chinese stuff needs checking */
- { "euckr-0", "EUC-KR" },
- { "5601", "EUC-KR" },
- { "big5-0", "BIG5" },
- { "big5.eten-0", "BIG5" },
- { "big5hkscs-0", "BIG5HKCS" },
- { "gb2312-0", "gb2312" },
- { "gb2312.1980-0", "gb2312" },
- { "gb-2312", "gb2312" },
- { "gb18030-0", "gb18030" },
- { "gbk-0", "GBK" },
-
- { "eucjp-0", "eucJP" },
- { "ujis-0", "ujis" },
- { "jisx0208.1983-0","SJIS" },
- { "jisx0212.1990-0","SJIS" },
- { "pck", "SJIS" },
- { NULL, NULL }
-};
-
-
-
-/* Another copy of this trivial list implementation
- Why? This stuff gets called a lot (potentially), should run fast,
- and g_list's are f@@#$ed up to make this a hassle */
-static void e_dlist_init(EDList *v)
-{
- v->head = (EDListNode *)&v->tail;
- v->tail = 0;
- v->tailpred = (EDListNode *)&v->head;
-}
-
-static EDListNode *e_dlist_addhead(EDList *l, EDListNode *n)
-{
- n->next = l->head;
- n->prev = (EDListNode *)&l->head;
- l->head->prev = n;
- l->head = n;
- return n;
-}
-
-static EDListNode *e_dlist_addtail(EDList *l, EDListNode *n)
-{
- n->next = (EDListNode *)&l->tail;
- n->prev = l->tailpred;
- l->tailpred->next = n;
- l->tailpred = n;
- return n;
-}
-
-static EDListNode *e_dlist_remove(EDListNode *n)
-{
- n->next->prev = n->prev;
- n->prev->next = n->next;
- return n;
-}
-
-
-/* fucking glib... */
-static const char *
-e_strdown (char *str)
-{
- register char *s = str;
-
- while (*s) {
- if (*s >= 'A' && *s <= 'Z')
- *s += 0x20;
- s++;
- }
-
- return str;
-}
-
-static const char *
-e_strup (char *str)
-{
- register char *s = str;
-
- while (*s) {
- if (*s >= 'a' && *s <= 'z')
- *s -= 0x20;
- s++;
- }
-
- return str;
-}
-
-
-static void
-locale_parse_lang (const char *locale)
-{
- char *codeset, *lang;
-
- if ((codeset = strchr (locale, '.')))
- lang = g_strndup (locale, codeset - locale);
- else
- lang = g_strdup (locale);
-
- /* validate the language */
- if (strlen (lang) >= 2) {
- if (lang[2] == '-' || lang[2] == '_') {
- /* canonicalise the lang */
- e_strdown (lang);
-
- /* validate the country code */
- if (strlen (lang + 3) > 2) {
- /* invalid country code */
- lang[2] = '\0';
- } else {
- lang[2] = '-';
- e_strup (lang + 3);
- }
- } else if (lang[2] != '\0') {
- /* invalid language */
- g_free (lang);
- lang = NULL;
- }
-
- locale_lang = lang;
- } else {
- /* invalid language */
- locale_lang = NULL;
- g_free (lang);
- }
-}
-
-/* NOTE: Owns the lock on return if keep is TRUE ! */
-static void
-e_iconv_init(int keep)
-{
- char *from, *to, *locale;
- int i;
-
- LOCK();
-
- if (iconv_charsets != NULL) {
- if (!keep)
- UNLOCK();
- return;
- }
-
- iconv_charsets = g_hash_table_new(g_str_hash, g_str_equal);
-
- for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
- from = g_strdup(known_iconv_charsets[i].charset);
- to = g_strdup(known_iconv_charsets[i].iconv_name);
- e_strdown (from);
- g_hash_table_insert(iconv_charsets, from, to);
- }
-
- e_dlist_init(&iconv_cache_list);
- iconv_cache = g_hash_table_new(g_str_hash, g_str_equal);
- iconv_cache_open = g_hash_table_new(NULL, NULL);
-
- locale = setlocale (LC_ALL, NULL);
-
- if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
- /* The locale "C" or "POSIX" is a portable locale; its
- * LC_CTYPE part corresponds to the 7-bit ASCII character
- * set.
- */
-
- locale_charset = NULL;
- locale_lang = NULL;
- } else {
-#ifdef HAVE_CODESET
- locale_charset = g_strdup (nl_langinfo (CODESET));
- e_strdown (locale_charset);
-#else
- /* A locale name is typically of the form language[_terri-
- * tory][.codeset][@modifier], where language is an ISO 639
- * language code, territory is an ISO 3166 country code, and
- * codeset is a character set or encoding identifier like
- * ISO-8859-1 or UTF-8.
- */
- char *codeset, *p;
-
- codeset = strchr (locale, '.');
- if (codeset) {
- codeset++;
-
- /* ; is a hack for debian systems and / is a hack for Solaris systems */
- for (p = codeset; *p && !strchr ("@;/", *p); p++);
- locale_charset = g_strndup (codeset, p - codeset);
- e_strdown (locale_charset);
- } else {
- /* charset unknown */
- locale_charset = NULL;
- }
-#endif
-
- /* parse the locale lang */
- locale_parse_lang (locale);
-
- }
-
- if (!keep)
- UNLOCK();
-}
-
-const char *e_iconv_charset_name(const char *charset)
-{
- char *name, *ret, *tmp;
-
- if (charset == NULL)
- return NULL;
-
- name = g_alloca (strlen (charset) + 1);
- strcpy (name, charset);
- e_strdown (name);
-
- e_iconv_init(TRUE);
- ret = g_hash_table_lookup(iconv_charsets, name);
- if (ret != NULL) {
- UNLOCK();
- return ret;
- }
-
- /* Unknown, try canonicalise some basic charset types to something that should work */
- if (strncmp(name, "iso", 3) == 0) {
- /* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n or isonnnn-n */
- int iso, codepage;
- char *p;
-
- tmp = name + 3;
- if (*tmp == '-' || *tmp == '_')
- tmp++;
-
- iso = strtoul (tmp, &p, 10);
-
- if (iso == 10646) {
- /* they all become ICONV_10646 */
- ret = g_strdup (ICONV_10646);
- } else {
- tmp = p;
- if (*tmp == '-' || *tmp == '_')
- tmp++;
-
- codepage = strtoul (tmp, &p, 10);
-
- if (p > tmp) {
- /* codepage is numeric */
-#ifdef __aix__
- if (codepage == 13)
- ret = g_strdup ("IBM-921");
- else
-#endif /* __aix__ */
- ret = g_strdup_printf (ICONV_ISO_D_FORMAT, iso, codepage);
- } else {
- /* codepage is a string - probably iso-2022-jp or something */
- ret = g_strdup_printf (ICONV_ISO_S_FORMAT, iso, p);
- }
- }
- } else if (strncmp(name, "windows-", 8) == 0) {
- /* Convert windows-nnnnn or windows-cpnnnnn to cpnnnn */
- tmp = name+8;
- if (!strncmp(tmp, "cp", 2))
- tmp+=2;
- ret = g_strdup_printf("CP%s", tmp);
- } else if (strncmp(name, "microsoft-", 10) == 0) {
- /* Convert microsoft-nnnnn or microsoft-cpnnnnn to cpnnnn */
- tmp = name+10;
- if (!strncmp(tmp, "cp", 2))
- tmp+=2;
- ret = g_strdup_printf("CP%s", tmp);
- } else {
- /* Just assume its ok enough as is, case and all */
- ret = g_strdup(charset);
- }
-
- g_hash_table_insert(iconv_charsets, g_strdup(name), ret);
- UNLOCK();
-
- return ret;
-}
-
-static void
-flush_entry(struct _iconv_cache *ic)
-{
- struct _iconv_cache_node *in, *nn;
-
- in = (struct _iconv_cache_node *)ic->open.head;
- nn = in->next;
- while (nn) {
- if (in->ip != (iconv_t)-1) {
- g_hash_table_remove(iconv_cache_open, in->ip);
- iconv_close(in->ip);
- }
- g_free(in);
- in = nn;
- nn = in->next;
- }
- g_free(ic->conv);
- g_free(ic);
-}
-
-/* This should run pretty quick, its called a lot */
-iconv_t e_iconv_open(const char *oto, const char *ofrom)
-{
- const char *to, *from;
- char *tofrom;
- struct _iconv_cache *ic;
- struct _iconv_cache_node *in;
- int errnosav;
- iconv_t ip;
-
- if (oto == NULL || ofrom == NULL) {
- errno = EINVAL;
- return (iconv_t) -1;
- }
-
- to = e_iconv_charset_name (oto);
- from = e_iconv_charset_name (ofrom);
- tofrom = g_alloca (strlen (to) + strlen (from) + 2);
- sprintf(tofrom, "%s%%%s", to, from);
-
- LOCK();
-
- ic = g_hash_table_lookup(iconv_cache, tofrom);
- if (ic) {
- e_dlist_remove((EDListNode *)ic);
- } else {
- struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred;
- struct _iconv_cache *prev;
-
- prev = last->prev;
- while (prev && iconv_cache_size > E_ICONV_CACHE_SIZE) {
- in = (struct _iconv_cache_node *)last->open.head;
- if (in->next && !in->busy) {
- cd(printf("Flushing iconv converter '%s'\n", last->conv));
- e_dlist_remove((EDListNode *)last);
- g_hash_table_remove(iconv_cache, last->conv);
- flush_entry(last);
- iconv_cache_size--;
- }
- last = prev;
- prev = last->prev;
- }
-
- iconv_cache_size++;
-
- ic = g_malloc(sizeof(*ic));
- e_dlist_init(&ic->open);
- ic->conv = g_strdup(tofrom);
- g_hash_table_insert(iconv_cache, ic->conv, ic);
-
- cd(printf("Creating iconv converter '%s'\n", ic->conv));
- }
- e_dlist_addhead(&iconv_cache_list, (EDListNode *)ic);
-
- /* If we have a free iconv, use it */
- in = (struct _iconv_cache_node *)ic->open.tailpred;
- if (in->prev && !in->busy) {
- cd(printf("using existing iconv converter '%s'\n", ic->conv));
- ip = in->ip;
- if (ip != (iconv_t)-1) {
- /* work around some broken iconv implementations
- * that die if the length arguments are NULL
- */
- size_t buggy_iconv_len = 0;
- char *buggy_iconv_buf = NULL;
-
- /* resets the converter */
- iconv(ip, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
- in->busy = TRUE;
- e_dlist_remove((EDListNode *)in);
- e_dlist_addhead(&ic->open, (EDListNode *)in);
- }
- } else {
- cd(printf("creating new iconv converter '%s'\n", ic->conv));
- ip = iconv_open(to, from);
- in = g_malloc(sizeof(*in));
- in->ip = ip;
- in->parent = ic;
- e_dlist_addhead(&ic->open, (EDListNode *)in);
- if (ip != (iconv_t)-1) {
- g_hash_table_insert(iconv_cache_open, ip, in);
- in->busy = TRUE;
- } else {
- errnosav = errno;
- g_warning("Could not open converter for '%s' to '%s' charset", from, to);
- in->busy = FALSE;
- errno = errnosav;
- }
- }
-
- UNLOCK();
-
- return ip;
-}
-
-size_t e_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char ** outbuf, size_t *outbytesleft)
-{
- return iconv(cd, (char **) inbuf, inbytesleft, outbuf, outbytesleft);
-}
-
-void
-e_iconv_close(iconv_t ip)
-{
- struct _iconv_cache_node *in;
-
- if (ip == (iconv_t)-1)
- return;
-
- LOCK();
- in = g_hash_table_lookup(iconv_cache_open, ip);
- if (in) {
- cd(printf("closing iconv converter '%s'\n", in->parent->conv));
- e_dlist_remove((EDListNode *)in);
- in->busy = FALSE;
- e_dlist_addtail(&in->parent->open, (EDListNode *)in);
- } else {
- g_warning("trying to close iconv i dont know about: %p", ip);
- iconv_close(ip);
- }
- UNLOCK();
-
-}
-
-const char *e_iconv_locale_charset(void)
-{
- e_iconv_init(FALSE);
-
- return locale_charset;
-}
-
-
-const char *
-e_iconv_locale_language (void)
-{
- e_iconv_init (FALSE);
-
- return locale_lang;
-}
-
-/* map CJKR charsets to their language code */
-/* NOTE: only support charset names that will be returned by
- * e_iconv_charset_name() so that we don't have to keep track of all
- * the aliases too. */
-static struct {
- char *charset;
- char *lang;
-} cjkr_lang_map[] = {
- { "Big5", "zh" },
- { "BIG5HKCS", "zh" },
- { "gb2312", "zh" },
- { "gb18030", "zh" },
- { "gbk", "zh" },
- { "euc-tw", "zh" },
- { "iso-2022-jp", "ja" },
- { "sjis", "ja" },
- { "ujis", "ja" },
- { "eucJP", "ja" },
- { "euc-jp", "ja" },
- { "euc-kr", "ko" },
- { "koi8-r", "ru" },
- { "koi8-u", "uk" }
-};
-
-#define NUM_CJKR_LANGS (sizeof (cjkr_lang_map) / sizeof (cjkr_lang_map[0]))
-
-const char *
-e_iconv_charset_language (const char *charset)
-{
- int i;
-
- if (!charset)
- return NULL;
-
- charset = e_iconv_charset_name (charset);
- for (i = 0; i < NUM_CJKR_LANGS; i++) {
- if (!strcasecmp (cjkr_lang_map[i].charset, charset))
- return cjkr_lang_map[i].lang;
- }
-
- return NULL;
-}