aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jeffrey Stedfast <fejj@ximian.com> 2001-12-18 09:28:27 +0800
committer: Jeffrey Stedfast <fejj@src.gnome.org> 2001-12-18 09:28:27 +0800
commit: f6408daa103092f18789a719a4123224b259f71f (patch)
tree: 838b491516e1b3669428136d73019aa9afe5f2c3
parent: 13299ab7e073cf4d412cec019e4240a7634c1cf5 (diff)
downloadgsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.gz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.bz2
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.lz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.xz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.zst
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.zip
New function to map ISO charsets to the Windows charsets.
2001-12-17 Jeffrey Stedfast <fejj@ximian.com>

* camel-charset-map.c (camel_charset_iso_to_windows): New function to map ISO charsets to the Windows charsets.

* camel-mime-part-utils.c (broken_windows_charset): Detect Windows charsets.
(simple_data_wrapper_construct_from_parser): Simplify a tad and also check for iso-8859-* charsets that are really Windows charsets. Fixes bug #12631.

svn path=/trunk/; revision=15144
-rw-r--r-- camel/ChangeLog 26
-rw-r--r-- camel/camel-charset-map.c 55
-rw-r--r-- camel/camel-charset-map.h 2
-rw-r--r-- camel/camel-mime-part-utils.c 75
4 files changed, 127 insertions, 31 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index cb8a9eaf6a..b82266ff9e 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,14 @@
+2001-12-17 Jeffrey Stedfast <fejj@ximian.com>
+
+ * camel-charset-map.c (camel_charset_iso_to_windows): New function
+ to map ISO charsets to the Windows charsets.
+
+ * camel-mime-part-utils.c (broken_windows_charset): Detect Windows
+ charsets.
+ (simple_data_wrapper_construct_from_parser): Simplify a tad and
+ also check for iso-8859-* charsets that are really Windows
+ charsets. Fixes bug #12631.
+
2001-12-17 Dan Winship <danw@ximian.com>
* Makefile.am (INCLUDES): define CAMEL_PROVIDERDIR to be the
@@ -7,11 +18,16 @@
* providers/imap/Makefile.am (camel_provider_LTLIBRARIES,
camel_provider_DATA): renamed from provider_LTLIBRARIES,
- provider_DATA.
- * providers/local/Makefile.am: Likewise
- * providers/nntp/Makefile.am: Likewise
- * providers/pop3/Makefile.am: Likewise
- * providers/sendmail/Makefile.am: Likewise
+ provider_DATA.
+
+ * providers/local/Makefile.am: Likewise
+
+ * providers/nntp/Makefile.am: Likewise
+
+ * providers/pop3/Makefile.am: Likewise
+
+ * providers/sendmail/Makefile.am: Likewise
+
* providers/smtp/Makefile.am: Likewise
2001-12-16 Jeffrey Stedfast <fejj@ximian.com>
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
index 17962d74be..2416dd2504 100644
--- a/camel/camel-charset-map.c
+++ b/camel/camel-charset-map.c
@@ -292,5 +292,60 @@ camel_charset_best (const char *in, int len)
return camel_charset_best_name (&charset);
}
+
+/**
+ * camel_charset_iso_to_windows:
+ * @isocharset: an ISO charset name such as "iso-8859-1"; it is
+ * compared case-insensitively and is passed straight to strcasecmp,
+ * so it must not be NULL
+ *
+ * Maps an ISO-8859 charset name to the name of the Windows codepage
+ * that this function treats as its equivalent.
+ *
+ * Returns the equivalent Windows charset name (a string literal), or
+ * @isocharset itself, i.e. the very same pointer, when the name has
+ * no mapping here.
+ **/
+const char *
+camel_charset_iso_to_windows (const char *isocharset)
+{
+ /* According to http://czyborra.com/charsets/codepages.html,
+ * the charset mapping is as follows:
+ *
+ * iso-8859-1 maps to windows-cp1252
+ * iso-8859-2 maps to windows-cp1250
+ * iso-8859-3 maps to windows-cp????
+ * iso-8859-4 maps to windows-cp????
+ * iso-8859-5 maps to windows-cp1251
+ * iso-8859-6 maps to windows-cp1256
+ * iso-8859-7 maps to windows-cp1253
+ * iso-8859-8 maps to windows-cp1255
+ * iso-8859-9 maps to windows-cp1254
+ * iso-8859-10 maps to windows-cp????
+ * iso-8859-11 maps to windows-cp????
+ * iso-8859-12 maps to windows-cp????
+ * iso-8859-13 maps to windows-cp1257
+ *
+ * Assumptions:
+ * - iso-8859-4 and iso-8859-13 are both Baltic, so I'm going
+ * to assume that iso-8859-4 also maps to windows-cp1257.
+ *
+ * The remaining "????" entries (iso-8859-3, -10, -11 and -12)
+ * have no branch below, so they fall through and are returned
+ * unchanged.
+ */
+
+ if (!strcasecmp (isocharset, "iso-8859-1"))
+ return "windows-cp1252";
+ else if (!strcasecmp (isocharset, "iso-8859-2"))
+ return "windows-cp1250";
+ else if (!strcasecmp (isocharset, "iso-8859-4"))
+ return "windows-cp1257";
+ else if (!strcasecmp (isocharset, "iso-8859-5"))
+ return "windows-cp1251";
+ else if (!strcasecmp (isocharset, "iso-8859-6"))
+ return "windows-cp1256";
+ else if (!strcasecmp (isocharset, "iso-8859-7"))
+ return "windows-cp1253";
+ else if (!strcasecmp (isocharset, "iso-8859-8"))
+ return "windows-cp1255";
+ else if (!strcasecmp (isocharset, "iso-8859-9"))
+ return "windows-cp1254";
+ else if (!strcasecmp (isocharset, "iso-8859-13"))
+ return "windows-cp1257";
+
+ return isocharset;
+}
+
#endif /* !BUILD_MAP */
diff --git a/camel/camel-charset-map.h b/camel/camel-charset-map.h
index 7c7022c0a1..0cae1916a6 100644
--- a/camel/camel-charset-map.h
+++ b/camel/camel-charset-map.h
@@ -37,4 +37,6 @@ const char *camel_charset_best_name(CamelCharset *);
/* helper function */
const char *camel_charset_best(const char *in, int len);
+const char *camel_charset_iso_to_windows (const char *isocharset);
+
#endif /* ! _CAMEL_CHARSET_MAP_H */
diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c
index 65c99c6dc8..08787df2cd 100644
--- a/camel/camel-mime-part-utils.c
+++ b/camel/camel-mime-part-utils.c
@@ -155,6 +155,28 @@ convert_buffer (GByteArray *in, const char *to, const char *from)
return out;
}
+/*
+ * Scans @buffer for any byte in the range 128-159 (0x80-0x9f), which
+ * this code takes as a sign that the text is really in a Windows
+ * charset. Returns TRUE (after logging a warning) as soon as one such
+ * byte is found, or FALSE if the whole buffer contains none.
+ *
+ * We don't really use the charset argument except for debugging: it
+ * only appears in the warning message.
+ */
+static gboolean
+broken_windows_charset (GByteArray *buffer, const char *charset)
+{
+ register unsigned char *inptr;
+ unsigned char *inend;
+
+ inptr = buffer->data;
+ inend = inptr + buffer->len;
+
+ while (inptr < inend) {
+ register unsigned char c = *inptr++;
+
+ if (c >= 128 && c <= 159) {
+ g_warning ("Encountered Windows charset parading as %s", charset);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
static gboolean
is_7bit (GByteArray *buffer)
{
@@ -172,33 +194,24 @@ static void
simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)
{
CamelMimeFilter *fdec = NULL, *fcrlf = NULL;
+ CamelMimeFilterBasicType enctype;
int len, decid = -1, crlfid = -1;
struct _header_content_type *ct;
+ const char *charset = NULL;
GByteArray *buffer;
char *encoding, *buf;
- const char *charset = NULL;
- CamelMimeFilterBasicType enctype = 0;
CamelStream *mem;
-
- d(printf("constructing data-wrapper\n"));
+
+ d(printf ("simple_data_wrapper_construct_from_parser()\n"));
/* first, work out conversion, if any, required, we dont care about what we dont know about */
- encoding = header_content_encoding_decode(camel_mime_parser_header(mp, "content-transfer-encoding", NULL));
+ encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "Content-Transfer-Encoding", NULL));
if (encoding) {
- if (!strcasecmp(encoding, "base64")) {
- d(printf("Adding base64 decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC;
- } else if (!strcasecmp(encoding, "quoted-printable")) {
- d(printf("Adding quoted-printable decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_QP_DEC;
- } else if (!strcasecmp (encoding, "x-uuencode")) {
- d(printf("Adding uudecoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_UU_DEC;
- }
+ enctype = camel_mime_part_encoding_from_string (encoding);
g_free (encoding);
- if (enctype != 0) {
- fdec = (CamelMimeFilter *)camel_mime_filter_basic_new_type(enctype);
+ if (enctype != CAMEL_MIME_PART_ENCODING_DEFAULT) {
+ fdec = (CamelMimeFilter *) camel_mime_filter_basic_new_type (enctype);
decid = camel_mime_parser_filter_add (mp, fdec);
}
}
@@ -229,21 +242,32 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
charset = check_html_charset(buffer->data, buffer->len);
/* if we need to do charset conversion, see if we can/it works/etc */
- if (charset && !(strcasecmp(charset, "us-ascii") == 0
- || strcasecmp(charset, "utf-8") == 0
- || strncasecmp(charset, "x-", 2) == 0)) {
+ if (charset && !(strcasecmp (charset, "us-ascii") == 0
+ || strcasecmp (charset, "utf-8") == 0
+ || strncasecmp (charset, "x-", 2) == 0)) {
GByteArray *out;
- out = convert_buffer(buffer, "UTF-8", charset);
+ /* You often see Microsoft Windows users announcing their texts
+ * as being in ISO-8859-1 even when in fact they contain funny
+ * characters from the Windows-CP1252 superset.
+ */
+ if (!strncasecmp (charset, "iso-8859", 8)) {
+ /* check for Windows-specific chars... */
+ if (broken_windows_charset (buffer, charset)) {
+ charset = camel_charset_iso_to_windows (charset);
+ charset = e_iconv_charset_name (charset);
+ }
+ }
+
+ out = convert_buffer (buffer, "UTF-8", charset);
if (out) {
/* converted ok, use this data instead */
g_byte_array_free(buffer, TRUE);
buffer = out;
} else {
- g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
/* else failed to convert, leave as raw? */
+ g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
dw->rawtext = TRUE;
- /* should we change the content-type header? */
}
} else if (header_content_type_is (ct, "text", "*")) {
if (charset == NULL) {
@@ -258,10 +282,9 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
dw->rawtext = !g_utf8_validate (buffer->data, buffer->len, NULL);
}
}
-
-
+
d(printf("message part kept in memory!\n"));
-
+
mem = camel_stream_mem_new_with_byte_array(buffer);
camel_data_wrapper_construct_from_stream(dw, mem);
camel_object_unref((CamelObject *)mem);