aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristophe Fergeau <teuf@users.sourceforge.net>2005-02-09 02:49:04 +0800
committerSivaiah Nallagatla <siva@src.gnome.org>2005-02-09 02:49:04 +0800
commitbd22689900a6b68ba6a19b203bc64ce10ff32901 (patch)
treee49f8f4cf50bdaca505830cc8194ba7beeef7dc3
parente5cf0530c890fa3afae9042093de30fc2b480eb8 (diff)
downloadgsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.gz
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.bz2
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.lz
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.xz
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.zst
gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.zip
New functions which peek at the contents of the file and guess the
2005-02-08 Christophe Fergeau <teuf@users.sourceforge.net> * importers/evolution-vcard-importer.c (has_bom) (fix_utf16_endianness) (utf16_to_utf8) (guess_vcard_encoding): New functions which peek at the contents of the file and guess the encoding, and which convert UTF-16 strings to UTF-8. (load_file_fn): Check the encoding of the file and convert UTF-16 and locale encodings to UTF-8. Fixes #54825. svn path=/trunk/; revision=28750
-rw-r--r--addressbook/ChangeLog12
-rw-r--r--addressbook/importers/evolution-vcard-importer.c139
2 files changed, 136 insertions, 15 deletions
diff --git a/addressbook/ChangeLog b/addressbook/ChangeLog
index 99cdb61453..f92bffac53 100644
--- a/addressbook/ChangeLog
+++ b/addressbook/ChangeLog
@@ -1,3 +1,15 @@
+2005-02-08 Christophe Fergeau <teuf@users.sourceforge.net>
+
+ * importers/evolution-vcard-importer.c (has_bom)
+ (fix_utf16_endianness) (utf16_to_utf8)
+ guess_vcard_encoding) : New functions which peeks at
+ the conents of the file and guesses the encoding and
+ to convert UTF-16 strings to UTF-8.
+ (load_file_fn) : check the encoding of the file
+ and convert UTF-16 and locale encoding to UTF-8
+
+ Fixes #54825
+
2005-02-07 JP Rosevear <jpr@novell.com>
* gui/component/apps_evolution_addressbook.schemas.in.in: clean up
diff --git a/addressbook/importers/evolution-vcard-importer.c b/addressbook/importers/evolution-vcard-importer.c
index 8179179558..8a141466eb 100644
--- a/addressbook/importers/evolution-vcard-importer.c
+++ b/addressbook/importers/evolution-vcard-importer.c
@@ -267,35 +267,127 @@ static char *supported_extensions[3] = {
NULL
};
-/* Actually check the contents of this file */
+#define BOM (gunichar2)0xFEFF
+#define ANTIBOM (gunichar2)0xFFFE
+
static gboolean
-check_file_is_vcard (const char *filename)
+has_bom (const gunichar2 *utf16)
+{
+
+ if ((utf16 == NULL) || (*utf16 == '\0')) {
+ return FALSE;
+ }
+
+ return ((*utf16 == BOM) || (*utf16 == ANTIBOM));
+}
+
+static void
+fix_utf16_endianness (gunichar2 *utf16)
+{
+ gunichar2 *it;
+
+
+ if ((utf16 == NULL) || (*utf16 == '\0')) {
+ return;
+ }
+
+ if (*utf16 != ANTIBOM) {
+ return;
+ }
+
+ for (it = utf16; *it != '\0'; it++) {
+ *it = GUINT16_SWAP_LE_BE (*it);
+ }
+}
+
+/* Converts an UTF-16 string to an UTF-8 string removing the BOM character
+ * WARNING: this may modify the utf16 argument if the function detects the
+ * string isn't using the local endianness
+ */
+static gchar *
+utf16_to_utf8 (gunichar2 *utf16)
+{
+
+ if (utf16 == NULL) {
+ return NULL;
+ }
+
+ fix_utf16_endianness (utf16);
+
+ if (*utf16 == BOM) {
+ utf16++;
+ }
+
+ return g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL);
+}
+
+
+enum _VCardEncoding {
+ VCARD_ENCODING_NONE,
+ VCARD_ENCODING_UTF8,
+ VCARD_ENCODING_UTF16,
+ VCARD_ENCODING_LOCALE
+};
+
+typedef enum _VCardEncoding VCardEncoding;
+
+
+/* Actually check the contents of this file */
+static VCardEncoding
+guess_vcard_encoding (const char *filename)
{
FILE *handle;
char line[4096];
- gboolean result;
+ char *line_utf8;
+ VCardEncoding encoding = VCARD_ENCODING_NONE;
handle = fopen (filename, "r");
if (handle == NULL) {
g_print ("\n");
- return FALSE;
+ return VCARD_ENCODING_NONE;
}
fgets (line, 4096, handle);
if (line == NULL) {
fclose (handle);
g_print ("\n");
- return FALSE;
+ return VCARD_ENCODING_NONE;
}
-
- if (g_ascii_strncasecmp (line, "BEGIN:VCARD", 11) == 0) {
- result = TRUE;
+ fclose (handle);
+
+ if (has_bom ((gunichar2*)line)) {
+ gunichar2 *utf16 = (gunichar2*)line;
+ /* Check for a BOM to try to detect UTF-16 encoded vcards
+ * (MacOSX address book creates such vcards for example)
+ */
+ line_utf8 = utf16_to_utf8 (utf16);
+ if (line_utf8 == NULL) {
+ return VCARD_ENCODING_NONE;
+ }
+ encoding = VCARD_ENCODING_UTF16;
+ } else if (g_utf8_validate (line, -1, NULL)) {
+ line_utf8 = g_strdup (line);
+ encoding = VCARD_ENCODING_UTF8;
} else {
- result = FALSE;
+ line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL);
+ if (line_utf8 == NULL) {
+ return VCARD_ENCODING_NONE;
+ }
+ encoding = VCARD_ENCODING_LOCALE;
}
- fclose (handle);
- return result;
+ if (g_ascii_strncasecmp (line_utf8, "BEGIN:VCARD", 11) != 0) {
+ encoding = VCARD_ENCODING_NONE;
+ }
+
+ g_free (line_utf8);
+ return encoding;
+}
+
+static gboolean
+check_file_is_vcard (const char *filename)
+{
+ return guess_vcard_encoding (filename) != VCARD_ENCODING_NONE;
}
static void
@@ -354,8 +446,9 @@ support_format_fn (EvolutionImporter *importer,
return check_file_is_vcard (filename);
}
for (i = 0; supported_extensions[i] != NULL; i++) {
- if (g_ascii_strcasecmp (supported_extensions[i], ext) == 0)
+ if (g_ascii_strcasecmp (supported_extensions[i], ext) == 0) {
return check_file_is_vcard (filename);
+ }
}
return FALSE;
@@ -386,8 +479,10 @@ load_file_fn (EvolutionImporter *importer,
{
VCardImporter *gci;
char *contents;
-
- if (check_file_is_vcard (filename) == FALSE) {
+ VCardEncoding encoding;
+
+ encoding = guess_vcard_encoding (filename);
+ if (encoding == VCARD_ENCODING_NONE) {
return FALSE;
}
@@ -408,7 +503,21 @@ load_file_fn (EvolutionImporter *importer,
if (!g_file_get_contents (filename, &contents, NULL, NULL)) {
g_message (G_STRLOC ":Couldn't read file.");
return FALSE;
- }
+ }
+
+ if (encoding == VCARD_ENCODING_UTF16) {
+ gchar *tmp;
+ gunichar2 *contents_utf16 = (gunichar2*)contents;
+ tmp = utf16_to_utf8 (contents_utf16);
+ g_free (contents);
+ contents = tmp;
+ } else if (encoding == VCARD_ENCODING_LOCALE) {
+ gchar *tmp;
+ tmp = g_locale_to_utf8 (contents, -1, NULL, NULL, NULL);
+ g_free (contents);
+ contents = tmp;
+ }
+
gci->contactlist = eab_contact_list_from_string (contents);
g_free (contents);