about | summary | refs | log | tree | commit | diff | stats
path: root/camel/camel-mime-filter-tohtml.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-mime-filter-tohtml.c')
-rw-r--r-- camel/camel-mime-filter-tohtml.c 573
1 files changed, 337 insertions, 236 deletions
diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c
index 60c4686824..b638a315b4 100644
--- a/camel/camel-mime-filter-tohtml.c
+++ b/camel/camel-mime-filter-tohtml.c
@@ -27,37 +27,15 @@
#include <stdio.h>
#include <string.h>
+#include <ctype.h>
-#include "camel-url-scanner.h"
#include "camel-mime-filter-tohtml.h"
-#include "camel-utf8.h"
#define d(x)
-#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
-#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
-
-static struct {
- unsigned int mask;
- urlpattern_t pattern;
-} patterns[] = {
- { CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } },
- { CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } },
- { CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } },
- { CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
-};
-
-#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
-
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
-static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
-static void camel_mime_filter_tohtml_finalize (CamelObject *obj);
+static void camel_mime_filter_tohtml_init (CamelObject *o);
+static void camel_mime_filter_tohtml_finalize (CamelObject *o);
static CamelMimeFilterClass *camel_mime_filter_tohtml_parent;
@@ -82,22 +60,15 @@ camel_mime_filter_tohtml_get_type (void)
}
+/*
+ * Finalizer for a tohtml filter instance.
+ *
+ * The init function in this revision allocates nothing, so there is
+ * nothing to release here.
+ * NOTE(review): presumably registered as the finalize hook in
+ * camel_mime_filter_tohtml_get_type() -- confirm, that code is not shown.
+ */
static void
-camel_mime_filter_tohtml_finalize (CamelObject *obj)
+camel_mime_filter_tohtml_finalize (CamelObject *o)
{
- CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
-
- camel_url_scanner_free (filter->scanner);
+	/* intentionally empty */
+	(void) o;
}
+/*
+ * Per-instance initialiser for a tohtml filter.
+ *
+ * No setup is performed: flags and colour are assigned by
+ * camel_mime_filter_tohtml_new(), and the parsing state is cleared by
+ * filter_reset().
+ * NOTE(review): this relies on the object memory being zeroed when it
+ * is allocated -- confirm against camel_object_new().
+ */
static void
-camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
+camel_mime_filter_tohtml_init (CamelObject *o)
{
- filter->scanner = camel_url_scanner_new ();
-
- filter->flags = 0;
- filter->colour = 0;
- filter->column = 0;
- filter->pre_open = FALSE;
+	/* intentionally empty */
+	(void) o;
}
@@ -118,97 +89,188 @@ check_size (CamelMimeFilter *filter, char *outptr, char **outend, size_t len)
return filter->outbuf + offset;
}
-static int
-citation_depth (const char *in)
+
+/*
+ * Character classification table, indexed by 7-bit ASCII code.  Each
+ * entry is the bitwise OR of the IS_* flags defined below, e.g.
+ * '.' (46) is 12 == IS_GARBAGE | IS_DOMAIN and '/' (47) is
+ * 1 == IS_NON_ADDR.  The values match what the disabled table_init()
+ * computes from the *_CHARS strings; regenerate the table with it
+ * rather than editing entries by hand.
+ */
+static unsigned short special_chars[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 7, 4, 3, 0, 0, 0, 0, 7, 3, 7, 0, 0, 7, 12, 12, 1,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 7, 3, 0, 7, 4,
+ 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 3, 7, 3, 0, 4,
+ 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 7, 4, 0, 0,
+};
+
+
+/* Flag bits stored in special_chars[]. */
+#define IS_NON_ADDR (1 << 0)
+#define IS_NON_URL (1 << 1)
+#define IS_GARBAGE (1 << 2)
+#define IS_DOMAIN (1 << 3)
+
+/* Source strings for the table; in the code shown here they are only
+ * read by the disabled table_init(). */
+#define NON_EMAIL_CHARS "()<>@,;:\\\"/[]`'|\n\t "
+#define NON_URL_CHARS "()<>,;\\\"[]`'|\n\t "
+#define TRAILING_URL_GARBAGE ",.!?;:>)}\\`'-_|\n\t "
+
+/*
+ * Classification predicates.  Each casts its argument to unsigned char,
+ * so plain (possibly signed) char values are safe to pass.  Bytes above
+ * 127 are never address, URL or domain characters and always count as
+ * trailing garbage.  The argument is evaluated more than once.
+ */
+#define is_addr_char(c) ((unsigned char) (c) < 128 && !(special_chars[(unsigned char) (c)] & IS_NON_ADDR))
+#define is_url_char(c) ((unsigned char) (c) < 128 && !(special_chars[(unsigned char) (c)] & IS_NON_URL))
+#define is_trailing_garbage(c) ((unsigned char) (c) > 127 || (special_chars[(unsigned char) (c)] & IS_GARBAGE))
+#define is_domain_name_char(c) ((unsigned char) (c) < 128 && (special_chars[(unsigned char) (c)] & IS_DOMAIN))
+
+
+#if 0
+/*
+ * Generator for the special_chars[] initialiser (compiled out).
+ *
+ * Rebuilds the table in memory from NON_EMAIL_CHARS, NON_URL_CHARS and
+ * TRAILING_URL_GARBAGE, marks ASCII letters, digits, '.' and '-' as
+ * domain characters, then prints the table to stdout as C source,
+ * 16 entries per row.
+ */
+static void
+table_init (void)
{
- register const char *inptr = in;
- int depth = 1;
+	const char *p;
+	int n_entries, code, idx;
+
+	memset (special_chars, 0, sizeof (special_chars));
+
+	p = NON_EMAIL_CHARS;
+	while (*p) {
+		special_chars[(int) *p] |= IS_NON_ADDR;
+		p++;
+	}
+
+	p = NON_URL_CHARS;
+	while (*p) {
+		special_chars[(int) *p] |= IS_NON_URL;
+		p++;
+	}
+
+	p = TRAILING_URL_GARBAGE;
+	while (*p) {
+		special_chars[(int) *p] |= IS_GARBAGE;
+		p++;
+	}
+
+#define is_ascii_alpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
+
+	for (code = 0; code < 128; code++) {
+		if (!(is_ascii_alpha (code) || isdigit (code) || code == '.' || code == '-'))
+			continue;
+
+		special_chars[code] |= IS_DOMAIN;
+	}
- if (*inptr++ != '>')
- return 0;
+	n_entries = sizeof (special_chars) / sizeof (special_chars[0]);
+
+	printf ("static unsigned short special_chars[%d] = {", n_entries);
+	idx = 0;
+	while (idx < n_entries) {
+		/* start a new, tab-indented row every 16 entries */
+		if (idx % 16 == 0)
+			printf ("\n\t");
+		printf ("%3d,", special_chars[idx]);
+		idx++;
+	}
+	printf ("\n};\n");
+}
+#endif
+
+/*
+ * url_extract:
+ * @in: in/out; on entry points at the first byte of a candidate URL, on
+ *      success is advanced to the first byte after the extracted URL
+ * @inlen: number of bytes available at *in
+ * @check: if TRUE, require a ':' inside the extracted text
+ * @backup: if non-NULL, set to TRUE when the scan ran to the end of the
+ *          available data, so the URL may continue in data not yet seen
+ *
+ * Collects the longest run of URL characters at *in and strips trailing
+ * punctuation that is more likely sentence text than part of the URL.
+ *
+ * Returns a newly allocated copy of the URL (free with g_free()), or
+ * NULL when there is no URL character at *in, when @check fails, or
+ * when *backup was set.  *in is left untouched whenever NULL is
+ * returned.
+ */
+static char *
+url_extract (char **in, int inlen, gboolean check, gboolean *backup)
+{
+	unsigned char *inptr, *inend;
+	gboolean hit_end;
+	char *url;
- /* check that it isn't an escaped From line */
- if (!strncmp (inptr, "From", 4))
- return 0;
+	inptr = (unsigned char *) *in;
+	inend = inptr + inlen;
- while (*inptr != '\n') {
- if (*inptr == ' ')
- inptr++;
-
- if (*inptr++ != '>')
- break;
-
- depth++;
+	while (inptr < inend && is_url_char (*inptr))
+		inptr++;
+
+	if ((char *) inptr == *in)
+		return NULL;
+
+	/* Record this before trimming: once trailing garbage has been
+	 * removed, inptr no longer tells us whether the scan stopped at
+	 * a delimiter or simply ran out of data. */
+	hit_end = (inptr == inend);
+
+	/* back up if we probably went too far. */
+	while (inptr > (unsigned char *) *in && is_trailing_garbage (*(inptr - 1)))
+		inptr--;
+
+	if (check) {
+		/* make sure we weren't fooled. */
+		if (!memchr (*in, ':', (char *) inptr - *in))
+			return NULL;
}
- return depth;
+	if (hit_end && backup) {
+		/* the URL may continue in the next chunk; let the caller wait */
+		*backup = TRUE;
+		return NULL;
+	}
+
+	url = g_strndup (*in, (char *) inptr - *in);
+	*in = (char *) inptr;
+
+	return url;
}
+/*
+ * email_address_extract:
+ * @in: in/out; on entry points at the '@' of a candidate address
+ * @inend: end of the available input
+ * @start: start of the current input line; the backward scan for the
+ *         local part never goes before it
+ * @outptr: in/out; current write position in the output buffer
+ * @backup: if non-NULL, set to TRUE when the domain runs up to @inend
+ *          and so may continue in data not yet seen
+ *
+ * The local part has already been copied to the output by the time the
+ * '@' is reached.  On success, and when backing up, *outptr is rewound
+ * by the local part's length so the caller can emit it again (inside
+ * the link, or on the next pass).  That rewind is only valid if every
+ * local-part byte produced exactly one output byte, so the backward
+ * scan stops at '&' and at control bytes, which the caller may expand
+ * into multi-byte entities.
+ *
+ * On success *in is advanced past the address and a newly allocated
+ * copy of it is returned (free with g_free()).  When backing up, *in is
+ * moved to the start of the local part and NULL is returned.  In every
+ * other NULL case *in and *outptr are left untouched.
+ */
static char *
-writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend)
+email_address_extract (char **in, char *inend, char *start, char **outptr, gboolean *backup)
{
- CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
- const char *inptr = in;
-
- while (inptr < inend) {
- guint32 u;
+	char *at = *in;
+	char *pre, *end, *dot = NULL;
+
+	/* look backward from the '@' for a valid local part */
+	pre = at;
+	while (pre > start && is_addr_char (pre[-1]) &&
+	       pre[-1] != '&' && (unsigned char) pre[-1] >= 0x20)
+		pre--;
+
+	if (pre == at)
+		return NULL;
+
+	/* look forward for the domain, remembering its first dot */
+	for (end = at + 1; end < inend && is_domain_name_char (*end); end++) {
+		if (*end == '.' && !dot)
+			dot = end;
+	}
+
+	if (end >= inend && backup) {
+		*backup = TRUE;
+		*outptr -= (at - pre);
+		*in = pre;
+		return NULL;
+	}
+
+	if (!dot)
+		return NULL;
+
+	/* remove trailing garbage */
+	while (end > at && is_trailing_garbage (end[-1]))
+		end--;
+
+	/* end is one past the last kept byte, so dot == end means the
+	 * only dot was itself trimmed away ("user@host.") */
+	if (dot >= end)
+		return NULL;
+
+	*outptr -= (at - pre);
+	*in = end;
+
+	return g_strndup (pre, end - pre);
+}
- outptr = check_size (filter, outptr, outend, 16);
+/*
+ * is_citation:
+ * @inptr: first byte of a line; must be readable (inptr < inend)
+ * @inend: end of the available input
+ * @saw_citation: whether the previous line was classified as a citation
+ * @backup: if non-NULL, set to TRUE when fewer than 6 bytes are
+ *          available, which is too few to rule out ">From "
+ *
+ * Decides whether a line starting with '>' is quoted text or just an
+ * mbox-escaped ">From " line.  A ">From " line only counts as a
+ * citation when the previous line was one.
+ *
+ * When @backup is NULL the caller cannot wait for more data, so a short
+ * line is judged on what is there: fewer than 6 bytes cannot spell
+ * ">From ", hence it is a citation.
+ */
+static gboolean
+is_citation (char *inptr, char *inend, gboolean saw_citation, gboolean *backup)
+{
+	if (*inptr != '>')
+		return FALSE;
+
+	if (inend - inptr < 6) {
+		if (backup) {
+			/* we don't have enough data to tell, so return */
+			*backup = TRUE;
+			return saw_citation;
+		}
+
+		/* too short to be ">From " and nothing more is coming */
+		return TRUE;
+	}
+
+	/* make sure this isn't just mbox From-mangling... */
+	if (strncmp (inptr, ">From ", 6) != 0)
+		return TRUE;
+
+	/* a ">From " line: treat it as a citation only if the previous
+	 * line was one, otherwise it is an isolated escaped From line */
+	return saw_citation ? TRUE : FALSE;
+}
- u = camel_utf8_getc_limit ((const unsigned char **) &inptr, inend);
- switch (u) {
- case 0xffff:
- g_warning("Truncated utf8 buffer");
- return outptr;
- case '<':
- outptr = g_stpcpy (outptr, "&lt;");
- html->column++;
- break;
- case '>':
- outptr = g_stpcpy (outptr, "&gt;");
- html->column++;
- break;
- case '&':
- outptr = g_stpcpy (outptr, "&amp;");
- html->column++;
- break;
- case '"':
- outptr = g_stpcpy (outptr, "&quot;");
- html->column++;
- break;
- case '\t':
- if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) {
- do {
- outptr = check_size (filter, outptr, outend, 7);
- outptr = g_stpcpy (outptr, "&nbsp;");
- html->column++;
- } while (html->column % 8);
- break;
- }
- /* otherwise, FALL THROUGH */
- case ' ':
- if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
- && ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) {
- outptr = g_stpcpy (outptr, "&nbsp;");
- html->column++;
- break;
- }
- /* otherwise, FALL THROUGH */
- default:
- if (u >= 20 && u <0x80)
- *outptr++ = u;
- else {
- if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
- *outptr++ = '?';
- else
- outptr += sprintf(outptr, "&#%u;", u);
- }
- html->column++;
- break;
- }
+/*
+ * is_protocol:
+ * @inptr: position in the input to test
+ * @inend: end of the available input
+ * @backup: if non-NULL, set to TRUE when no prefix matched but fewer
+ *          bytes are available than the longest prefix needs
+ *
+ * Returns TRUE if the text at @inptr starts, case-insensitively, with
+ * one of the recognised URL scheme prefixes.  Each prefix is compared
+ * against its own length, so a short but complete prefix at the very
+ * end of the data (e.g. "ftp://x") is still recognised.
+ */
+static gboolean
+is_protocol (char *inptr, char *inend, gboolean *backup)
+{
+	static const struct {
+		const char *prefix;
+		size_t len;
+	} protocols[] = {
+		{ "http://",  7 },
+		{ "https://", 8 },
+		{ "ftp://",   6 },
+		{ "nntp://",  7 },
+		{ "mailto:",  7 },
+		{ "news:",    5 },
+		{ "file:",    5 },
+	};
+	size_t avail = (size_t) (inend - inptr);
+	size_t i;
+
+	for (i = 0; i < sizeof (protocols) / sizeof (protocols[0]); i++) {
+		if (avail >= protocols[i].len &&
+		    !strncasecmp (inptr, protocols[i].prefix, protocols[i].len))
+			return TRUE;
+	}
+
+	/* 8 is the length of "https://", the longest prefix: with fewer
+	 * bytes than that, a match may still show up once more data
+	 * arrives */
+	if (avail < 8 && backup)
+		*backup = TRUE;
+
- return outptr;
+	return FALSE;
}
static void
@@ -216,128 +278,193 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
char **out, size_t *outlen, size_t *outprespace, gboolean flush)
{
CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
- register char *inptr, *outptr;
- char *start, *outend;
- const char *inend;
- int depth;
+ char *inptr, *inend, *outptr, *outend, *start;
+ gboolean backup = FALSE;
camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE);
- inptr = in;
+ inptr = start = in;
inend = in + inlen;
outptr = filter->outbuf;
outend = filter->outbuf + filter->outsize;
if (html->flags & CAMEL_MIME_FILTER_TOHTML_PRE && !html->pre_open) {
- outptr = g_stpcpy (outptr, "<pre>");
+ outptr += sprintf (outptr, "%s", "<pre>");
html->pre_open = TRUE;
}
- start = inptr;
- while (inptr < inend && *inptr != '\n')
- inptr++;
-
while (inptr < inend) {
- html->column = 0;
- depth = 0;
+ unsigned char u;
- if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) {
- if ((depth = citation_depth (start)) > 0) {
- char font[25];
-
- /* FIXME: we could easily support multiple colour depths here */
-
- g_snprintf (font, 25, "<font color=\"#%06x\">", html->colour);
-
- outptr = check_size (filter, outptr, &outend, 25);
- outptr = g_stpcpy (outptr, font);
- } else if (*start == '>') {
- /* >From line */
- start++;
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION && html->column == 0) {
+ html->saw_citation = is_citation (inptr, inend, html->saw_citation,
+ flush ? &backup : NULL);
+ if (backup)
+ break;
+
+ if (html->saw_citation) {
+ if (!html->coloured) {
+ char font[25];
+
+ g_snprintf (font, 25, "<font color=\"#%06x\">", html->colour);
+
+ outptr = check_size (filter, outptr, &outend, 25);
+ outptr += sprintf (outptr, "%s", font);
+ html->coloured = TRUE;
+ }
+ } else if (html->coloured) {
+ outptr = check_size (filter, outptr, &outend, 10);
+ outptr += sprintf (outptr, "%s", "</font>");
+ html->coloured = FALSE;
}
- } else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE) {
+
+ /* display mbox-mangled ">From " as "From " */
+ if (*inptr == '>' && !html->saw_citation)
+ inptr++;
+ } else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE && html->column == 0) {
outptr = check_size (filter, outptr, &outend, 6);
- outptr = g_stpcpy (outptr, "&gt; ");
- html->column += 2;
+ outptr += sprintf (outptr, "%s", "&gt; ");
}
-#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
- if (html->flags & CONVERT_URLS) {
- size_t matchlen, buflen, len;
- urlmatch_t match;
-
- len = inptr - start;
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS && isalpha ((int) *inptr)) {
+ char *refurl = NULL, *dispurl = NULL;
- do {
- if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
- /* write out anything before the first regex match */
- outptr = writeln (filter, start, start + match.um_so,
- outptr, &outend);
-
- start += match.um_so;
- len -= match.um_so;
-
- matchlen = match.um_eo - match.um_so;
-
- buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
- outptr = check_size (filter, outptr, &outend, buflen);
-
- /* write out the href tag */
- outptr = g_stpcpy (outptr, "<a href=\"");
- outptr = g_stpcpy (outptr, match.prefix);
- memcpy (outptr, start, matchlen);
- outptr += matchlen;
- outptr = g_stpcpy (outptr, "\">");
-
- /* now write the matched string */
- memcpy (outptr, start, matchlen);
- html->column += matchlen;
- outptr += matchlen;
- start += matchlen;
- len -= matchlen;
-
- /* close the href tag */
- outptr = g_stpcpy (outptr, "</a>");
- } else {
- /* nothing matched so write out the remainder of this line buffer */
- outptr = writeln (filter, start, start + len, outptr, &outend);
+ if (is_protocol (inptr, inend, flush ? &backup : NULL)) {
+ dispurl = url_extract (&inptr, inend - inptr, TRUE,
+ flush ? &backup : NULL);
+ if (backup)
break;
+
+ if (dispurl)
+ refurl = g_strdup (dispurl);
+ } else {
+ if (backup)
+ break;
+
+ if (!strncasecmp (inptr, "www.", 4) && ((unsigned char) inptr[4]) < 0x80
+ && isalnum ((int) inptr[4])) {
+ dispurl = url_extract (&inptr, inend - inptr, FALSE,
+ flush ? &backup : NULL);
+ if (backup)
+ break;
+
+ if (dispurl)
+ refurl = g_strdup_printf ("http://%s", dispurl);
}
- } while (len > 0);
- } else {
- outptr = writeln (filter, start, inptr, outptr, &outend);
- }
-
- if ((html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) && depth > 0) {
- outptr = check_size (filter, outptr, &outend, 8);
- outptr = g_stpcpy (outptr, "</font>");
+ }
+
+ if (dispurl) {
+ outptr = check_size (filter, outptr, &outend,
+ strlen (refurl) +
+ strlen (dispurl) + 15);
+ outptr += sprintf (outptr, "<a href=\"%s\">%s</a>",
+ refurl, dispurl);
+ html->column += strlen (dispurl);
+ g_free (refurl);
+ g_free (dispurl);
+ }
+
+ if (inptr >= inend)
+ break;
}
- if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL) {
- outptr = check_size (filter, outptr, &outend, 5);
- outptr = g_stpcpy (outptr, "<br>");
+ if (*inptr == '@' && (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)) {
+ char *addr, *outaddr;
+
+ addr = email_address_extract (&inptr, inend, start, &outptr,
+ flush ? &backup : NULL);
+ if (backup)
+ break;
+
+ if (addr) {
+ outaddr = g_strdup_printf ("<a href=\"mailto:%s\">%s</a>",
+ addr, addr);
+ outptr = check_size (filter, outptr, &outend, strlen (outaddr));
+ outptr += sprintf (outptr, "%s", outaddr);
+ html->column += strlen (addr);
+ g_free (addr);
+ g_free (outaddr);
+ }
}
- *outptr++ = '\n';
+ outptr = check_size (filter, outptr, &outend, 32);
- start = ++inptr;
- while (inptr < inend && *inptr != '\n')
- inptr++;
+ switch ((u = (unsigned char) *inptr++)) {
+ case '<':
+ outptr += sprintf (outptr, "%s", "&lt;");
+ html->column++;
+ break;
+
+ case '>':
+ outptr += sprintf (outptr, "%s", "&gt;");
+ html->column++;
+ break;
+
+ case '&':
+ outptr += sprintf (outptr, "%s", "&amp;");
+ html->column++;
+ break;
+
+ case '"':
+ outptr += sprintf (outptr, "%s", "&quot;");
+ html->column++;
+ break;
+
+ case '\n':
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL)
+ outptr += sprintf (outptr, "%s", "<br>");
+
+ *outptr++ = '\n';
+ start = inptr;
+ html->column = 0;
+ break;
+
+ case '\t':
+ if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) {
+ do {
+ outptr = check_size (filter, outptr, &outend, 7);
+ outptr += sprintf (outptr, "%s", "&nbsp;");
+ html->column++;
+ } while (html->column % 8);
+ break;
+ }
+ /* otherwise, FALL THROUGH */
+
+ case ' ':
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) {
+ if (inptr == in || (inptr < inend && (*(inptr + 1) == ' ' ||
+ *(inptr + 1) == '\t' ||
+ *(inptr - 1) == '\n'))) {
+ outptr += sprintf (outptr, "%s", "&nbsp;");
+ html->column++;
+ break;
+ }
+ }
+ /* otherwise, FALL THROUGH */
+
+ default:
+ if ((u >= 0x20 && u < 0x80) ||
+ (u == '\r' || u == '\t') || html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT) {
+ /* Default case, just copy. */
+ *outptr++ = (char) u;
+ } else {
+ if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
+ *outptr++ = '?';
+ else
+ outptr += g_snprintf (outptr, 9, "&#%d;", (int) u);
+ }
+ html->column++;
+ break;
+ }
}
- if (flush) {
- /* flush the rest of our input buffer */
- if (start < inend)
- outptr = writeln (filter, start, inend, outptr, &outend);
-
- if (html->pre_open) {
- /* close the pre-tag */
- outptr = check_size (filter, outptr, &outend, 10);
- outptr = g_stpcpy (outptr, "</pre>");
- }
- } else if (start < inend) {
- /* backup */
- camel_mime_filter_backup (filter, start, (unsigned) (inend - start));
+ if (inptr < inend)
+ camel_mime_filter_backup (filter, inptr, inend - inptr);
+
+ if (flush && html->pre_open) {
+ outptr = check_size (filter, outptr, &outend, 10);
+ outptr += sprintf (outptr, "%s", "</pre>");
+ html->pre_open = FALSE;
}
*out = filter->outbuf;
@@ -366,6 +493,8 @@ filter_reset (CamelMimeFilter *filter)
html->column = 0;
html->pre_open = FALSE;
+ html->saw_citation = FALSE;
+ html->coloured = FALSE;
}
static void
@@ -394,39 +523,11 @@ CamelMimeFilter *
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
{
CamelMimeFilterToHTML *new;
- int i;
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
new->flags = flags;
new->colour = colour;
- for (i = 0; i < NUM_URL_PATTERNS; i++) {
- if (patterns[i].mask & flags)
- camel_url_scanner_add (new->scanner, &patterns[i].pattern);
- }
-
return CAMEL_MIME_FILTER (new);
}
-
-
-char *
-camel_text_to_html (const char *in, guint32 flags, guint32 colour)
-{
- CamelMimeFilter *filter;
- size_t outlen, outpre;
- char *outbuf;
-
- g_return_val_if_fail (in != NULL, NULL);
-
- filter = camel_mime_filter_tohtml_new (flags, colour);
-
- camel_mime_filter_complete (filter, (char *) in, strlen (in), 0,
- &outbuf, &outlen, &outpre);
-
- outbuf = g_strndup (outbuf, outlen);
-
- camel_object_unref (filter);
-
- return outbuf;
-}