##---------------------------------------------------------------------------##
## File:
## $Id: mhtxtenrich.pl,v 2.11 2010/12/31 20:34:00 ehood Exp $
## Author:
## Earl Hood mhonarc@mhonarc.org
## Description:
## Library defines a routine for MHonArc to filter text/enriched
## data.
##
## Filter routine can be registered with the following:
##
##
## text/enriched;m2h_text_enriched::filter;mhtxtenrich.pl
## text/richtext;m2h_text_enriched::filter;mhtxtenrich.pl
##
##
##---------------------------------------------------------------------------##
## MHonArc -- Internet mail-to-HTML converter
## Copyright (C) 1997-2002 Earl Hood, mhonarc@mhonarc.org
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##---------------------------------------------------------------------------##
package m2h_text_enriched;
my %enriched_tags = (
'bigger' => 1,
'bold' => 1,
'center' => 1,
'color' => 1,
'comment' => 1,
'excerpt' => 1,
'fixed' => 1,
'flushboth' => 1,
'flushleft' => 1,
'flushright' => 1,
'fontfamily' => 1,
'indent' => 1,
'indentright' => 1,
'italic' => 1,
'lang' => 1,
'lt' => 1,
'nl' => 1,
'nofill' => 1,
'paraindent' => 1,
'param' => 1,
'samepage' => 1,
'signature' => 1,
'smaller' => 1,
'subscript' => 1,
'superscript' => 1,
'underline' => 1,
);
my %special_to_char = (
'lt' => '<',
'gt' => '>',
);
##---------------------------------------------------------------------------
## Filter routine.
## XXX: Need to update this filter. However, does anyone still use
## text/enriched anymore.
##
sub filter {
my($fields, $data, $isdecode, $args) = @_;
my($innofill, $chunk);
my $charset = $fields->{'x-mha-charset'};
my($charcnv, $real_charset_name) =
readmail::MAILload_charset_converter($charset);
my $ret = "";
$args = "" unless defined($args);
## Get content-type
my($ctype) = $fields->{'content-type'}[0] =~ m%^\s*([\w\-\./]+)%;
my $richtext = $ctype =~ /\btext\/richtext\b/i;
if (defined($charcnv) && defined(&$charcnv)) {
$$data = &$charcnv($$data, $real_charset_name);
} else {
mhonarc::htmlize($data);
warn qq/\n/,
qq/Warning: Unrecognized character set: $charset\n/,
qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
qq/ Message Subject: /, $fields->{'x-mha-subject'}, qq/\n/,
qq/ Message Number: $mhonarc::MHAmsgnum\n/
unless ($charcnv eq '-decode-');
}
## Fixup any EOL mess
$$data =~ s/\r?\n/\n/g;
$$data =~ s/\r/\n/g;
# translate back <>'s for tag processing
$$data =~ s/&([lg]t);/$special_to_char{$1}/g;
## Convert specials
if (!$richtext) {
$$data =~ s/<\</g;
}
## Make sure only non-enriched tags are escaped
$$data =~ s{<(/?)([^>]*)>}
{
my $eot = $1;
my $tag = lc $2;
$tag =~ s/\s+//g;
($enriched_tags{$tag}) ? '<'.$eot.$tag.'>' : '<'.$eot.$tag.'>';
}gexs;
$innofill = 0;
foreach $chunk (split(m|(?nofill>)|i, $$data)) {
if ($chunk =~ m||i) {
$ret .= '';
$innofill = 1;
next;
}
if ($chunk =~ m|
|i) {
$ret .= '';
$innofill = 0;
next;
}
convert_tags(\$chunk, $richtext);
if (!$richtext && !$innofill) {
$chunk =~ s/(\n\s*)/&nl_seq_to_brs($1)/ge;
}
$ret .= $chunk;
}
$ret;
}
##---------------------------------------------------------------------------
## convert_tags translates text/enriched commands to HTML tags.
##
sub convert_tags {
my $str = shift;
my $richtext = shift;
$$str =~ s{.*?}{}gis;
$$str =~ s{<(/?)bold\s*>}{<$1b>}gi;
$$str =~ s{<(/?)italic\s*>}{<$1i>}gi;
$$str =~ s{<(/?)underline\s*>}{<$1u>}gi;
$$str =~ s{<(/?)fixed\s*>}{<$1tt>}gi;
$$str =~ s{<(/?)smaller\s*>}{<$1small>}gi;
$$str =~ s{<(/?)bigger\s*>}{<$1big>}gi;
$$str =~ s{<(/?)signature\s*>}{<$1pre>}gi;
$$str =~ s{\s*([^<]+)}
{}gix;
$$str =~ s|||gi;
$$str =~ s{\s*\s*(\S+)\s*}
{}gix;
$$str =~ s|||gi;
$$str =~ s|
||gi;
$$str =~ s|
||gi;
$$str =~ s|||gi;
$$str =~ s|
||gi;
$$str =~ s|||gi;
$$str =~ s|
||gi;
$$str =~ s|||gi;
$$str =~ s|
||gi;
$$str =~ s|\s*([^<]*)||gi;
$$str =~ s|
||gi;
$$str =~ s|\s*(([^<]*))?||gi;
$$str =~ s|
||gi;
$$str =~ s|\s*([^<]*)||gi;
$$str =~ s||
|gi;
# richtext commands
$$str =~ s{?samepage\s*>}{}gi;
$$str =~ s{<(/?)subscript\s*>}{<$1sub>}gi;
$$str =~ s{<(/?)superscript\s*>}{<$1sup>}gi;
$$str =~ s{}{<}gi;
$$str =~ s{}{\f}gi;
$$str =~ s{}{}gi;
$$str =~ s{
\n?}{}gis;
$$str =~ s{}{}gi;
$$str =~ s{
}{}gi;
$$str =~ s{}{}gi;
$$str =~ s{
}{}gi;
if ($richtext) {
$$str =~ s{\n?}{
}gis;
} else {
$$str =~ s{}{}gis;
}
# Cleanup bad tags
$$str =~ s{?(?:para(?:m|indent)|excerpt|lang|color|fontfamily)\s*>}{}g;
}
##---------------------------------------------------------------------------
## nl_seq_to_brs returns a "
" string based on the number
## of eols in a string.
##
sub nl_seq_to_brs {
my($str) = shift;
my($n);
$n = $str =~ tr/\n/\n/;
--$n;
if ($n <= 0) {
return " ";
} else {
return "
\n" x $n;
}
}
##---------------------------------------------------------------------------
## preserve_space returns a string with all spaces and tabs
## converted to nbsps.
##
sub preserve_space {
my($str) = shift;
1 while
$str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e;
$str =~ s/ /\ /g;
$str;
}
##---------------------------------------------------------------------------
1;