#!/usr/local/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      $Id: mha-preview,v 1.4 2005/07/05 02:06:21 ehood Exp $
##  Author:
##      Earl Hood       earl@earlhood.com
##  Description:
##      Custom MHonArc-based program that supports $X-MSG-PREVIEW$
##      resource variable using the callback API.
##
##      Invoke program with -man option to see manpage.
##---------------------------------------------------------------------------##
##    Copyright (C) 2002,2005   Earl Hood, earl@earlhood.com
##    This program is free software; you can redistribute it and/or modify
##    it under the same terms as MHonArc itself.
##---------------------------------------------------------------------------##

# Uncomment and modify the following if MHonArc libraries were not
# installed in a perl's site directory or in perl's normal search path.
#use lib qw(/path/to/mhonarc/libraries);

package MHAPreview;

use strict;
use warnings;
# The $mhonarc::* variables referenced below are defined by mhamain.pl,
# which is only loaded at run time; silence "used only once" noise.
no warnings 'once';

use Getopt::Long;

# Max size of preview text: This is the maximum amount that will be
# saved for each message.  The resource variable length modifier can
# be used to always display less than max, but it is best to avoid
# doing that since it is a slow operation.  We have a custom command-line
# option (-prv-maxlen) to set the max size if code change is not desired.
my $PreviewLen = 256;

# Preview text keyed by MHonArc message index.  This has to be a package
# variable (not a lexical): db_save() hands its package-qualified name,
# 'MHAPreview::X_MessagePreview', to mhonarc::print_var().
our %X_MessagePreview;

##-----------------------------------------------------------------------##
##  Main Block
##-----------------------------------------------------------------------##

MAIN: {
    unshift(@INC, 'lib');    # Should I leave this line in?

    ## Grab options from @ARGV unique to this program.  With
    ## 'pass_through', Getopt::Long leaves options it does not recognize
    ## in @ARGV (presumably for MHonArc's own option parsing later).
    my %opts = ();
    Getopt::Long::Configure('pass_through');
    GetOptions(\%opts, 'prv-maxlen=i', 'help', 'man');
    usage(1) if $opts{'help'};
    usage(2) if $opts{'man'};
    if (defined($opts{'prv-maxlen'}) && $opts{'prv-maxlen'} > 0) {
        $PreviewLen = $opts{'prv-maxlen'};
    }

    ## Reset pass-through of options
    Getopt::Long::Configure('no_pass_through');

    ## Initialize MHonArc.  (The original "require ... || die" bound the
    ## || to the filename string, so the die could never run.)
    eval { require 'mhamain.pl'; 1 }
        or die qq/ERROR: Unable to require "mhamain.pl"\n$@/;
    mhonarc::initialize();

    ## Register callbacks for handling preview text
    register_callbacks();

    ## Process input.
    mhonarc::process_input() ? exit(0) : exit($mhonarc::CODE);
}

##-----------------------------------------------------------------------##
##  Callback Functions
##-----------------------------------------------------------------------##

## Install this package's handlers into MHonArc's callback variables.
sub register_callbacks {
    $mhonarc::CBMessageBodyRead = \&msg_body_read;
    $mhonarc::CBRcVarExpand     = \&rc_var_expand;
    $mhonarc::CBDbSave          = \&db_save;
    return;
}

## Message-body callback: extract and remember the preview text of the
## message identified by the 'x-mha-index' entry of $fields.
##   $fields  hash reference; only the 'x-mha-index' entry is read
##   $html    reference to the message body HTML
##   $files   unused here
## Always returns 1.
sub msg_body_read {
    my ($fields, $html, $files) = @_;
    my $mha_index = $fields->{'x-mha-index'};
    $X_MessagePreview{$mha_index} = extract_preview($html, $PreviewLen);
    return 1;
}

## Resource-variable callback implementing $X-MSG-PREVIEW(mesg_spec)$.
## Returns a three element list.  For X-MSG-PREVIEW it is
## (preview_text, 0, 1); for any other variable it is (undef, 0, 0) so
## that MHonArc tries to expand the variable itself.
## NOTE(review): the meaning of the middle element is not visible in this
## file -- confirm against the MHonArc callback documentation.
sub rc_var_expand {
    my ($mha_index, $var_name, $arg_str) = @_;

    # $X-MSG-PREVIEW(mesg_spec)$
    if ($var_name eq 'X-MSG-PREVIEW') {
        # Use MHonArc function to support a mesg_spec argument.  Only the
        # second returned value (the message key) is needed here.
        my (undef, $key) =
            mhonarc::compute_msg_pos($mha_index, $var_name, $arg_str);
        my $preview = defined($key) ? $X_MessagePreview{$key} : undef;
        $preview = "" unless defined($preview);
        return ($preview, 0, 1);
    }

    # If we do not recognize $var_name, make sure to tell
    # MHonArc we do not so it will try.
    return (undef, 0, 0);
}

## Database-save callback: write the preview table to the filehandle
## supplied by MHonArc.
sub db_save {
    my ($db_fh) = @_;
    # Make sure variable is package qualified!
    mhonarc::print_var($db_fh, 'MHAPreview::X_MessagePreview',
                       \%X_MessagePreview);
    return;
}

##-----------------------------------------------------------------------##
##  Support Functions
##-----------------------------------------------------------------------##

## Extract preview text from a message body.
##   $html_ref  reference to the HTML message body (not modified)
##   $prev_len  number of characters of preview wanted; a complete
##              entity reference such as "&amp;" counts as one character
## Returns the preview string, with '...' appended when body text
## remained beyond the preview.
sub extract_preview {
    # Extracting the preview text of the message body is not as
    # trivial as you may expect.  We have to deal with HTML tags
    # and entity references, but want to avoid the overhead of
    # using a full-blown HTML parser.  We also want to skip any
    # quoted text, otherwise preview text of replies would mainly
    # contain quoted text, making preview less useful.

    my $html_ref = shift;    # reference to HTML message body
    my $prev_len = shift;    # length of preview to extract

    # Make copy since we will pre-process data to make extraction easier
    my $html = $$html_ref;

    # Normalize EOLs to make other patterns simpler
    $html =~ s/\r\n/\n/g;

    # Strip out quoting using <blockquote> (for flowed and/or fancy-quoting)
    $html =~ s/<blockquote[^>]*>.*?<\/blockquote\s*>//gis;

    # Strip tags
    $html =~ s/<[^>]*>//g;

    # Quoting using > and other common styles.  The body is HTML, so a
    # quote marker normally shows up as the entity "&gt;"; a raw ">" is
    # accepted as well.
    $html =~ s/^(?:&gt;|>|[\|:\+]).*$//gm;

    # Outhouse method of quoting: drop everything from the marker line to
    # the end of the body (/m lets ^ match at any line start, /s lets .*
    # run across the remaining lines).
    $html =~ s/^-----Original Message-----.*\z//ms;

    # Remove signatures
    $html =~ s/\n-- \n.*\z//s;

    # Preamble side comments
    $html =~ s/\A(?:\s*\[[^\]]*\])+//;

    # Common quote preambles
    $html =~ s/\A\s*In\s+article.*?(?:wrote|writes|said|says):[^\S\n]*\n//si;
    # NOTE(review): the greedy .* under /s removes everything up to the
    # LAST "wrote:"-style line in the body, not just a leading preamble.
    # Left unchanged; confirm that this is intended.
    $html =~ s/\A.*(?:wrote|writes|said|says):[^\S\n]*\n//si;

    # Minimize whitespace
    $html =~ s/\s+/ /g;

    my $html_len = length($html);
    my $text     = "";
    my $pos      = 0;
    my $sublen   = $prev_len;

    while ($pos < $html_len) {
        $text .= substr($html, $pos, $sublen);
        $pos  += $sublen;

        # Check for a clipped entity reference (e.g. the cut fell after
        # "&am" of "&amp;").  If the text that follows completes the
        # reference, pull the remainder in so the entity stays intact.
        # Only a short window is examined since references are short.
        if ($pos < $html_len && $text =~ /&#?\w*\z/) {
            if (substr($html, $pos, 32) =~ /\A([#\w]*;)/) {
                $text .= $1;
                $pos  += length($1);
            }
        }

        # Compute entity reference lengths to determine "real" character
        # count and not raw character count.  Everything but the closing
        # ';' is discounted, so each reference counts as one character.
        my $er_len = 0;
        while ($text =~ /(&#?\w+);/g) {
            $er_len += length($1);
        }

        # Done if we have enough
        my $real_len = length($text) - $er_len;
        if ($real_len >= $prev_len) {
            # Only flag truncation when body text actually remains.
            $text .= '...' if $pos < $html_len;
            last;
        }
        $sublen = $prev_len - $real_len;
    }

    return $text;
}

## Print usage information taken from this file's POD, then exit.
##   $verbose  Pod::Usage verbosity level.  A true level is sent through
##             $ENV{PAGER} (default 'more') when STDOUT is a terminal and
##             the program then exits with status 0.  Level 0 is handed
##             straight to pod2usage(), which exits on its own.
sub usage {
    my $verbose = shift;
    require Pod::Usage;

    if (!$verbose) {
        Pod::Usage::pod2usage(-verbose => 0);
        exit 0;    # normally not reached; pod2usage() exits itself
    }

    my $out = \*STDOUT;
    my $pager_fh;
    if (-t STDOUT) {
        my $pager = $ENV{'PAGER'} || 'more';
        if (open($pager_fh, '|-', $pager)) {
            $out = $pager_fh;
        }
        else {
            undef $pager_fh;
        }
    }

    # 'NOEXIT' makes pod2usage() return, so the pager can be closed
    # (waiting for it to finish) and the intended exit status of 0 is
    # actually used.
    Pod::Usage::pod2usage(-verbose => $verbose,
                          -output  => $out,
                          -exitval => 'NOEXIT');
    close($pager_fh) if $pager_fh;
    exit 0;
}

##-----------------------------------------------------------------------##
__END__

=head1 NAME

mha-preview - MHonArc front-end to support message preview variable

=head1 SYNOPSIS

S<B<mha-preview> [I<options>] [I<arguments>]>

=head1 DESCRIPTION

B<mha-preview> is an example program that utilizes MHonArc's callback API
to support the special resource variable C<$X-MSG-PREVIEW$>.

The C<$X-MSG-PREVIEW$> represents the initial text of a message body.
With this variable, index pages can be customized to give a listing like
some MUAs that provide a glimpse of the message body in the mail listing
of a mail folder.

When extracting the preview text of the message body, all HTML tags are
removed and whitespace is compressed.

B<CAUTION>: If B<mha-preview> is used for an archive, it should always be
used to process the archive.  Otherwise, the message preview data will be
lost.

=head1 OPTIONS

B<mha-preview> takes the same options available to B<mhonarc> along with
the following additional options:

=over

=item C<-help>

Print a usage summary of this program (this option overrides
B<mhonarc>'s C<-help> option).

=item C<-man>

Print the manpage for this program.

=item C<-prv-maxlen>

Maximum number of characters of the message body to store for each
message.  The default value is 256.

=back

=head1 NOTES

=over

=item *

The functionality of this program could be placed into the
C<mhasiteinit.pl> library to avoid the need for this program and to make
it part of the locally installed B<mhonarc>.  This would avoid the
problem noted in the CAUTION mentioned in the L</DESCRIPTION>.

=item *

The body preview resource variable may be worth putting into the MHonArc
code base directly.

=back

=head1 SEE ALSO

mhonarc(1)

=head1 LICENSE

B<mha-preview> comes with ABSOLUTELY NO WARRANTY and can be distributed
under the same terms as MHonArc itself.

=head1 AUTHOR

Earl Hood, earl@earlhood.com

=cut