view src/filter.c @ 500:614a3177b15c noffle tip

Add mail-from option. Some modern mail systems will try and ensure the sender email is a legitimate address. Which will fail if there isn't such an address.
author Jim Hague <jim.hague@acm.org>
date Wed, 14 Aug 2013 12:04:39 +0100
parents 0a5dc5f69746
children
line wrap: on
line source

/*
  filter.c
  
  Article filtering.
  
  $Id: filter.c 628 2004-10-13 21:59:41Z bears $
*/

#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <ctype.h>
#include "common.h"
#include "configfile.h"
#include "itemlist.h"
#include "log.h"
#include "wildmat.h"
#include "group.h"
#include "util.h"
#include "filter.h"

/*
 * Module-wide filter state: the list of registered filters plus a flag
 * recording whether any rule needs the Newsgroups: header of articles.
 * Starts out empty; Flt_addFilter() grows the list and Flt_addRule()
 * sets the flag.
 */
struct
{
    /* Number of slots of filters[] currently in use. */
    int nFilters;
    /* Number of slots currently allocated for filters[]. */
    int maxFilters;
    /* Growable array of pointers to the registered filters. */
    const Filter **filters;
    /* TRUE once a rule needing Newsgroups: headers has been added. */
    Bool needGroups;
} filter = { 0, 0, NULL, FALSE };

/*
 * Count the entries of a comma separated group list.
 *
 * The result is one more than the number of commas in grps, so an
 * empty string still counts as one entry.
 */
static unsigned long
countGroups( const char *grps )
{
    unsigned long count = 1;
    const char *cursor;

    for ( cursor = grps; *cursor != '\0'; cursor++ )
        if ( *cursor == ',' )
            count++;

    return count;
}
/*
 * Count the message ids in a References: style string.
 *
 * A reference is counted each time a '>' closes a previously seen '<'.
 * A '>' with no open '<' is ignored, a further '<' inside an open
 * reference changes nothing, and an unterminated trailing '<...' is
 * not counted.
 */
static unsigned long
countRefs( const char *refs )
{
    unsigned long complete = 0;
    Bool open = FALSE;
    const char *c;

    for ( c = refs; *c != '\0'; c++ )
    {
        switch ( *c )
        {
        case '<':
            open = TRUE;
            break;

        case '>':
            if ( open )
            {
                open = FALSE;
                complete++;
            }
            break;

        default:
            break;
        }
    }

    return complete;
}

/*
 * Check a single rule to see if it passes.
 *
 *   thisGrp     name of the group currently being processed.
 *   newsgroups  content of the article's Newsgroups: header, or NULL
 *               when it is not available.
 *   ov          overview of the article being tested.
 *   r           the rule to evaluate.
 *
 * Returns TRUE when the rule matches the article, FALSE otherwise.
 * Cross-post rules never match when newsgroups is NULL; the newsgroup
 * rule then only tests thisGrp. The "date before" and "date after"
 * rules never match when Utl_parseNewsDate() yields (time_t) -1.
 *
 * Numeric values are logged as unsigned long ("%lu") with explicit
 * casts so that the format always agrees with the argument type.
 */
static Bool
checkRule( const char *thisGrp, const char *newsgroups,
           const Over *ov, const FilterRule *r )
{
    unsigned long ul;
    ItemList *grps;
    const char *p;
    time_t articletime;
    Bool res;

    switch( r->type )
    {
    case RULE_NEWSGROUP:
        if ( Wld_match( thisGrp, r->data.grp ) )
        {
            Log_dbg( LOG_DBG_FILTER,
                     "Newsgroup rule: %s matches current group %s",
                     r->data.grp, thisGrp );
            return TRUE;
        }
        if ( newsgroups == NULL )
            return FALSE;

        res = FALSE;
        grps = new_Itl( newsgroups, " ,\t" );
        for ( p = Itl_first( grps ); p != NULL; p = Itl_next( grps ) )
            if ( Wld_match( p, r->data.grp ) )
            {
                Log_dbg( LOG_DBG_FILTER,
                         "Newsgroup rule: %s matched in %s",
                         r->data.grp, newsgroups );
                res = TRUE;
                break;
            }
        /* Release the list on the match path as well as on a miss. */
        del_Itl( grps );
        return res;

    case RULE_SUBJECT:
        res = ( regexec( &r->data.regex, Ov_subj( ov ), 0, NULL, 0 ) == 0 );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Subject rule: %s matches",
                     Ov_subj( ov ) );
        return res;

    case RULE_REFERENCE:        /* kill thread by Msg-Id in References: */
        res = ( regexec( &r->data.regex, Ov_ref( ov ), 0, NULL, 0 ) == 0 );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Reference rule: %s matches",
                     Ov_ref( ov ) );
        return res;

    case RULE_FROM:
        res = ( regexec( &r->data.regex, Ov_from( ov ), 0, NULL, 0 ) == 0 );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "From rule: %s matches",
                     Ov_from( ov ) );
        return res;

    case RULE_BYTES_LT:
        res = ( Ov_bytes( ov ) < r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: bytes %lu < %lu",
                     (unsigned long) Ov_bytes( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_BYTES_EQ:
        res = ( Ov_bytes( ov ) == r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: bytes %lu = %lu",
                     (unsigned long) Ov_bytes( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_BYTES_GT:
        res = ( Ov_bytes( ov ) > r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: bytes %lu > %lu",
                     (unsigned long) Ov_bytes( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_LINES_LT:
        res = ( Ov_lines( ov ) < r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: lines %lu < %lu",
                     (unsigned long) Ov_lines( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_LINES_EQ:
        res = ( Ov_lines( ov ) == r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: lines %lu = %lu",
                     (unsigned long) Ov_lines( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_LINES_GT:
        res = ( Ov_lines( ov ) > r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Length rule: lines %lu > %lu",
                     (unsigned long) Ov_lines( ov ),
                     (unsigned long) r->data.amount );
        return res;

    case RULE_MSGID:
        res = ( regexec( &r->data.regex, Ov_msgId( ov ), 0, NULL, 0 ) == 0 );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Msg-Id rule: %s matches",
                     Ov_msgId( ov ) );
        return res;

    case RULE_DATE_LT:
        /* Utl_parseNewsDate() is quite picky. I'm not entirely happy
           about this, but I won't implement a relaxed date parser. */
        articletime = Utl_parseNewsDate( Ov_date( ov ) );
        if ( articletime == (time_t) -1 )
            return FALSE;
        res = ( articletime < r->data.reftime.calctime );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Date before rule: %s matches",
                     Ov_date( ov ) );
        return res;

    case RULE_DATE_EQ:
        articletime = Utl_parseNewsDate( Ov_date( ov ) );
        /* An unparseable article date matches a rule asking for INVALID. */
        if ( ( articletime == (time_t) -1 )
             && ( r->data.reftime.vartime == INVALID ) )
        {
            Log_dbg( LOG_DBG_FILTER,
                     "Date equals rule: invalid date matches" );
            return TRUE;
        }
        /* Exactly one of the two is invalid: cannot be equal. */
        if ( ( articletime == (time_t) -1 )
             != ( r->data.reftime.vartime == INVALID ) )
            return FALSE;
        /* Equal means within RULE_DATE_EQ_PRECISION either side. */
        res = ( ( articletime <= r->data.reftime.calctime
                  + RULE_DATE_EQ_PRECISION )
                && ( articletime >= r->data.reftime.calctime
                     - RULE_DATE_EQ_PRECISION ) );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Date equals rule: %s matches",
                     Ov_date( ov ) );
        return res;

    case RULE_DATE_GT:
        articletime = Utl_parseNewsDate( Ov_date( ov ) );
        if ( articletime == (time_t) -1 )
            return FALSE;
        res = ( articletime > r->data.reftime.calctime );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Date after rule: %s matches",
                     Ov_date( ov ) );
        return res;

    case RULE_NOREFS_LT:
        ul = countRefs( Ov_ref( ov ) );
        res = ( ul < r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of references rule: %lu < %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_NOREFS_EQ:
        ul = countRefs( Ov_ref( ov ) );
        res = ( ul == r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of references rule: %lu = %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_NOREFS_GT:
        ul = countRefs( Ov_ref( ov ) );
        res = ( ul > r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of references rule: %lu > %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_XPOSTS_LT:
        if ( newsgroups == NULL )
            return FALSE;
        ul = countGroups( newsgroups );
        res = ( ul < r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of cross-posts rule: %lu < %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_XPOSTS_EQ:
        if ( newsgroups == NULL )
            return FALSE;
        ul = countGroups( newsgroups );
        res = ( ul == r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of cross-posts rule: %lu = %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_XPOSTS_GT:
        if ( newsgroups == NULL )
            return FALSE;
        ul = countGroups( newsgroups );
        res = ( ul > r->data.amount );
        if ( res )
            Log_dbg( LOG_DBG_FILTER,
                     "Number of cross-posts rule: %lu > %lu",
                     ul, (unsigned long) r->data.amount );
        return res;

    case RULE_POST_STATUS:
        if ( Grp_postAllow( thisGrp ) == r->data.postAllow )
        {
            Log_dbg( LOG_DBG_FILTER,
                     "Post status rule: group status matches %c",
                     r->data.postAllow );
            return TRUE;
        }
        return FALSE;

    }

    ASSERT( FALSE );	/* Shouldn't get here */
    return 0;		/* Keep compiler quiet */
}

/* Check a single filter to see if it fires. */
static Bool
checkFilter( const char *thisGrp, const char *newsgroups,
	     const Over *ov, const Filter *f )
{
    int i;

    for ( i = 0; i < f->nRules; i++ )
	if ( ! checkRule( thisGrp, newsgroups, ov, &f->rules[i] ) )
	     return FALSE;

    return TRUE;
}

/*
 * Add a filter to the list of filters.
 *
 * The list stores the pointer itself, not a copy of the filter. When
 * the list is full it is extended by five slots; a failed reallocation
 * is reported through Log_fatal().
 */
void
Flt_addFilter( const Filter *f )
{
    ASSERT( f != NULL );

    if ( filter.nFilters >= filter.maxFilters )
    {
        const int grown = filter.maxFilters + 5;

        filter.filters = realloc( filter.filters,
                                  grown * sizeof *filter.filters );
        if ( filter.filters == NULL )
            Log_fatal( "Could not realloc filter list" );
        filter.maxFilters = grown;
    }

    filter.filters[ filter.nFilters ] = f;
    filter.nFilters++;
}


/*
 * Called by Fetch_init().
 * Must be called before
 * Fetch_getNewGrps(), Client_getNewgrps(), client.c:processGrps()
 * because processGrps() sets the stampfile needed by lastupdate.
 */
void
Flt_init( const char * server )
{
    int index1, index2;
    time_t now, lastupdate;
    FilterRule * thisRule ;
    Str filename;

    time ( &now );
    lastupdate = (time_t) 0;    /* defaults to start of epoch */

    snprintf( filename, MAXCHAR, "%s/lastupdate.%s",
              Cfg_spoolDir(), server );
    if ( !Utl_getStamp( &lastupdate , filename ) )
        /* There's no stamp file if server has never been queried.
         * 
         */
        Log_dbg( LOG_DBG_FILTER,
            "Filter unable to get stamp file %s . Please query server.", filename ); 

    /* traverse all rules of all filters */
 
    for ( index1 = 0; index1 < filter.nFilters; index1++ )
    {
        for ( index2 = 0; index2 < filter.filters[ index1 ] -> nRules; index2++ )
        {
            thisRule = & ( filter.filters[ index1 ] -> rules[ index2 ] );
            switch ( thisRule -> type )
            {
            /* evaluate variable date specs */
                case RULE_DATE_LT:
                case RULE_DATE_EQ:
                case RULE_DATE_GT:
                    thisRule -> data.reftime.calctime = 
                       thisRule ->data.reftime.timeoffset;
                    switch ( thisRule ->data.reftime.vartime )
                    {
                        case NOW:
                            thisRule -> data.reftime.calctime += now;
                            break;
                        case LASTUPDATE:
                            thisRule -> data.reftime.calctime += lastupdate;
                            break;
                        default:
                            break;
                    } /* end switch( ... vartime) */

                    /* Silently fix absolute dates before the epoch.
                     * This is not the place to mock about strange dates.
                     */
                    if ( thisRule -> data.reftime.calctime < (time_t) 0 )
                        thisRule -> data.reftime.calctime = (time_t) 0 ;

#if 0		    
                    Log_dbg( LOG_DBG_FILTER, "%d: %dl = %dl + %d",
                             thisRule -> type,
                             (long) thisRule -> data.reftime.calctime,
                             (long) thisRule ->data.reftime.timeoffset,
                             (int) thisRule ->data.reftime.vartime == NOW
				   ? now
				   : thisRule ->data.reftime.vartime == LASTUPDATE
			               ? lastupdate
			               : thisRule ->data.reftime.vartime );
#endif
                    break;
                default:
                    break;
            } /* end switch( ... -> type) */ 
        } /* end for() */
    } /* end for() */
    return ;
}

/*
 * Run the registered filters, in order, over the supplied overview.
 *
 * The first filter that fires decides the outcome: its action is
 * returned, unless that action is FILTER_DEFAULT. In that case, and
 * when no filter fires at all, the action corresponding to the fetch
 * mode is returned instead.
 */
FilterAction
Flt_checkFilters( const char *thisGrp, const char *newsgroups,
                  const Over *ov, FetchMode mode )
{
    int idx;

    for ( idx = 0; idx < filter.nFilters; idx++ )
    {
        const Filter *candidate = filter.filters[ idx ];

        if ( ! checkFilter( thisGrp, newsgroups, ov, candidate ) )
            continue;

        Log_dbg( LOG_DBG_FILTER,
                 "Filter %d fired on message %s",
                 idx, Ov_msgId( ov ) );

        if ( candidate->action != FILTER_DEFAULT )
            return candidate->action;

        /* Default action requested: stop looking at further filters. */
        break;
    }

    if ( mode == FULL )
        return FILTER_FULL;
    if ( mode == THREAD )
        return FILTER_THREAD;
    if ( mode == OVER )
        return FILTER_XOVER;

    ASSERT( FALSE );	/* Shouldn't get here */
    return FILTER_FULL;	/* Keep compiler quiet */
}

/*
 * Allocate a new filter holding no rules and carrying the
 * FILTER_DEFAULT action. Allocation failure is reported through
 * Log_fatal(). Release the result with del_Filter().
 */
Filter *
new_Filter( void )
{
    Filter *created = malloc( sizeof *created );

    if ( created == NULL )
        Log_fatal( "Cannot allocate Filter" );

    created->action = FILTER_DEFAULT;
    created->rules = NULL;
    created->maxRules = 0;
    created->nRules = 0;

    return created;
}

/*
 * Free a filter together with its rule array. Passing NULL is allowed
 * and does nothing.
 */
void
del_Filter( Filter *f )
{
    if ( f != NULL )
    {
        /* free() accepts NULL, so an empty rule list needs no check. */
        free( f->rules );
        free( f );
    }
}

/* Return the action stored in filter f. */
FilterAction
Flt_action( const Filter *f )
{
    const FilterAction stored = f->action;

    return stored;
}

/* Return the number of rules currently held by filter f. */
int
Flt_nRules( const Filter *f )
{
    const int ruleCount = f->nRules;

    return ruleCount;
}

/*
 * Do we have a rule requiring us to fetch the Newsgroups: headers of
 * articles?
 */
Bool
Flt_getNewsgroups( void )
{
    return filter.needGroups;
}

/*
 * Return a copy of rule number ruleNo of filter f.
 *
 * ruleNo must satisfy 0 <= ruleNo < Flt_nRules( f ). Both bounds are
 * asserted, because ruleNo is signed and a negative value would index
 * before the start of the rule array.
 */
FilterRule
Flt_rule( const Filter *f, int ruleNo )
{
    ASSERT( f != NULL );
    ASSERT( ruleNo >= 0 && ruleNo < f->nRules );
    return f->rules[ ruleNo ];
}

/* Store action as the action of filter f, replacing the previous one. */
void
Flt_setAction( Filter *f, FilterAction action )
{
    ( *f ).action = action;
}

/*
 * Append a copy of rule to filter f.
 *
 * Adding a newsgroup rule, or a rule whose type lies in the range
 * RULE_XPOSTS_LT .. RULE_XPOSTS_GT, also records module-wide that
 * Newsgroups: headers are needed (see Flt_getNewsgroups()). When the
 * rule array is full it is extended by five entries; a failed
 * reallocation is reported through Log_fatal().
 */
void
Flt_addRule( Filter *f, FilterRule rule )
{
    /* Does the rule require Newsgroups: headers to be fetched? */
    if ( rule.type == RULE_NEWSGROUP )
        filter.needGroups = TRUE;
    else if ( rule.type >= RULE_XPOSTS_LT && rule.type <= RULE_XPOSTS_GT )
        filter.needGroups = TRUE;

    if ( f->nRules >= f->maxRules )
    {
        const int grown = f->maxRules + 5;

        f->rules = realloc( f->rules, grown * sizeof *f->rules );
        if ( f->rules == NULL )
            Log_fatal( "Could not realloc rule list" );
        f->maxRules = grown;
    }

    f->rules[ f->nRules ] = rule;
    f->nRules++;
}