# HG changeset patch # User mirkol # Date 1021386345 -3600 # Node ID 0340b9c17edc86248bcbb97a232508255604eb56 # Parent cd022deb83903b4508e80b25decf238c8155dfc6 [svn] *** empty log message *** diff -r cd022deb8390 -r 0340b9c17edc ChangeLog --- a/ChangeLog Tue Mar 26 17:52:48 2002 +0000 +++ b/ChangeLog Tue May 14 15:25:45 2002 +0100 @@ -1,3 +1,18 @@ +Tue May 14 2002 Mirko Liß + +* src/configfile.c,src/filter.c,src/filter.h,src/fetch.c,src/noffle.c, + noffle.conf.example,docs/noffle.conf.5: Added new filter rules + 'reference=regex', 'older=lastupdate-3', 'date=invalid', 'newer=now+1.5'. +* src/group.c:Grp_isValidGroupName(): discard the groups 'poster', 'junk', + and the hierarchies 'to', 'control', 'ctl','example', '+', '-'. +* src/protocol.c: Allow hostname=%name@dom.ain to generate MsgIds like + . The '@' sign will only be added if there's + no '@' present in the hostname. +* src/protocol.c:Prt_genFromHdr(): Replace the oldfashioned From:-Header + content 'pwname@domain (Name)' by '"Name" '. +* src/request.c:storeMsgId(): check of realloc() result added. + + Tue Mar 26 2002 Mirko Liß * src/database.c,src/protocol.c,src/post.c: Handle header line folding diff -r cd022deb8390 -r 0340b9c17edc docs/noffle.conf.5 --- a/docs/noffle.conf.5 Tue Mar 26 17:52:48 2002 +0000 +++ b/docs/noffle.conf.5 Tue May 14 15:25:45 2002 +0100 @@ -1,5 +1,5 @@ .TH noffle.conf 5 -.\" $Id: noffle.conf.5 365 2002-02-08 17:06:47Z bears $ +.\" $Id: noffle.conf.5 381 2002-05-14 14:25:45Z mirkol $ .SH NAME noffle.conf \- Configuration file for NOFFLE news server @@ -406,12 +406,38 @@ Matches if the number of articles referenced by the article is less than, equal to, or greater than the given number. .PP +.B reference += +.IR "" . +Matches if one of the message IDs in the reference line matches the +given regular expression. See the section on regular expressions below. +.PP .B xposts < or = or > .IR . Matches if the number of groups the article is posted to is less than, equal to, or greater than the given number. .PP +.B date +< or = or > +.IR "" . +Matches if the article is older, from the same day or newer than the +given date expression. See the section on date expressions below. +.PP +.B older += +.IR "" . +Equals +.B date < +. +.PP +.B newer += +.IR "" . +Equals +.B date > +. +.PP .B post-status = .IR "mod|yes|no" . @@ -447,6 +473,29 @@ text. A full description is to be found in .BR regex (7). +.SH DATE EXPRESSIONS + +.B NOFFLE +uses very simple date expressions. You can use fixed dates in rfc-2822 style +or variable dates: +.PP +.I date="14 May 2002 18:32:50 +0200" +matches any article sent up to 24 hours before or after the above fixed date. +Please don't forget the timezone specification. +.PP +.I date>"now+1.5" +matches any article newer than 36 hours from the current date. +.PP +.I date="lastupdate-14" +matches any article older than 14 days since the date of the last +.B noffle --fetch +or +.B noffle --query groups +from the current newsserver. +.PP +.I date="invalid" +matches any article with an invalid date header. +.PP .SH SEE ALSO .BR noffle (1) diff -r cd022deb8390 -r 0340b9c17edc noffle.conf.example --- a/noffle.conf.example Tue Mar 26 17:52:48 2002 +0000 +++ b/noffle.conf.example Tue May 14 15:25:45 2002 +0100 @@ -63,6 +63,7 @@ replace-messageid no #hostname UserId-XXXXX_newsserver +#hostname %user@domain.tld # Post articles to remote groups into the local database immediately # on receipt of article. @@ -89,7 +90,12 @@ # 2. Fetch articles cross-posted to more than 3 groups where one of the # groups is alt.flame in overview mode only. # 3. Fetch all articles > 20k in size in overview mode only. +# 4. Fetch all articles older than 9.5 weeks in overview mode only. +# 5. Fetch all articles referencing your or other NOFFLE users' +# articles in full mode. Works only with 'replace-messageid yes'. #filter subject="\$+.*Make.*Money.*Now.*\$\$\$" action=discard #filter xposts>3 group=alt.flame action=over #filter bytes>20k action=over +#filter date 31536000.0 || timef < -31536000.0 ) + return FALSE; + *timeoffsetp = (time_t) timef; + /* Todo: check if any garbage follows. */ + return TRUE; +} + static void getFilter( const char *line ) { @@ -730,33 +774,49 @@ rule.type = RULE_NEWSGROUP; else if ( strcmp( ruleName, "subject" ) == 0 ) rule.type = RULE_SUBJECT; + else if ( strcmp( ruleName, "reference" ) == 0 ) + rule.type = RULE_REFERENCE; else if ( strcmp( ruleName, "from" ) == 0 ) rule.type = RULE_FROM; else if ( strcmp( ruleName, "msgid" ) == 0 ) rule.type = RULE_MSGID; else if ( strcmp( ruleName, "bytes" ) == 0 ) - rule.type = RULE_BYTES_LT; + rule.type = RULE_BYTES_EQ; else if ( strcmp( ruleName, "lines" ) == 0 ) - rule.type = RULE_LINES_LT; + rule.type = RULE_LINES_EQ; else if ( strcmp( ruleName, "refs" ) == 0 ) - rule.type = RULE_NOREFS_LT; + rule.type = RULE_NOREFS_EQ; else if ( strcmp( ruleName, "xposts" ) == 0 ) - rule.type = RULE_XPOSTS_LT; + rule.type = RULE_XPOSTS_EQ; else if ( strcmp( ruleName, "post-status" ) == 0 ) rule.type = RULE_POST_STATUS; + else if ( strcmp( ruleName, "date" ) == 0 ) + rule.type = RULE_DATE_EQ; + /* datenow+1.5 equals newer=now+1.5 + * date=now equals older=now+1 AND newer=now-1 + * Stupid people like Mirko keep making mistakes + * if they're forced using date< or date>. + */ + else if ( strcmp( ruleName, "older" ) == 0 ) + rule.type = RULE_DATE_LT; + else if ( strcmp( ruleName, "newer" ) == 0 ) + rule.type = RULE_DATE_GT; + else if ( strcmp( ruleName, "action" ) != 0 ) goto synErr; - if ( rule.type == RULE_BYTES_LT || - rule.type == RULE_LINES_LT || - rule.type == RULE_NOREFS_LT || - rule.type == RULE_XPOSTS_LT ) + if ( rule.type == RULE_BYTES_EQ || + rule.type == RULE_LINES_EQ || + rule.type == RULE_NOREFS_EQ || + rule.type == RULE_XPOSTS_EQ || + rule.type == RULE_DATE_EQ ) { - if ( *l == '=' ) - rule.type += 1; + if ( *l == '<' ) + rule.type--; else if ( *l == '>' ) - rule.type += 2; - else if ( *l != '<' ) + rule.type++; + else if ( *l != '=' ) goto synErr; } else if ( *l != '=' ) @@ -796,6 +856,7 @@ goto synErr; } else if (rule.type == RULE_POST_STATUS ) + { if ( ( strcmp( value, "yes" ) == 0 ) || \ ( strcmp( value, "no" ) == 0 ) || \ ( strncmp( value, "mod", 3 ) == 0 ) ) @@ -803,6 +864,17 @@ rule.data.postAllow = value[0]; /* 'y','n' or 'm' */ else goto synErr; + } + else if ( rule.type == RULE_DATE_LT || + rule.type == RULE_DATE_EQ || + rule.type == RULE_DATE_GT ) + { + if ( !get_simpledate( &rule.data.reftime.timeoffset, &rule.data.reftime.vartime, value ) ) + goto synErr; + if ( rule.type != RULE_DATE_EQ && + rule.data.reftime.vartime == INVALID ) + goto synErr; + } else { char * endVal; diff -r cd022deb8390 -r 0340b9c17edc src/fetch.c --- a/src/fetch.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/fetch.c Tue May 14 15:25:45 2002 +0100 @@ -1,7 +1,7 @@ /* fetch.c - $Id: fetch.c 342 2001-12-09 12:31:57Z bears $ + $Id: fetch.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -29,6 +29,7 @@ #include "dynamicstring.h" #include "fetch.h" #include "fetchlist.h" +#include "filter.h" #include "request.h" #include "group.h" #include "lock.h" @@ -169,6 +170,10 @@ Log_err( "Could not open message base" ); return FALSE; } + Flt_init( fetch.serv ); /* Get filter data. Sorry, can't do it in Client_getOver(). + * This is the lowest procedure not in the + * noffle.c:doFetch() tree. */ + return fetchNewArts( name, mode ); } diff -r cd022deb8390 -r 0340b9c17edc src/filter.c --- a/src/filter.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/filter.c Tue May 14 15:25:45 2002 +0100 @@ -3,7 +3,7 @@ Article filtering. - $Id: filter.c 331 2001-11-22 12:04:45Z mirkol $ + $Id: filter.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -12,11 +12,13 @@ #include #include "common.h" -#include "filter.h" +#include "configfile.h" #include "itemlist.h" #include "log.h" #include "wildmat.h" #include "group.h" +#include "util.h" +#include "filter.h" struct { @@ -76,6 +78,7 @@ unsigned long ul; ItemList *grps; const char *p; + time_t articletime; switch( r->type ) { @@ -95,6 +98,9 @@ case RULE_SUBJECT: return ( regexec( &r->data.regex, Ov_subj( ov ), 0, NULL, 0 ) == 0 ); + case RULE_REFERENCE: /* kill thread by Msg-Id in References: */ + return ( regexec( &r->data.regex, Ov_ref( ov ), 0, NULL, 0 ) == 0 ); + case RULE_FROM: return ( regexec( &r->data.regex, Ov_from( ov ), 0, NULL, 0 ) == 0 ); @@ -119,6 +125,33 @@ case RULE_MSGID: return ( regexec( &r->data.regex, Ov_msgId( ov ), 0, NULL, 0 ) == 0 ); + case RULE_DATE_LT: + /* Utl_parseNewsDate() is quite picky. I'm not entirely happy + about this, but I won't implement a relaxed date parser. */ + articletime = Utl_parseNewsDate( Ov_date( ov ) ); + if ( articletime == (time_t) -1 ) + return FALSE; + return ( articletime < r->data.reftime.calctime ); + + case RULE_DATE_EQ: + articletime = Utl_parseNewsDate( Ov_date( ov ) ); + if ( ( articletime == (time_t) -1) + && ( r->data.reftime.vartime == INVALID )) + return TRUE; + if ( ( articletime == (time_t) -1) + != ( r->data.reftime.vartime == INVALID )) + return FALSE; + return ( ( articletime <= r->data.reftime.calctime + + RULE_DATE_EQ_PRECISION ) + && ( articletime >= r->data.reftime.calctime + - RULE_DATE_EQ_PRECISION ) ); + + case RULE_DATE_GT: + articletime = Utl_parseNewsDate( Ov_date( ov ) ); + if ( articletime == (time_t) -1 ) + return FALSE; + return ( articletime > r->data.reftime.calctime ); + case RULE_NOREFS_LT: ul = countRefs( Ov_ref( ov ) ); return ( ul < r->data.amount ); @@ -196,6 +229,83 @@ filter.filters[ filter.nFilters++ ] = f; } + +/* + * Called by Fetch_init(). + * Must be called before + * Fetch_getNewGrps(), Client_getNewgrps(), client.c:processGrps() + * because processGrps() sets the stampfile needed by lastupdate. + */ +void +Flt_init( const char * server ) +{ + int index1, index2; + time_t now, lastupdate; + FilterRule * thisRule ; + Str filename; + + time ( &now ); + lastupdate = (time_t) 0; /* defaults to start of epoch */ + + snprintf( filename, MAXCHAR, "%s/lastupdate.%s", + Cfg_spoolDir(), server ); + if ( !Utl_getStamp( &lastupdate , filename ) ) + /* There's no stamp file if server has never been queried. + * + */ + Log_dbg( LOG_DBG_FILTER, + "Filter unable to get stamp file %s . Please query server.", filename ); + + /* traverse all rules of all filters */ + + for ( index1 = 0; index1 < filter.nFilters; index1++ ) + { + for ( index2 = 0; index2 < filter.filters[ index1 ] -> nRules; index2++ ) + { + thisRule = & ( filter.filters[ index1 ] -> rules[ index2 ] ); + switch ( thisRule -> type ) + { + /* evaluate variable date specs */ + case RULE_DATE_LT: + case RULE_DATE_EQ: + case RULE_DATE_GT: + thisRule -> data.reftime.calctime = + thisRule ->data.reftime.timeoffset; + switch ( thisRule ->data.reftime.vartime ) + { + case NOW: + thisRule -> data.reftime.calctime += now; + break; + case LASTUPDATE: + thisRule -> data.reftime.calctime += lastupdate; + break; + default: + break; + } /* end switch( ... vartime) */ + + /* Silently fix absolute dates before the epoch. + * This is not the place to mock about strange dates. + */ + if ( thisRule -> data.reftime.calctime < (time_t) 0 ) + thisRule -> data.reftime.calctime = (time_t) 0 ; + +/* Log_dbg( LOG_DBG_FILTER, "%d: %dl = %dl + %d", + * thisRule -> type, + * (long) thisRule -> data.reftime.calctime, + * (long) thisRule ->data.reftime.timeoffset, + * (int) ( thisRule ->data.reftime.vartime == NOW ? + * now : + * ( thisRule ->data.reftime.vartime == LASTUPDATE ? + * lastupdate : thisRule ->data.reftime.vartime ) ) ); +*/ break; + default: + break; + } /* end switch( ... -> type) */ + } /* end for() */ + } /* end for() */ + return ; +} + /* * Run the rules over the supplied overview. If a specific rule fires, * returns its action. If no rule fires, or a rule specifying the default diff -r cd022deb8390 -r 0340b9c17edc src/filter.h --- a/src/filter.h Tue Mar 26 17:52:48 2002 +0000 +++ b/src/filter.h Tue May 14 15:25:45 2002 +0100 @@ -3,7 +3,7 @@ Article filtering. - $Id: filter.h 331 2001-11-22 12:04:45Z mirkol $ + $Id: filter.h 381 2002-05-14 14:25:45Z mirkol $ */ #ifndef FILTER_H @@ -27,20 +27,41 @@ typedef enum { RULE_NEWSGROUP, /* Wildmat data */ RULE_SUBJECT, /* Regex data */ + RULE_REFERENCE, RULE_FROM, RULE_MSGID, RULE_BYTES_LT, RULE_BYTES_EQ, RULE_BYTES_GT, /* Number data */ RULE_LINES_LT, RULE_LINES_EQ, RULE_LINES_GT, RULE_NOREFS_LT, RULE_NOREFS_EQ, RULE_NOREFS_GT, RULE_XPOSTS_LT, RULE_XPOSTS_EQ, RULE_XPOSTS_GT, + RULE_DATE_LT, RULE_DATE_EQ, RULE_DATE_GT, RULE_POST_STATUS /* 'y','n','m' */ } FilterRuleType; +/* Data for Date: header parsing. */ + +#define RULE_DATE_EQ_PRECISION ((time_t) (24*60*60)) /* +/- 24 hours precision */ + +typedef enum { + NOW, /* beginning of fetch */ + LASTUPDATE, /* of last fetch */ + INVALID, /* invalid dates, only RULE_DATE_EQ */ + FIXED /* fixed time */ +} FilterRuleDateEnumType; + +typedef struct { + time_t calctime; /* calctime = vartime + timeoffset */ + time_t timeoffset; + FilterRuleDateEnumType vartime; +} FilterRuleDateType; + + typedef union { regex_t regex; unsigned long amount; char *grp; char postAllow; /* 'y','n','m' */ + FilterRuleDateType reftime; } FilterRuleData; typedef struct { @@ -60,6 +81,14 @@ void Flt_addFilter( const Filter *f ); + +/* + * Called by client.c once before processing a batch of overviews + * with Flt_checkFilters(). + */ +void +Flt_init( const char *filename ); + /* * Run the rules over the supplied overview. If a specific rule fires, * returns its action. If no rule fires, return the default read mode. diff -r cd022deb8390 -r 0340b9c17edc src/group.c --- a/src/group.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/group.c Tue May 14 15:25:45 2002 +0100 @@ -7,7 +7,7 @@ loadGrp() and saveGrp(). This is done transparently. Access to the groups database is done by group name, by the functions defined in group.h. - $Id: group.c 374 2002-03-15 10:49:56Z bears $ + $Id: group.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -416,16 +416,19 @@ Grp_isValidGroupName( const char *name) { const char *pname, *ppat; - const char *illegalchars = "\t\n\r,"; /* Are there any other illegal characters? */ + const char *illegalchars = "\t\n\r,/:\\"; /* Are there any other dangerous characters? */ /* Find directory prefixes to prevent exploits. */ switch ( name[0] ) { case '.': /* prevent noffle -C ../fetchlist */ - case '/': /* prevent noffle -C /etc/noffle.conf */ - case ':': - case '\\': - return FALSE; /* group name invalid */ + case '+': + case '-': /* reserved for internal use of implementations + * rf. draft-ietf-usefor-article-06.txt, ch 5.5.1 */ + return FALSE; /* group name invalid */ + break; + default: + break; } /* Find illegal characters. */ @@ -442,6 +445,24 @@ else pname += 3; } + + /* Find "ctl", "ctl.*", "*.ctl" or "*.ctl.*" */ + pname = name; + while ( ( ppat = strstr( pname, "ctl" ) ) != NULL ) + { + if ( ( ppat == name || *(ppat - 1) == '.' ) + && ( *(ppat+4) == '\0' || *(ppat+4) == '.' ) ) + return FALSE; + else + pname += 3; + } + /* Find some special groups and hierarchies. */ + if ( !( strcmp( name, "poster" ) && strcmp( name, "junk" ) + && strcmp( name, "control" ) && strcmp( name, "to" ) + && strncmp( name, "control.", 8 ) && strncmp( name, "to.", 3 ) + && strncmp( name, "example.", 8 ) ) ) + return FALSE; + /* Group name is hopefully valid. */ return TRUE; diff -r cd022deb8390 -r 0340b9c17edc src/noffle.c --- a/src/noffle.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/noffle.c Tue May 14 15:25:45 2002 +0100 @@ -10,7 +10,7 @@ received for some seconds (to allow multiple clients connect at the same time). - $Id: noffle.c 368 2002-02-14 17:14:34Z bears $ + $Id: noffle.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -32,6 +32,7 @@ #include "database.h" #include "fetch.h" #include "fetchlist.h" +#include "filter.h" #include "group.h" #include "itemlist.h" #include "log.h" @@ -170,6 +171,8 @@ connOK = Fetch_postArts(); + Flt_init( serv ); /* get filter data before processGrps() calls Utl_stamp(). */ + connOK = connOK && Fetch_getNewGrps(); /* Get overviews of new articles and store IDs of new articles diff -r cd022deb8390 -r 0340b9c17edc src/protocol.c --- a/src/protocol.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/protocol.c Tue May 14 15:25:45 2002 +0100 @@ -1,7 +1,7 @@ /* protocol.c - $Id: protocol.c 379 2002-03-26 17:52:01Z mirkol $ + $Id: protocol.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -270,9 +270,12 @@ Str head, domain; int len, headLen; const char *p; + const char * specials = "\t\r\n ()@<>"; /* hmm, check "\\\'\"[]" as well? */ len = strlen( msgId ); - p = strstr( msgId, "@" ); + if ( len > 250 ) + return FALSE; /* see draft-ietf-usefor-article-06.txt, ch 5.3 */ + p = strchr( msgId, '@' ); if ( msgId[ 0 ] != '<' || msgId[ len - 1 ] != '>' || p == NULL ) return FALSE; strcpy( domain, p + 1 ); @@ -280,12 +283,20 @@ headLen = p - msgId - 1; Utl_cpyStrN( head, msgId + 1, headLen ); head[ headLen ] = '\0'; + for ( p = msgId ; *p != '\0' ; p++ ) + { + if ( ( (unsigned char ) *p ) >= 128 ) + return FALSE; /* pure 7bit ASCII */ + } + if ( strpbrk( head, specials ) ) + return FALSE; + if ( strpbrk( domain, specials ) ) + return FALSE; /* - To do: check for special characters in head and domain (non-printable - or '@', '<', '>'). Maybe compare domain with a config option + To do: Maybe compare domain with a config option and replace it by the config option, if not equal. */ - if ( strstr( domain, "." ) == NULL ) + if ( strchr( domain, '.' ) == NULL ) return FALSE; return TRUE; } @@ -295,12 +306,17 @@ { Str domain, date; time_t t; - static long count = 0; + static long count = 0; + const char *pattern; getDomain( domain, from ); time( &t ); strftime( date, MAXCHAR, "%Y%m%d%H%M%S", gmtime( &t ) ); - snprintf( msgId, MAXCHAR, "<%s.%X.%lx.%s@%s>", date, getpid(), count++ ,suffix, domain ); + if ( strchr( domain, '@' ) ) + pattern = "<%s.%X.%lx.%s%s>"; + else + pattern = "<%s.%X.%lx.%s@%s>"; + snprintf( msgId, MAXCHAR, pattern , date, getpid(), count++ ,suffix, domain ); ASSERT( Prt_isValidMsgId( msgId ) ); } @@ -359,12 +375,21 @@ } /* OK, build From: contents */ +/* deprecated. Utl_cpyStr( fromHdr, pwd->pw_name ); Utl_catStr( fromHdr, "@" ); Utl_catStr( fromHdr, domain ); Utl_catStr( fromHdr, " (" ); Utl_catStr( fromHdr, name ); Utl_catStr( fromHdr, ")" ); +*/ + Utl_cpyStr( fromHdr, "\"" ); + Utl_catStr( fromHdr, name ); + Utl_catStr( fromHdr, "\" <" ); + Utl_catStr( fromHdr, pwd->pw_name ); + Utl_catStr( fromHdr, "@" ); + Utl_catStr( fromHdr, domain ); + Utl_catStr( fromHdr, ">" ); return TRUE; } diff -r cd022deb8390 -r 0340b9c17edc src/request.c --- a/src/request.c Tue Mar 26 17:52:48 2002 +0000 +++ b/src/request.c Tue May 14 15:25:45 2002 +0100 @@ -3,7 +3,7 @@ Collection of articles that are marked for download. - $Id: request.c 316 2001-10-31 11:44:53Z bears $ + $Id: request.c 381 2002-05-14 14:25:45Z mirkol $ */ #if HAVE_CONFIG_H @@ -190,7 +190,11 @@ if (rs->reql_length >= rs->reql_capacity) { int c1 = rs->reql_capacity*2 + 10; - rs->reql = (char**) realloc(rs->reql, c1*sizeof(char*)); + if ( ! ( rs->reql = (char**) realloc(rs->reql, c1*sizeof(char*) ) ) ) + { + Log_err( "Could not realloc requests." ); + exit( EXIT_FAILURE ); + } rs->reql_capacity = c1; }