changeset 255:52f467c7213b noffle

[svn] * docs/noffle.1,src/Makefile.am,src/Makefile.in,src/content.c, src/content.h,src/database.c,src/database.h,src/expire.c, src/expire.h,src/noffle.c: Split out expire code from database.c, change to remove articles in place (rather than rebuild article database) and add separate command to rebuild article database from articles listed in overviews. This may help if the article database gets corrupted.
author bears
date Wed, 26 Jun 2002 14:15:44 +0100
parents 4c0f54d51591
children b510f6a65a79
files ChangeLog docs/noffle.1 src/Makefile.am src/Makefile.in src/content.c src/content.h src/database.c src/database.h src/expire.c src/expire.h src/noffle.c
diffstat 11 files changed, 484 insertions(+), 203 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Jun 26 14:14:56 2002 +0100
+++ b/ChangeLog	Wed Jun 26 14:15:44 2002 +0100
@@ -1,3 +1,20 @@
+Wed Jun 26 2002 Jim Hague <jim.hague@acm.org>
+
+* aclocal.m4: New aclocal version.
+* Makefile.in,docs/Makefile.in: Update to reflect last changes to
+  Makefile.am(s). Oops.
+* configure,configure.in: Add -DDEBUG to build lines when configured
+  with enable-debug.
+* docs/noffle.1,src/Makefile.am,src/Makefile.in,src/content.c,
+  src/content.h,src/database.c,src/database.h,src/expire.c,
+  src/expire.h,src/noffle.c: Split out expire code from database.c,
+  change to remove articles in place (rather than rebuild article
+  database) and add separate command to rebuild article database
+  from articles listed in overviews. This may help if the article
+  database gets corrupted.
+* src/protocol.c: Change strcpy to Utl_strcpy and replace ascii check
+  with isascii().
+	
 Wed Jun 5 2002 Mirko Liß <mirko.liss@web.de>
 
 * src/group.c,src/client.c,src/noffle.c: rename Grp_isValidGroupname
--- a/docs/noffle.1	Wed Jun 26 14:14:56 2002 +0100
+++ b/docs/noffle.1	Wed Jun 26 14:15:44 2002 +0100
@@ -1,5 +1,5 @@
 .TH noffle 1
-.\" $Id: noffle.1 328 2001-11-14 20:23:15Z mirkol $
+.\" $Id: noffle.1 387 2002-06-26 13:15:44Z bears $
 .SH NAME
 noffle \- Usenet package optimized for dialup connections.
 
@@ -54,6 +54,9 @@
 \-q | \-\-query groups|desc [server pattern]
 .br
 .B noffle
+\-B | \-\-rebuild
+.br
+.B noffle
 \-R | \-\-requested
 .br
 .B noffle
@@ -279,6 +282,15 @@
 .B HELP.
 
 .TP
+.B \-B, \-\-rebuild
+Rebuild the article database. This builds a fresh copy of the
+.B NOFFLE
+article database. All locally held groups are scanned, and all articles
+currently active in those groups are copied from the old article
+database to the new one. The new one then replaces the old one.
+This can be useful in cases of article database corruption.
+
+.TP
 .B \-R, \-\-requested
 List articles that are marked for download.
 
--- a/src/Makefile.am	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/Makefile.am	Wed Jun 26 14:15:44 2002 +0100
@@ -11,6 +11,7 @@
 control.c control.h \
 database.c database.h \
 dynamicstring.c dynamicstring.h \
+expire.c expire.h \
 fetch.c fetch.h \
 fetchlist.c fetchlist.h \
 filter.c filter.h \
--- a/src/Makefile.in	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/Makefile.in	Wed Jun 26 14:15:44 2002 +0100
@@ -71,7 +71,7 @@
 
 bin_PROGRAMS = noffle
 
-noffle_SOURCES =  client.c client.h common.h configfile.c configfile.h content.c content.h control.c control.h database.c database.h dynamicstring.c dynamicstring.h fetch.c fetch.h fetchlist.c fetchlist.h filter.c filter.h group.c group.h itemlist.c itemlist.h lock.c lock.h log.c log.h noffle.c online.c online.h outgoing.c outgoing.h over.c over.h portable.h post.c post.h protocol.c protocol.h pseudo.c pseudo.h request.c request.h server.c server.h util.c util.h wildmat.c wildmat.h
+noffle_SOURCES =  client.c client.h common.h configfile.c configfile.h content.c content.h control.c control.h database.c database.h dynamicstring.c dynamicstring.h expire.c expire.h fetch.c fetch.h fetchlist.c fetchlist.h filter.c filter.h group.c group.h itemlist.c itemlist.h lock.c lock.h log.c log.h noffle.c online.c online.h outgoing.c outgoing.h over.c over.h portable.h post.c post.h protocol.c protocol.h pseudo.c pseudo.h request.c request.h server.c server.h util.c util.h wildmat.c wildmat.h
 
 
 noffle_LDADD = -lgdbm
@@ -86,9 +86,9 @@
 LDFLAGS = @LDFLAGS@
 LIBS = @LIBS@
 noffle_OBJECTS =  client.o configfile.o content.o control.o database.o \
-dynamicstring.o fetch.o fetchlist.o filter.o group.o itemlist.o lock.o \
-log.o noffle.o online.o outgoing.o over.o post.o protocol.o pseudo.o \
-request.o server.o util.o wildmat.o
+dynamicstring.o expire.o fetch.o fetchlist.o filter.o group.o \
+itemlist.o lock.o log.o noffle.o online.o outgoing.o over.o post.o \
+protocol.o pseudo.o request.o server.o util.o wildmat.o
 noffle_DEPENDENCIES = 
 noffle_LDFLAGS = 
 CFLAGS = @CFLAGS@
@@ -103,11 +103,12 @@
 TAR = tar
 GZIP_ENV = --best
 DEP_FILES =  .deps/client.P .deps/configfile.P .deps/content.P \
-.deps/control.P .deps/database.P .deps/dynamicstring.P .deps/fetch.P \
-.deps/fetchlist.P .deps/filter.P .deps/group.P .deps/itemlist.P \
-.deps/lock.P .deps/log.P .deps/noffle.P .deps/online.P .deps/outgoing.P \
-.deps/over.P .deps/post.P .deps/protocol.P .deps/pseudo.P \
-.deps/request.P .deps/server.P .deps/util.P .deps/wildmat.P
+.deps/control.P .deps/database.P .deps/dynamicstring.P .deps/expire.P \
+.deps/fetch.P .deps/fetchlist.P .deps/filter.P .deps/group.P \
+.deps/itemlist.P .deps/lock.P .deps/log.P .deps/noffle.P .deps/online.P \
+.deps/outgoing.P .deps/over.P .deps/post.P .deps/protocol.P \
+.deps/pseudo.P .deps/request.P .deps/server.P .deps/util.P \
+.deps/wildmat.P
 SOURCES = $(noffle_SOURCES)
 OBJECTS = $(noffle_OBJECTS)
 
--- a/src/content.c	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/content.c	Wed Jun 26 14:15:44 2002 +0100
@@ -1,7 +1,7 @@
 /*
   content.c
 
-  $Id: content.c 357 2001-12-18 15:24:49Z mirkol $
+  $Id: content.c 387 2002-06-26 13:15:44Z bears $
 */
 
 #if HAVE_CONFIG_H
@@ -397,3 +397,14 @@
     }
     return Cont_nextGrp( result );
 }
+
+Bool
+Cont_exists( const char *grp )
+{
+    Str fname;
+
+    /* Do we have a content/overview file for this group? */
+    snprintf( fname, MAXCHAR, "%s/overview/%s", Cfg_spoolDir(), grp );
+    return ( access( fname, R_OK ) == 0 );    
+}
+
--- a/src/content.h	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/content.h	Wed Jun 26 14:15:44 2002 +0100
@@ -8,7 +8,7 @@
   filename SPOOLDIR/overview/GROUPNAME. One entire overview file is read
   and cached in memory, at a time.
 
-  $Id: content.h 342 2001-12-09 12:31:57Z bears $ 
+  $Id: content.h 387 2002-06-26 13:15:44Z bears $ 
 */
 
 #ifndef CONT_H
@@ -70,4 +70,7 @@
 void
 Cont_expire( void );
 
+Bool
+Cont_exists( const char *grp );
+
 #endif
--- a/src/database.c	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/database.c	Wed Jun 26 14:15:44 2002 +0100
@@ -1,7 +1,7 @@
 /*
   database.c
 
-  $Id: database.c 379 2002-03-26 17:52:01Z mirkol $
+  $Id: database.c 387 2002-06-26 13:15:44Z bears $
 
   Uses GNU gdbm library. Using Berkeley db (included in libc6) was
   cumbersome. It is based on Berkeley db 1.85, which has severe bugs
@@ -16,19 +16,23 @@
 #include <stdio.h>
 #include <ctype.h>
 #include <errno.h>
-#include <fcntl.h>
 #include <gdbm.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include "configfile.h"
+#include "content.h"
 #include "database.h"
-#include "itemlist.h"
+#include "group.h"
 #include "log.h"
 #include "protocol.h"
 #include "util.h"
 #include "portable.h"
 
+static const char ARTICLE_FILENAME_FMT[] = "%s/data/articles.gdbm";
+static const char ARTICLE_NEW_FILENAME_FMT[] = "%s/data/articles.gdbm.new";
+
+
 static struct Db
 {
     GDBM_FILE dbf;
@@ -60,7 +64,7 @@
 static const char *
 errMsg( void )
 {
-    if ( errno != 0 )
+    if ( gdbm_errno == GDBM_NO_ERROR )
         return strerror( errno );
     return gdbm_strerror( gdbm_errno );
 }
@@ -72,7 +76,7 @@
     int flags;
 
     ASSERT( db.dbf == NULL );
-    snprintf( name, MAXCHAR, "%s/data/articles.gdbm", Cfg_spoolDir() );
+    snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
     flags = GDBM_WRCREAT | GDBM_FAST;
 
     if ( ! ( db.dbf = gdbm_open( name, 512, flags, 0644, NULL ) ) )
@@ -582,123 +586,170 @@
     return ( cursor.dptr != NULL );
 }
 
-static int
-calcExpireDays( const char *msgId )
+void
+Db_compact( void )
+{
+    ASSERT( db.dbf );
+    if ( gdbm_reorganize( db.dbf ) != 0 )
+	Log_err( "Error compacting article base: %s", errMsg() );
+}
+
+/*
+  Helper functions for database rebuild.
+*/
+
+static struct DbNew
+{
+    GDBM_FILE dbf;
+
+} dbNew = { NULL };
+
+
+static Bool
+newOpen( void )
 {
-    const char *xref;
-    ItemList *refs;
-    const char *ref;
-    int res;
+    Str name;
+    int flags;
+
+    ASSERT( dbNew.dbf == NULL );
+    snprintf( name, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );
+    flags = GDBM_WRCREAT | GDBM_FAST;
+
+    if ( ! ( dbNew.dbf = gdbm_open( name, 512, flags, 0644, NULL ) ) )
+    {
+        Log_err( "Error opening %s for r/w (%s)", name, errMsg() );
+        return FALSE;
+    }
+    Log_dbg( LOG_DBG_NEWSBASE, "%s opened for r/w", name );
+    return TRUE;
+}
 
-    xref = Db_xref( msgId );
-    if ( xref[ 0 ] == '\0' )
-	return -1;
+static Bool
+newClose( Bool makeMain )
+{
+    Str newName;
+    
+    ASSERT( dbNew.dbf );
+    Log_dbg( LOG_DBG_NEWSBASE, "Closing new database" );
+    gdbm_close( dbNew.dbf );
+    dbNew.dbf = NULL;
+
+    snprintf( newName, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );
+
+    if ( makeMain )
+    {
+	Str name;
 
-    res = -1;
-    refs = new_Itl( xref, " :" );
-    for ( ref = Itl_first( refs ); ref != NULL; ref = Itl_next( refs ) )
+	ASSERT( db.dbf );
+	Db_close();
+	snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
+	if ( rename( newName, name ) != 0 )
+	{
+	    Log_err( "Rename %s to %s failed: %s",
+		     newName, name, strerror( errno ) );
+	    return FALSE;
+	}
+	Log_dbg( LOG_DBG_NEWSBASE, "Renamed %s to %s", newName, name );
+	return Db_open();
+    }
+    else
     {
-	int days;
+	if ( unlink( newName ) != 0 )
+	{
+	    Log_err( "Unlink %s failed: %s", newName, strerror( errno ) );
+	    return FALSE;
+	}
+	Log_dbg( LOG_DBG_NEWSBASE, "Deleted %s", newName );
+	return TRUE;
+    }
+}
+
+static Bool
+newCopyArt( const char *msgId )
+{
+    datum key, val;
 
-	days = Cfg_expire( ref );
-	if ( days == 0
-	     || ( days > res && res != 0 ) )
-	    res = days;
+    ASSERT( db.dbf );
+    ASSERT( dbNew.dbf );
+    key.dptr = (void *)msgId;
+    key.dsize = strlen( msgId ) + 1;
+
+    val = gdbm_fetch( db.dbf, key );
+    if ( val.dptr != NULL )
+    {
+	Bool res;
 	
-	Itl_next( refs );	/* Throw away group number */
+	res = ( gdbm_store( dbNew.dbf, key, val, GDBM_INSERT ) == 0 );
+	if ( ! res )
+	    Log_err( "Could not store %s in new database (%s)",
+		     msgId, errMsg() );
+	free( val.dptr );
+	return res;
     }
-    del_Itl( refs );
+    Log_err( "%s not found in database", msgId );
+    return FALSE;
+}
 
-    return res;
+static Bool
+newContains( const char *msgId )
+{
+    datum key;
+
+    ASSERT( dbNew.dbf );
+    key.dptr = (void*)msgId;
+    key.dsize = strlen( msgId ) + 1;
+    return gdbm_exists( dbNew.dbf, key );
 }
 
 Bool
-Db_expire( void )
+Db_rebuild( void )
 {
-    int cntDel, cntLeft, flags, expDays;
-    time_t nowTime, lastAccess;
+    const Over *ov;
+    int i;
+    Str grp;
     const char *msgId;
-    Str name, tmpName;
-    GDBM_FILE tmpDbf;
-    datum key, val;
-    Str expires;
-    time_t texpires;
+    Bool err;
 
-    if ( ! Db_open() )
+    if ( ! Cont_firstGrp( grp ) )
         return FALSE;
-    snprintf( name, MAXCHAR, "%s/data/articles.gdbm", Cfg_spoolDir() );
-    snprintf( tmpName, MAXCHAR, "%s/data/articles.gdbm.new", Cfg_spoolDir() );
-    flags = GDBM_NEWDB | GDBM_FAST;
-    if ( ! ( tmpDbf = gdbm_open( tmpName, 512, flags, 0644, NULL ) ) )
+    if ( ! newOpen() )
+	return FALSE;
+    
+    Log_inf( "Rebuilding article database" );
+    err = FALSE;
+    do
     {
-        Log_err( "Error opening %s for read/write (%s)", tmpName, errMsg() );
-        Db_close();
-        return FALSE;
-    }
-    Log_inf( "Expiring articles" );
-    cntDel = 0;
-    cntLeft = 0;
-    nowTime = time( NULL );
-    if ( Db_first( &msgId ) )
-        do
+	if ( ! Grp_exists( grp ) )
+            Log_err( "Overview file for unknown group %s exists", grp );
+        else
         {
-	    expDays = calcExpireDays( msgId );
-            lastAccess = Db_lastAccess( msgId );
-	    if ( Prt_searchHeader( Db_header( msgId ), "Expires", expires ) )
-		texpires = Utl_parseNewsDate( expires );
-	    else
-		texpires = (time_t) -1;
-	    
-	    if ( expDays == -1 )
-		Log_err( "Internal error: Failed expiry calculation on %s",
-			 msgId );
-	    else if ( lastAccess == -1 )
-                Log_err( "Internal error: Getting lastAccess of %s failed",
-                         msgId );
-            else if ( expDays > 0
-		      && difftime( nowTime, lastAccess ) >
-		          ( (double) expDays * 24 * 3600 ) )
-            {
-#ifdef DEBUG
-		Str last, now;
+            Cont_read( grp );
+            for ( i = Cont_first(); i <= Cont_last(); ++i )
+	    {
+		if ( ! Cont_validNumb( i ) )
+		    continue;
+		
+                if ( ( ov = Cont_get( i ) ) )
+                {
+                    msgId = Ov_msgId( ov );
+		    if ( msgId == NULL )
+		    {
+			err = TRUE;
+			Log_err( "Overview in %s has no msg id", grp );
+		    }
+		    else if ( ! newContains( msgId ) )
+			err |= ! newCopyArt( msgId );
+                }
+		else
+		{    
+		    err = TRUE;
+		    Log_err( "Overview %d not available in group %s", i, grp );
+		}
+	    }
+        }
+    }
+    while ( Cont_nextGrp( grp ) );
 
-		Utl_cpyStr( last, ctime( &lastAccess ) );
-		last[ strlen( last ) - 1 ] = '\0';
-		Utl_cpyStr( now, ctime( &nowTime ) );
-		last[ strlen( now ) - 1 ] = '\0';
-                Log_dbg( LOG_DBG_EXPIRE,
-			 "Expiring %s: last access %s, time now %s",
-			 msgId, last, now );
-#endif
-                ++cntDel;
-            }
-	    else if ( ( texpires != (time_t) -1 )
-		      && nowTime > texpires )
-	    {
-		Log_dbg( LOG_DBG_EXPIRE,
-			 "Expiring %s: Expires header activated", msgId );
-		++cntDel;
-	    }
-            else
-            {
-                ++cntLeft;
-                key.dptr = (void *)msgId;
-                key.dsize = strlen( msgId ) + 1;
+    return newClose( ! err );
+}
 
-                val = gdbm_fetch( db.dbf, key );
-                if ( val.dptr != NULL )
-                {
-                    if ( gdbm_store( tmpDbf, key, val, GDBM_INSERT ) != 0 )
-                        Log_err( "Could not store %s in new database (%s)",
-                                 errMsg() );
-                    free( val.dptr );
-                }
-            }
-        }
-        while ( Db_next( &msgId ) );
-    Log_inf( "%lu articles deleted, %lu left", cntDel, cntLeft );
-    gdbm_close( tmpDbf );
-    Db_close();
-    rename( tmpName, name );
-    return TRUE;
-}
--- a/src/database.h	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/database.h	Wed Jun 26 14:15:44 2002 +0100
@@ -3,7 +3,7 @@
 
   Article database.
 
-  $Id: database.h 183 2000-07-25 12:14:54Z bears $
+  $Id: database.h 387 2002-06-26 13:15:44Z bears $
 */
 
 #ifndef DB_H
@@ -107,11 +107,12 @@
 Bool
 Db_next( const char** msgId );
 
-/*
-  Expire all articles that have not been accessed for a number of
-  days determined by their group membership and noffle configuration.
- */
+/* Compact database if appropriate - give deleted article space back to OS */
+void
+Db_compact( void );
+
+/* Rebuild the article database. */
 Bool
-Db_expire( void );
+Db_rebuild( void );
 
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/expire.c	Wed Jun 26 14:15:44 2002 +0100
@@ -0,0 +1,203 @@
+/*
+  expire.c
+
+  $Id: expire.c 387 2002-06-26 13:15:44Z bears $
+
+  Handle expiring articles from the article base.
+*/
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "configfile.h"
+#include "content.h"
+#include "database.h"
+#include "expire.h"
+#include "fetchlist.h"
+#include "group.h"
+#include "itemlist.h"
+#include "log.h"
+#include "protocol.h"
+#include "pseudo.h"
+#include "util.h"
+#include "portable.h"
+
+/*
+ * Find the maximum expire time in days for this article.
+ * Different groups may have different limits, so we need to
+ * check the limit for each group.
+ */
+static int
+calcExpireDays( const char *msgId )
+{
+    const char *xref;
+    ItemList *refs;
+    const char *ref;
+    int res;
+
+    xref = Db_xref( msgId );
+    if ( xref[ 0 ] == '\0' )
+	return -1;
+
+    res = -1;
+    refs = new_Itl( xref, " :" );
+    for ( ref = Itl_first( refs ); ref != NULL; ref = Itl_next( refs ) )
+    {
+	int days;
+
+	days = Cfg_expire( ref );
+	if ( days == 0
+	     || ( days > res && res != 0 ) )
+	    res = days;
+	
+	Itl_next( refs );	/* Throw away group number */
+    }
+    del_Itl( refs );
+
+    return res;
+}
+
+/* Does this article need to be expired? */
+static Bool
+articleExpired( const char *msgId, time_t now )
+{
+    int expDays;
+    time_t lastAccess;
+    Str expires;
+    time_t texpires;
+
+    expDays = calcExpireDays( msgId );
+    if ( expDays == -1 )
+    {
+	Log_err( "Internal error: Failed expiry calculation on %s",
+		 msgId );
+	return TRUE;
+    }
+    
+    lastAccess = Db_lastAccess( msgId );
+    if ( lastAccess == -1 )
+    {
+	Log_err( "Internal error: Getting lastAccess of %s failed",
+		 msgId );
+	return TRUE;
+    }
+    
+    if ( Prt_searchHeader( Db_header( msgId ), "Expires", expires ) )
+	texpires = Utl_parseNewsDate( expires );
+    else
+	texpires = (time_t) -1;
+	    
+    if ( expDays > 0 &&
+	 difftime( now, lastAccess ) > ( (double) expDays * 24 * 3600 ) )
+    {
+#ifdef DEBUG
+	Str lastStr, nowStr;
+
+	Utl_cpyStr( lastStr, ctime( &lastAccess ) );
+	lastStr[ strlen( lastStr ) - 1 ] = '\0';
+	Utl_cpyStr( nowStr, ctime( &now ) );
+	nowStr[ strlen( nowStr ) - 1 ] = '\0';
+	Log_dbg( LOG_DBG_EXPIRE,
+		 "Expiring %s: last access %s, time now %s",
+		 msgId, lastStr, nowStr );
+#endif
+    }
+    else if ( ( texpires != (time_t) -1 ) && now > texpires )
+    {
+	Log_dbg( LOG_DBG_EXPIRE,
+		 "Expiring %s: Expires header activated", msgId );
+    }
+    else
+	return FALSE;
+
+    return TRUE;
+}
+
+/* Work through all overviews looking for articles to expire. */
+void
+Exp_expire( void )
+{
+    const Over *ov;
+    int i;
+    int cntDel, cntLeft;
+    Str grp;
+    Bool autoUnsubscribe;
+    int autoUnsubscribeDays;
+    time_t now, maxAge = 0;
+    const char *msgId;
+
+    autoUnsubscribe = Cfg_autoUnsubscribe();
+    autoUnsubscribeDays = Cfg_autoUnsubscribeDays();
+    maxAge = Cfg_autoUnsubscribeDays() * 24 * 3600;
+    if ( ! Cont_firstGrp( grp ) )
+        return;
+    Log_inf( "Expiring articles" );
+    Fetchlist_read();
+    now = time( NULL );
+    do
+    {
+	if ( ! Grp_exists( grp ) )
+            Log_err( "Overview file for unknown group %s exists", grp );
+        else
+        {
+            cntDel = cntLeft = 0;
+            Cont_read( grp );
+            for ( i = Cont_first(); i <= Cont_last(); ++i )
+	    {
+		if ( ! Cont_validNumb( i ) )
+		    continue;
+		
+                if ( ( ov = Cont_get( i ) ) )
+                {
+                    msgId = Ov_msgId( ov );
+		    /* Crossposted articles may have already been deleted. */
+		    if ( ! Db_contains( msgId ) )
+		    {
+			Cont_delete( i );
+			++cntDel;
+		    } else if ( articleExpired( msgId, now ) )
+                    {
+                        Cont_delete( i );
+			Db_delete( msgId );
+                        ++cntDel;
+                    }
+                    else
+                        ++cntLeft;
+                }
+	    }
+
+	    /*
+	     * Auto unsubscribe where applicable if last article arrival
+	     * time is maxAge newer than the last access time. This ensures
+	     * the low traffic groups don't get expired simply because
+	     * there's been nothing to read.
+	     */
+            if ( ! Grp_local( grp )
+                 && Fetchlist_contains( grp, NULL )
+                 && autoUnsubscribe
+                 && difftime( Grp_lastPostTime(grp),
+			      Grp_lastAccess( grp ) ) > maxAge )
+            {
+		Log_ntc( "Auto-unsubscribing from %s after %d "
+			 "days without access",
+			 grp, autoUnsubscribeDays );
+		Pseudo_autoUnsubscribed( grp, autoUnsubscribeDays );
+		Fetchlist_remove( grp );
+		Grp_setRmtNext( grp, GRP_RMT_NEXT_NOT_SUBSCRIBED );
+            }
+            if ( Cont_write() )
+                Grp_setFirstLast( grp, Cont_first(), Cont_last() );
+            Log_inf( "%ld overviews deleted from group %s, %ld left (%ld-%ld)",
+                     cntDel, grp, cntLeft, Grp_first( grp ), Grp_last( grp ) );
+        }
+    }
+    while ( Cont_nextGrp( grp ) );
+    Fetchlist_write();
+    Db_compact();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/expire.h	Wed Jun 26 14:15:44 2002 +0100
@@ -0,0 +1,16 @@
+/*
+  expire.h
+
+  Handle expiring articles
+
+  $Id: expire.h 387 2002-06-26 13:15:44Z bears $
+*/
+
+#ifndef EXPIRE_H
+#define EXPIRE_H
+
+/* Expire articles from the article base. */
+void
+Exp_expire( void );
+
+#endif 
--- a/src/noffle.c	Wed Jun 26 14:14:56 2002 +0100
+++ b/src/noffle.c	Wed Jun 26 14:15:44 2002 +0100
@@ -10,7 +10,7 @@
   received for some seconds (to allow multiple clients connect at the same
   time).
 
-  $Id: noffle.c 382 2002-06-05 22:03:44Z mirkol $
+  $Id: noffle.c 387 2002-06-26 13:15:44Z bears $
 */
 
 #if HAVE_CONFIG_H
@@ -30,6 +30,7 @@
 #include "control.h"
 #include "configfile.h"
 #include "database.h"
+#include "expire.h"
 #include "fetch.h"
 #include "fetchlist.h"
 #include "filter.h"
@@ -236,84 +237,17 @@
             }
 }
 
-/* Expire all overviews not in database */
 static void
-expireContents( void )
+doExpire( void )
 {
-    const Over *ov;
-    int i;
-    int cntDel, cntLeft;
-    Str grp;
-    Bool autoUnsubscribe;
-    int autoUnsubscribeDays;
-    time_t maxAge = 0;
-    const char *msgId;
-
-    autoUnsubscribe = Cfg_autoUnsubscribe();
-    autoUnsubscribeDays = Cfg_autoUnsubscribeDays();
-    maxAge = Cfg_autoUnsubscribeDays() * 24 * 3600;
-    if ( ! Cont_firstGrp( grp ) )
-        return;
-    Log_inf( "Expiring overviews not in database" );
-    Fetchlist_read();
-    do
-    {
-	if ( ! Grp_exists( grp ) )
-            Log_err( "Overview file for unknown group %s exists", grp );
-        else
-        {
-            cntDel = cntLeft = 0;
-            Cont_read( grp );
-            for ( i = Cont_first(); i <= Cont_last(); ++i )
-                if ( ( ov = Cont_get( i ) ) )
-                {
-                    msgId = Ov_msgId( ov );
-                    if ( ! Db_contains( msgId ) )
-                    {
-                        Cont_delete( i );
-                        ++cntDel;
-                    }
-                    else
-                        ++cntLeft;
-                }
-
-	    /*
-	     * Auto unsubscribe where applicable if last article arrival
-	     * time is maxAge newer than the last access time. This ensures
-	     * the low traffic groups don't get expired simply because
-	     * there's been nothing to read.
-	     */
-            if ( ! Grp_local( grp )
-                 && Fetchlist_contains( grp, NULL )
-                 && autoUnsubscribe
-                 && difftime( Grp_lastPostTime(grp),
-			      Grp_lastAccess( grp ) ) > maxAge )
-            {
-		Log_ntc( "Auto-unsubscribing from %s after %d "
-			 "days without access",
-			 grp, autoUnsubscribeDays );
-		Pseudo_autoUnsubscribed( grp, autoUnsubscribeDays );
-		Fetchlist_remove( grp );
-		Grp_setRmtNext( grp, GRP_RMT_NEXT_NOT_SUBSCRIBED );
-            }
-            if ( Cont_write() )
-                Grp_setFirstLast( grp, Cont_first(), Cont_last() );
-            Log_inf( "%ld overviews deleted from group %s, %ld left (%ld-%ld)",
-                     cntDel, grp, cntLeft, Grp_first( grp ), Grp_last( grp ) );
-        }
-    }
-    while ( Cont_nextGrp( grp ) );
-    Fetchlist_write();
+    Exp_expire();
 }
 
 static void
-doExpire( void )
+doRebuild( void )
 {
-    Db_close();
-    Db_expire();
-    if ( ! Db_open() )
-        return;
-    expireContents();
+    if ( ! Db_rebuild() )
+	fprintf( stderr, "Rebuild failed.\n" );
 }
 
 static void
@@ -374,8 +308,9 @@
         Log_inf( "Deleting group '%s'", name );
 
 	/*
-	  Delete all articles that are only in the group. Check the
-	  article Xref for more than one group.
+	  Delete all articles that are only in the group or are
+	  crossposted only to groups that do not exist on this
+	  server.
 	 */
 	Cont_read( name );
 	for ( i = Cont_first(); i <= Cont_last(); i++ )
@@ -384,17 +319,42 @@
 	    Bool toDelete;
 	    Str msgId;
 
+	    if ( ! Cont_validNumb( i ) )
+		continue;
+
 	    over = Cont_get( i );
 	    toDelete = TRUE;
 	    if ( over != NULL )
 	    {
-		ItemList * xref;
+		ItemList *xrefs;
+		const char *xref;
+		int localXrefs = 0;
 
 		Utl_cpyStr( msgId, Ov_msgId( over ) );
-		xref = new_Itl( Db_xref( msgId ), " " );
-		if ( Itl_count( xref ) > 1 )
+		xrefs = new_Itl( Db_xref( msgId ), " " );
+		for ( xref = Itl_first( xrefs );
+		      xref != NULL;
+		      xref = Itl_next( xrefs) )
+		{
+		    Str xgrp;
+		    int no;
+
+		    if ( sscanf( xref, "%s:%d", xgrp, &no ) != 2 )
+		    {
+			/* Malformed xref - leave article just in case */
+			Log_err( "Malformed Xref: entry in %s: %s",
+				 msgId, xref);
+			toDelete = FALSE;
+			break;
+		    }
+		    
+		    if ( Cont_exists( xgrp ) )
+			++localXrefs;
+		}
+		
+		if ( localXrefs > 1 )
 		    toDelete = FALSE;
-		del_Itl( xref );
+		del_Itl( xrefs );
 	    }
 	    Cont_delete( i );
 	    if ( toDelete )
@@ -565,6 +525,7 @@
       "Usage: noffle <option>\n"
       "Option is one of the following:\n"
       " -a | --article <msg id>|all      Show article(s) in database\n"
+      " -B | --rebuild                   Rebuild article database\n"
       " -c | --cancel <msg id>           Remove article from database\n"
       " -C | --create <grp>              Create a local group\n"
       " -d | --database                  Show content of article database\n"
@@ -797,6 +758,7 @@
 	{ "--online", 		"-n" },
 	{ "--post", 		"-p" },
 	{ "--query", 		"-q" },
+	{ "--rebuild", 		"-B" },
 	{ "--server",		"-r" },
 	{ "--requested",	"-R" },
 	{ "--subscribe-over",	"-s" },
@@ -870,6 +832,9 @@
         else
             doArt( *argv );
         break;
+    case 'B':
+	doRebuild();
+	break;
     case 'c':
         if ( *argv == NULL )
         {