view src/database.c @ 499:1c4d3397e99f noffle

More items for .hgignore.
author Jim Hague <jim.hague@acm.org>
date Wed, 14 Aug 2013 11:50:21 +0100
parents a04c52f87b6e
children
line wrap: on
line source

/*
  database.c

  $Id: database.c 629 2004-10-13 23:26:48Z bears $

  Uses GNU gdbm library. Using Berkeley db (included in libc6) was
  cumbersome. It is based on Berkeley db 1.85, which has severe bugs
  (e.g. it is not recommended to delete or overwrite entries with
  overflow pages).
*/

#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <gdbm.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "configfile.h"
#include "content.h"
#include "database.h"
#include "group.h"
#include "log.h"
#include "protocol.h"
#include "util.h"
#include "portable.h"

/* printf-style templates for the database file names; the single %s is
   filled in with the spool directory returned by Cfg_spoolDir(). */
/* Main article database. */
static const char ARTICLE_FILENAME_FMT[] = "%s/data/articles.gdbm";
/* Scratch database written while rebuilding; renamed over the main one
   on success, unlinked otherwise (see newClose()). */
static const char ARTICLE_NEW_FILENAME_FMT[] = "%s/data/articles.gdbm.new";


/*
  Module state: the open gdbm handle plus the fields of the single
  article that is currently loaded (a one-entry cache keyed by msgId).
  The initializer at the end is positional, so it has to stay in step
  with the member order.
*/
static struct Db
{
    /* gdbm handle; NULL while the database is closed */
    GDBM_FILE dbf;

    /* Start string for Xref header line: "Xref: <host>" */
    Str xrefHost;

    /* Msg Id of presently loaded article, empty if none loaded */
    Str msgId;

    /* Status of loaded article */
    unsigned status; /* Flags (DB_NOT_DOWNLOADED, DB_INTERESTING, ...) */
    time_t lastAccess; /* Stored as a decimal number in the record */

    /* Overview of loaded article */
    Str subj; 
    Str from;
    Str date;
    Str ref;
    Str xref;
    unsigned long bytes;
    unsigned long lines;

    /* Article text (except for overview header lines); allocated by
       Db_open() and released by Db_close() */
    DynStr *txt;

} db = { NULL, "(unknown)", "", 0, 0, "", "", "", "", "", 0, 0, NULL };

/*
  Text for the most recent failure: gdbm's own message when gdbm has
  recorded an error, otherwise the C library message for errno.
*/
static const char *
errMsg( void )
{
    return ( gdbm_errno != GDBM_NO_ERROR )
        ? gdbm_strerror( gdbm_errno )
        : strerror( errno );
}

Bool
Db_open( void )
{
    Str name, host;
    int flags;

    ASSERT( db.dbf == NULL );
    snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
    flags = GDBM_WRCREAT | GDBM_FAST;

    if ( ! ( db.dbf = gdbm_open( name, 512, flags, 0644, Log_gdbm_fatal ) ) )
    {
        Log_err( "Error opening %s for r/w (%s)", name, errMsg() );
        return FALSE;
    }
    Log_dbg( LOG_DBG_NEWSBASE, "%s opened for r/w", name );

    if ( db.txt == NULL )
        db.txt = new_DynStr( 5000 );

    if ( ! Utl_getFQDN( host ) )
	Utl_cpyStr( host, "localhost.localdomain" );
    snprintf( db.xrefHost, MAXCHAR, "Xref: %s", host );

    return TRUE;
}

/*
  Close the main article database, release the article text buffer and
  forget which article was loaded. The database must be open.
*/
void
Db_close( void )
{
    ASSERT( db.dbf );
    Log_dbg( LOG_DBG_NEWSBASE, "Closing database" );

    /* Database handle */
    gdbm_close( db.dbf );
    db.dbf = NULL;

    /* Text buffer of the cached article */
    del_DynStr( db.txt );
    db.txt = NULL;

    /* No article is loaded any more */
    Utl_cpyStr( db.msgId, "" );
}

/*
  Make the article with the given message ID the currently loaded
  article, i.e. fill the db.* fields from its database record.

  Record layout (one item per line): status (hex), last access
  (decimal), subject, from, date, references, xref, bytes, lines,
  followed by the remaining article text.

  Returns TRUE if the article is (now) loaded. Returns FALSE if there
  is no such record (the previously loaded article stays loaded) or
  if the record is corrupt. In the latter case the db.* fields may be
  partly overwritten, so the cache is marked empty; otherwise a later
  call with the same message ID would report the half-parsed data as
  successfully loaded.
*/
static Bool
loadArt( const char *msgId )
{
    /* Raw record of the last fetch; kept until the next fetch */
    static void *dptr = NULL;

    datum key, val;
    Str t = "";
    const char *p;
    const char *what;
    unsigned long lastAccess;

    ASSERT( db.dbf );

    /* Already loaded? */
    if ( strcmp( msgId, db.msgId ) == 0 )
        return TRUE;

    key.dptr = (void *)msgId;
    key.dsize = strlen( msgId ) + 1;
    free( dptr );
    dptr = NULL;
    val = gdbm_fetch( db.dbf, key );
    dptr = val.dptr;
    if ( dptr == NULL )
    {
        Log_dbg( LOG_DBG_NEWSBASE,
                 "database.c loadArt: gdbm_fetch found no entry" );
        return FALSE;
    }

    /* Copy the ID before any field is overwritten; it is cleared again
       below if the record turns out to be corrupt */
    Utl_cpyStr( db.msgId, msgId );

    what = "status";
    p = Utl_getLn( t, (char *)dptr );
    if ( ! p || sscanf( t, "%x", &db.status ) != 1 )
        goto corrupt;

    /* Scan into a real unsigned long; time_t need not have that type */
    what = "lastAccess";
    p = Utl_getLn( t, p );
    if ( ! p || sscanf( t, "%lu", &lastAccess ) != 1 )
        goto corrupt;
    db.lastAccess = (time_t)lastAccess;

    /* Stop at the first missing line instead of handing a NULL
       position on to the next call */
    what = "overview";
    if ( ! ( p = Utl_getHeaderLn( db.subj, p ) )
         || ! ( p = Utl_getHeaderLn( db.from, p ) )
         || ! ( p = Utl_getHeaderLn( db.date, p ) )
         || ! ( p = Utl_getHeaderLn( db.ref, p ) )
         || ! ( p = Utl_getHeaderLn( db.xref, p ) ) )
        goto corrupt;

    what = "bytes";
    p = Utl_getHeaderLn( t, p );
    if ( ! p || sscanf( t, "%lu", &db.bytes ) != 1 )
        goto corrupt;

    what = "lines";
    p = Utl_getHeaderLn( t, p );
    if ( ! p || sscanf( t, "%lu", &db.lines ) != 1 )
        goto corrupt;

    /* Everything after the overview lines is the article text */
    DynStr_clear( db.txt );
    DynStr_app( db.txt, p );
    return TRUE;

corrupt:
    Log_err( "Entry in database '%s' is corrupt (%s)", msgId, what );
    db.msgId[ 0 ] = '\0';
    return FALSE;
}

static Bool
saveArt( void )
{
    DynStr *s;
    Str t = "";
    datum key, val;

    if ( strcmp( db.msgId, "" ) == 0 )
        return FALSE;
    s = new_DynStr( 5000 );
    snprintf( t, MAXCHAR, "%x", db.status );
    DynStr_appLn( s, t );
    snprintf( t, MAXCHAR, "%lu", db.lastAccess );
    DynStr_appLn( s, t );
    DynStr_appLn( s, db.subj );
    DynStr_appLn( s, db.from );
    DynStr_appLn( s, db.date );
    DynStr_appLn( s, db.ref );
    DynStr_appLn( s, db.xref );
    snprintf( t, MAXCHAR, "%lu", db.bytes );
    DynStr_appLn( s, t );
    snprintf( t, MAXCHAR, "%lu", db.lines );
    DynStr_appLn( s, t );
    DynStr_appDynStr( s, db.txt );

    key.dptr = (void *)db.msgId;
    key.dsize = strlen( db.msgId ) + 1;
    val.dptr = (void *)DynStr_str( s );
    val.dsize = DynStr_len( s ) + 1;
    if ( gdbm_store( db.dbf, key, val, GDBM_REPLACE ) != 0 )
    {
        Log_err( "Could not store %s in database (%s)", errMsg() );
        return FALSE;
    }

    del_DynStr( s );
    return TRUE;
}

/*
  Create a database record for an article of which only the overview
  is known so far. The record is flagged DB_NOT_DOWNLOADED, has an
  empty text and an xref of the form "<grp>:<numb>".

  An error is logged if the article is already in the database, but
  the record is written regardless. Returns the result of saveArt().
*/
Bool
Db_prepareEntry( const Over *ov, const char *grp, int numb )
{
    const char *id;

    ASSERT( db.dbf );
    ASSERT( ov );
    ASSERT( grp );

    id = Ov_msgId( ov );
    Log_dbg( LOG_DBG_NEWSBASE, "Preparing entry %s", id );
    if ( Db_contains( id ) )
        Log_err( "Preparing article twice: %s", id );

    /* Status part of the record */
    db.status = DB_NOT_DOWNLOADED;
    db.lastAccess = time( NULL );

    /* Overview part, taken from ov */
    Utl_cpyStr( db.msgId, id );
    Utl_cpyStr( db.subj, Ov_subj( ov ) );
    Utl_cpyStr( db.from, Ov_from( ov ) );
    Utl_cpyStr( db.date, Ov_date( ov ) );
    Utl_cpyStr( db.ref, Ov_ref( ov ) );
    snprintf( db.xref, MAXCHAR, "%s:%i", grp, numb );
    db.bytes = Ov_bytes( ov );
    db.lines = Ov_lines( ov );

    /* No article text yet */
    DynStr_clear( db.txt );

    return saveArt();
}

Bool
Db_storeArt( const char *msgId, const char *artTxt )
{
    Str line, lineEx, field, value;

    ASSERT( db.dbf );

    Log_dbg( LOG_DBG_NEWSBASE, "Store article %s", msgId );
    if ( ! loadArt( msgId ) )
    {
        Log_err( "Cannot find info about '%s' in database", msgId );
        return FALSE;
    }
    if ( ! ( db.status & DB_NOT_DOWNLOADED ) )
    {
        Log_err( "Trying to store already retrieved article '%s'", msgId );
        return FALSE;
    }
    db.status &= ~DB_NOT_DOWNLOADED;
    db.status &= ~DB_RETRIEVING_FAILED;
    db.lastAccess = time( NULL );

    DynStr_clear( db.txt );

    /* Read header */
    while ( ( artTxt = Utl_getHeaderLn( lineEx, artTxt ) ) != NULL )
    {
	Bool continuation;
	
        if ( lineEx[ 0 ] == '\0' )
        {
            DynStr_appLn( db.txt, lineEx );
            break;
        }
        /* Remove fields already in overview and handle x-noffle
           headers correctly in case of cascading NOFFLEs */
        if ( Prt_getField( field, value, &continuation, lineEx ) )
        {
            if ( strcmp( field, "x-noffle-status" ) == 0 )
            {
                if ( strstr( value, "NOT_DOWNLOADED" ) != 0 )
                    db.status |= DB_NOT_DOWNLOADED;
            }
            else if ( strcmp( field, "message-id" ) != 0
                      && strcmp( field, "xref" ) != 0
                      && strcmp( field, "references" ) != 0
                      && strcmp( field, "subject" ) != 0
                      && strcmp( field, "from" ) != 0
                      && strcmp( field, "date" ) != 0
                      && strcmp( field, "bytes" ) != 0
                      && strcmp( field, "lines" ) != 0
                      && strcmp( field, "x-noffle-lastaccess" ) != 0 )
                DynStr_appLn( db.txt, lineEx );
        }
    }

    if ( artTxt == NULL )
    {
	/*
	 * This article has no body. Bereft of text it lies...
	 *
	 * I'm not completely sure how surprising the rest of
	 * Noffle would find a body-less article, so substitute
	 * an empty line.
	 */
	Log_inf( "Article %s malformed: missing body", msgId );
	artTxt = "\n";
    }

    /* Read body */
    while ( ( artTxt = Utl_getLn( line, artTxt ) ) != NULL )
        if ( ! ( db.status & DB_NOT_DOWNLOADED ) )
            DynStr_appLn( db.txt, line );
    
    return saveArt();
}

/*
  Replace the status flags of an article and write the record back.
  Nothing happens if the article cannot be loaded.
*/
void
Db_setStatus( const char *msgId, unsigned status )
{
    if ( ! loadArt( msgId ) )
        return;

    db.status = status;
    saveArt();
}

/*
  Set the last access time of an article to the current time and write
  the record back. Nothing happens if the article cannot be loaded.
*/
void
Db_updateLastAccess( const char *msgId )
{
    if ( ! loadArt( msgId ) )
        return;

    db.lastAccess = time( NULL );
    saveArt();
}

/*
  Replace the xref string of an article and write the record back.
  Nothing happens if the article cannot be loaded.
*/
void
Db_setXref( const char *msgId, const char *xref )
{
    if ( ! loadArt( msgId ) )
        return;

    Utl_cpyStr( db.xref, xref );
    saveArt();
}

/*
  Search best position for breaking a line.

  Scans line and remembers the start of each run of white space. As
  soon as a white space character (or the end of the string) is found
  beyond wantedLength characters, the start of the most recently
  remembered earlier run is returned. If the text never exceeds
  wantedLength, or there is no white space to break at, a pointer to
  the terminating NUL is returned.

  Characters are converted to unsigned char before being passed to
  isspace(): header text may contain bytes >= 0x80, and calling
  isspace() with a negative value is undefined.
*/
static const char *
searchBreakPos( const char *line, int wantedLength )
{
    const char *lastSpace = NULL;
    const char *p;
    int inSpaceRun = 0;

    for ( p = line; *p != '\0'; ++p )
    {
        if ( ! isspace( (unsigned char)*p ) )
        {
            inSpaceRun = 0;
            continue;
        }
        /* p - line is the number of characters before this one */
        if ( p - line > wantedLength && lastSpace != NULL )
            return lastSpace;
        if ( ! inSpaceRun )
            lastSpace = p;
        inSpaceRun = 1;
    }
    if ( p - line > wantedLength && lastSpace != NULL )
        return lastSpace;
    return p;
}

/*
  Append header line by breaking long line into multiple lines.

  Writes "<field> <value>\n" to target. The value is folded at white
  space so that lines aim at 78 characters; every continuation line
  starts with a single space. White space at a fold and at the start
  of the value is dropped.

  No continuation line is started when only white space remains, so a
  value with trailing white space does not produce a continuation line
  consisting of nothing but a space.
*/
static void
appendLongHeader( DynStr *target, const char *field, const char *value )
{
    const int wantedLength = 78;
    const char *breakPos, *old;
    int len, room;

    len = strlen( field );
    DynStr_appN( target, field, len );
    DynStr_appN( target, " ", 1 );

    /* The first line has to hold the field name and the space too */
    room = wantedLength - len - 1;
    old = value;
    while ( isspace( (unsigned char)*old ) )
        ++old;
    while ( TRUE )
    {
        breakPos = searchBreakPos( old, room );
        DynStr_appN( target, old, breakPos - old );

        /* Skip the white space that is replaced by the line break */
        old = breakPos;
        while ( isspace( (unsigned char)*old ) )
            ++old;
        if ( *old == '\0' )
        {
            DynStr_appN( target, "\n", 1 );
            return;
        }
        DynStr_appN( target, "\n ", 2 );

        /* Continuation lines only lose the leading space */
        room = wantedLength - 1;
    }
}

/*
  Build the complete header of an article: Message-ID, the X-NOFFLE
  status lines, the overview fields, the Xref line and the stored
  header lines up to the end of the header.

  Returns NULL if the article cannot be loaded. Otherwise the result
  points into a static buffer that is reused by the next call.
*/
const char *
Db_header( const char *msgId )
{
    static DynStr *s = NULL;

    Str date, t;
    unsigned status;
    const char *txt, *headerEnd;

    if ( s != NULL )
        DynStr_clear( s );
    else
        s = new_DynStr( 5000 );
    ASSERT( db.dbf );
    if ( ! loadArt( msgId ) )
        return NULL;

    /* Message ID and NOFFLE specific lines */
    strftime( date, MAXCHAR, "%Y-%m-%d %H:%M:%S",
              localtime( &db.lastAccess ) );
    status = db.status;
    snprintf( t, MAXCHAR,
              "Message-ID: %s\n"
              "X-NOFFLE-Status:%s%s%s\n"
              "X-NOFFLE-LastAccess: %s\n",
              msgId,
              status & DB_INTERESTING ? " INTERESTING" : "",
              status & DB_NOT_DOWNLOADED ? " NOT_DOWNLOADED" : "",
              status & DB_RETRIEVING_FAILED ? " RETRIEVING_FAILED" : "",
              date );
    DynStr_app( s, t );

    /* Overview fields */
    appendLongHeader( s, "Subject:", db.subj );
    appendLongHeader( s, "From:", db.from );
    appendLongHeader( s, "Date:", db.date );
    appendLongHeader( s, "References:", db.ref );

    snprintf( t, MAXCHAR, "%lu", db.bytes );
    DynStr_app( s, "Bytes: " );
    DynStr_appLn( s, t );

    snprintf( t, MAXCHAR, "%lu", db.lines );
    DynStr_app( s, "Lines: " );
    DynStr_appLn( s, t );

    appendLongHeader( s, db.xrefHost, db.xref );

    /* Remaining header lines: the stored text up to and including the
       newline that ends the last header line, or the whole text if
       there is no empty line */
    txt = DynStr_str( db.txt );
    headerEnd = strstr( txt, "\n\n" );
    if ( headerEnd != NULL )
        DynStr_appN( s, txt, headerEnd - txt + 1 );
    else
        DynStr_appDynStr( s, db.txt );

    return DynStr_str( s );
}

/*
  Return the body of an article, i.e. the stored text following the
  first empty line. The result is "" if the article cannot be loaded
  or has no empty line; otherwise it points into the text of the
  currently loaded article.
*/
const char *
Db_body( const char *msgId )
{
    const char *separator = NULL;

    if ( loadArt( msgId ) )
        separator = strstr( DynStr_str( db.txt ), "\n\n" );

    return ( separator != NULL ) ? separator + 2 : "";
}

/* Status flags of an article; 0 if the article cannot be loaded. */
unsigned
Db_status( const char *msgId )
{
    return loadArt( msgId ) ? db.status : 0;
}

/* Last access time of an article; -1 if the article cannot be loaded. */
time_t
Db_lastAccess( const char *msgId )
{
    if ( loadArt( msgId ) )
        return db.lastAccess;
    return -1;
}

/*
  References of an article; "" if the article cannot be loaded. The
  result points into the module's cache of the loaded article.
*/
const char *
Db_ref( const char *msgId )
{
    return loadArt( msgId ) ? db.ref : "";
}

/*
  Xref string of an article; "" if the article cannot be loaded. The
  result points into the module's cache of the loaded article.
*/
const char *
Db_xref( const char *msgId )
{
    return loadArt( msgId ) ? db.xref : "";
}

/*
  From field of an article; "" if the article cannot be loaded. The
  result points into the module's cache of the loaded article.
*/
const char *
Db_from( const char *msgId )
{
    return loadArt( msgId ) ? db.from : "";
}

/*
  Date field of an article; "" if the article cannot be loaded. The
  result points into the module's cache of the loaded article.
*/
const char *
Db_date( const char *msgId )
{
    return loadArt( msgId ) ? db.date : "";
}

/*
  Create an overview object from the stored overview fields of an
  article. Returns NULL if the article cannot be loaded, otherwise
  whatever new_Over() returns.
*/
Over *
Db_over( const char *msgId )
{
    Over *ov = NULL;

    if ( loadArt( msgId ) )
        ov = new_Over( db.subj, db.from, db.date, msgId,
                       db.ref, db.bytes, db.lines );
    return ov;
}

/*
  Tell whether the database has a record for the given message ID.
  The currently loaded article counts as present without asking gdbm.
*/
Bool
Db_contains( const char *msgId )
{
    datum key;

    ASSERT( db.dbf );
    if ( strcmp( msgId, db.msgId ) != 0 )
    {
        /* Keys include the terminating NUL */
        key.dptr = (void*)msgId;
        key.dsize = strlen( msgId ) + 1;
        return gdbm_exists( db.dbf, key );
    }
    return TRUE;
}

void
Db_delete( const char *msgId )
{
    datum key;

    ASSERT( db.dbf );
    if ( strcmp( msgId, db.msgId ) == 0 )
        db.msgId[ 0 ] = '\0';
    key.dptr = (void*)msgId;
    key.dsize = strlen( msgId ) + 1;
    gdbm_delete( db.dbf, key );
}

/* Key most recently returned by Db_first() / Db_next(). Its dptr is
   memory handed out by gdbm and is freed by those two functions; NULL
   when no traversal is in progress or the end was reached. */
static datum cursor = { NULL, 0 };

/*
  Start a traversal over all message IDs in the database.

  On return *msgId is the first key, or NULL if the database is empty;
  the return value is TRUE exactly when a key was found. The key is
  owned by this module and is released by the next Db_first() or
  Db_next() call.
*/
Bool
Db_first( const char** msgId )
{
    ASSERT( db.dbf );

    /* Drop the key of an earlier traversal; free( NULL ) is a no-op */
    free( cursor.dptr );
    cursor.dptr = NULL;

    cursor = gdbm_firstkey( db.dbf );
    *msgId = cursor.dptr;
    return ( *msgId != NULL );
}

Bool
Db_next( const char** msgId )
{
    void *oldDptr = cursor.dptr;

    ASSERT( db.dbf );
    if ( cursor.dptr == NULL )
        return FALSE;
    cursor = gdbm_nextkey( db.dbf, cursor );
    free( oldDptr );
    *msgId = cursor.dptr;
    return ( cursor.dptr != NULL );
}

/*
  Compact the article database. Deliberately does nothing beyond
  checking that the database is open; the comment in the body
  explains why.
*/
void
Db_compact( void )
{
    ASSERT( db.dbf );

    /*
     * You'd think it would be sensible to do something like
     *
     * if ( gdbm_reorganize( db.dbf ) != 0 )
     *     Log_err( "Error compacting article base: %s", errMsg() );
     *
     * here. But this just copies the database to a new one and renames,
     * which is what Db_rebuild() does.
     *
     * So do nothing. We don't want expire to chew disc space.
     */
}

/*
  Helper functions for database rebuild.
*/

/* State of the scratch database that Db_rebuild() fills. */
static struct DbNew
{
    /* gdbm handle; NULL except between newOpen() and newClose() */
    GDBM_FILE dbf;

} dbNew = { NULL };


static Bool
newOpen( void )
{
    Str name;
    int flags;

    ASSERT( dbNew.dbf == NULL );
    snprintf( name, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );
    flags = GDBM_WRCREAT | GDBM_FAST;

    if ( ! ( dbNew.dbf = gdbm_open( name, 512, flags, 0644, Log_gdbm_fatal ) ) )
    {
        Log_err( "Error opening %s for r/w (%s)", name, errMsg() );
        return FALSE;
    }
    Log_dbg( LOG_DBG_NEWSBASE, "%s opened for r/w", name );
    return TRUE;
}

/*
  Close the scratch database and either promote or discard it.

  makeMain == TRUE:  the main database is closed, the scratch file is
                     renamed over it and the main database is opened
                     again. If the rename fails, the old main database
                     is reopened so that the module is not left in the
                     closed state, and FALSE is returned.
  makeMain == FALSE: the scratch file is deleted.

  Returns TRUE on success, FALSE (after logging) otherwise.
*/
static Bool
newClose( Bool makeMain )
{
    Str newName, name;

    ASSERT( dbNew.dbf );
    Log_dbg( LOG_DBG_NEWSBASE, "Closing new database" );
    gdbm_close( dbNew.dbf );
    dbNew.dbf = NULL;

    snprintf( newName, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );

    if ( ! makeMain )
    {
        if ( unlink( newName ) != 0 )
        {
            Log_err( "Unlink %s failed: %s", newName, strerror( errno ) );
            return FALSE;
        }
        Log_dbg( LOG_DBG_NEWSBASE, "Deleted %s", newName );
        return TRUE;
    }

    ASSERT( db.dbf );
    Db_close();
    snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
    if ( rename( newName, name ) != 0 )
    {
        Log_err( "Rename %s to %s failed: %s",
                 newName, name, strerror( errno ) );
        /* The old database is still in place; reopen it, since the
           rest of the module expects an open database. Db_open()
           logs its own failure. */
        (void)Db_open();
        return FALSE;
    }
    Log_dbg( LOG_DBG_NEWSBASE, "Renamed %s to %s", newName, name );
    return Db_open();
}

/*
  Copy the raw record of one article from the main database into the
  scratch database. The record is inserted, not replaced, so storing
  fails if the scratch database already has that key.

  Returns TRUE on success; logs an error and returns FALSE if the
  record is missing or cannot be stored.
*/
static Bool
newCopyArt( const char *msgId )
{
    datum key, val;
    Bool stored;

    ASSERT( db.dbf );
    ASSERT( dbNew.dbf );
    key.dptr = (void *)msgId;
    key.dsize = strlen( msgId ) + 1;

    val = gdbm_fetch( db.dbf, key );
    if ( val.dptr == NULL )
    {
        Log_err( "%s not found in database", msgId );
        return FALSE;
    }

    stored = ( gdbm_store( dbNew.dbf, key, val, GDBM_INSERT ) == 0 );
    if ( ! stored )
        Log_err( "Could not store %s in new database (%s)",
                 msgId, errMsg() );

    /* The fetched record belongs to us and has to be freed */
    free( val.dptr );
    return stored;
}

/* Tell whether the scratch database has a record for the message ID. */
static Bool
newContains( const char *msgId )
{
    datum key;

    ASSERT( dbNew.dbf );

    /* Keys include the terminating NUL */
    key.dsize = strlen( msgId ) + 1;
    key.dptr = (void*)msgId;
    return gdbm_exists( dbNew.dbf, key );
}

/*
  Rebuild the article database so that it only holds articles that
  are referenced by the overview of some known group.

  Every referenced article is copied into a scratch database. If no
  error occurred, the scratch database replaces the main one;
  otherwise it is discarded. Overview files of unknown groups are
  logged and skipped; that alone does not count as an error.

  Returns FALSE if there is no group to start with, if the scratch
  database cannot be opened or if newClose() fails.
*/
Bool
Db_rebuild( void )
{
    const Over *ov;
    const char *msgId;
    Str grp;
    Bool err = FALSE;
    int i;

    if ( ! Cont_firstGrp( grp ) )
        return FALSE;
    if ( ! newOpen() )
        return FALSE;

    Log_inf( "Rebuilding article database" );
    do
    {
        if ( ! Grp_exists( grp ) )
        {
            Log_err( "Overview file for unknown group %s exists", grp );
            continue;
        }

        Cont_read( grp );
        for ( i = Cont_first(); i <= Cont_last(); ++i )
        {
            if ( ! Cont_validNumb( i ) )
                continue;

            ov = Cont_get( i );
            if ( ! ov )
            {
                err = TRUE;
                Log_err( "Overview %d not available in group %s", i, grp );
                continue;
            }

            msgId = Ov_msgId( ov );
            if ( msgId == NULL )
            {
                err = TRUE;
                Log_err( "Overview in %s has no msg id", grp );
                continue;
            }

            /* Crossposted articles are copied only once */
            if ( newContains( msgId ) )
                continue;
            if ( ! newCopyArt( msgId ) )
                err = TRUE;
        }
    }
    while ( Cont_nextGrp( grp ) );

    /* The new database only becomes the main one after a clean run */
    return newClose( ! err );
}

/*
  Utility function. Find the upstream server for a particular message.

  The xref string of the article is a list of "<group>:<number>"
  entries separated by blanks or tabs. For each entry the server of
  the group is looked up, and it is copied to server whenever
  Cfg_servIsPreferential() rates it above the current content of
  server.

  Returns TRUE if server was changed, FALSE otherwise (also if the
  article is not in the database).

  The entries are scanned with strspn()/strcspn() rather than
  strtok(): strtok() keeps hidden global state, which would be
  clobbered if anything called inside the loop used strtok() itself.
*/
Bool
Db_findServer( const char *msgId, Str server )
{
    static const char separators[] = " \t";
    const char *p, *pColon, *srv;
    Str s, grp;
    size_t tokLen;
    Bool res = FALSE;

    if ( ! Db_contains( msgId ) )
        return FALSE;

    /* Work on a copy, as the original did: the string returned by
       Db_xref() lives in the article cache */
    Utl_cpyStr( s, Db_xref( msgId ) );

    for ( p = s + strspn( s, separators );
          *p != '\0';
          p += strspn( p, separators ) )
    {
        tokLen = strcspn( p, separators );

        /* Only look for the colon inside the current entry */
        pColon = memchr( p, ':', tokLen );
        if ( pColon )
        {
            Utl_cpyStrN( grp, p, pColon - p );
            srv = Grp_server( grp );
            if ( Cfg_servIsPreferential( srv, server ) )
            {
                Utl_cpyStr( server, srv );
                res = TRUE;
            }
        }
        p += tokLen;
    }

    return res;
}