diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
commit | 925087356b853e2099c1b60d8b757d7aa02121a9 (patch) | |
tree | 579925c5c9d3da51f43018a5c6d1c4dfbb72b089 /jam-files/engine/hash.c | |
parent | ea79e535d69f6854d01c62e3752971fb6730d8e7 (diff) |
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'jam-files/engine/hash.c')
-rw-r--r-- | jam-files/engine/hash.c | 459 |
1 files changed, 0 insertions, 459 deletions
diff --git a/jam-files/engine/hash.c b/jam-files/engine/hash.c deleted file mode 100644 index fbd1a899..00000000 --- a/jam-files/engine/hash.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Copyright 1993, 1995 Christopher Seiwald. - * - * This file is part of Jam - see jam.c for Copyright information. - */ - -# include "jam.h" -# include "hash.h" -# include "compile.h" -# include <assert.h> - -/* - * hash.c - simple in-memory hashing routines - * - * External routines: - * - * hashinit() - initialize a hash table, returning a handle - * hashitem() - find a record in the table, and optionally enter a new one - * hashdone() - free a hash table, given its handle - * - * Internal routines: - * - * hashrehash() - resize and rebuild hp->tab, the hash table - * - * 4/29/93 - ensure ITEM's are aligned - */ - -/* */ -#define HASH_DEBUG_PROFILE 1 -/* */ - -char *hashsccssid="@(#)hash.c 1.14 () 6/20/88"; - -/* Header attached to all data items entered into a hash table. */ - -struct hashhdr -{ - struct item * next; - unsigned int keyval; /* for quick comparisons */ -}; - -/* This structure overlays the one handed to hashenter(). Its actual size is - * given to hashinit(). - */ - -struct hashdata -{ - char * key; - /* rest of user data */ -}; - -typedef struct item -{ - struct hashhdr hdr; - struct hashdata data; -} ITEM ; - -# define MAX_LISTS 32 - -struct hash -{ - /* - * the hash table, just an array of item pointers - */ - struct { - int nel; - ITEM **base; - } tab; - - int bloat; /* tab.nel / items.nel */ - int inel; /* initial number of elements */ - - /* - * the array of records, maintained by these routines - * essentially a microallocator - */ - struct { - int more; /* how many more ITEMs fit in lists[ list ] */ - ITEM *free; /* free list of items */ - char *next; /* where to put more ITEMs in lists[ list ] */ - int datalen; /* length of records in this hash table */ - int size; /* sizeof( ITEM ) + aligned datalen */ - int nel; /* total ITEMs held by all lists[] */ - int list; /* index into lists[] */ - - struct { - int nel; /* total ITEMs held by this list */ - char *base; /* base of ITEMs array */ - } lists[ MAX_LISTS ]; - } items; - - char * name; /* just for hashstats() */ -}; - -static void hashrehash( struct hash *hp ); -static void hashstat( struct hash *hp ); -static void * hash_mem_alloc(size_t datalen, size_t size); -static void hash_mem_free(size_t datalen, void * data); -#ifdef OPT_BOEHM_GC -static void hash_mem_finalizer(char * key, struct hash * hp); -#endif - -static unsigned int jenkins_one_at_a_time_hash(const unsigned char *key) -{ - unsigned int hash = 0; - - while ( *key ) - { - hash += *key++; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - - return hash; -} - -/* -static unsigned int knuth_hash(const unsigned char *key) -{ - unsigned int keyval = *key; - while ( *key ) - keyval = keyval * 2147059363 + *key++; - return keyval; -} -*/ - -static unsigned int hash_keyval( const char * key_ ) -{ - /* - return knuth_hash((const unsigned char *)key_); - */ - return jenkins_one_at_a_time_hash((const unsigned char *)key_); -} - -#define hash_bucket(hp,keyval) ((hp)->tab.base + ( (keyval) % (hp)->tab.nel )) - -/* Find the hash item for the given data. Returns pointer to the - item and if given a pointer to the item before the found item. - If it's the first item in a bucket, there is no previous item, - and zero is returned for the previous item instead. -*/ -static ITEM * hash_search( - struct hash *hp, - unsigned int keyval, - const char * keydata, - ITEM * * previous ) -{ - ITEM * i = *hash_bucket(hp,keyval); - ITEM * p = 0; - - for ( ; i; i = i->hdr.next ) - { - if ( ( keyval == i->hdr.keyval ) && - !strcmp( i->data.key, keydata ) ) - { - if (previous) - { - *previous = p; - } - return i; - } - p = i; - } - - return 0; -} - -/* - * hash_free() - remove the given item from the table if it's there. - * Returns 1 if found, 0 otherwise. - * - * NOTE: 2nd argument is HASHDATA*, not HASHDATA** as elsewhere. - */ -int -hash_free( - register struct hash *hp, - HASHDATA *data) -{ - ITEM * i = 0; - ITEM * prev = 0; - unsigned int keyval = hash_keyval(data->key); - - i = hash_search( hp, keyval, data->key, &prev ); - if (i) - { - /* mark it free so we skip it during enumeration */ - i->data.key = 0; - /* unlink the record from the hash chain */ - if (prev) prev->hdr.next = i->hdr.next; - else *hash_bucket(hp,keyval) = i->hdr.next; - /* link it into the freelist */ - i->hdr.next = hp->items.free; - hp->items.free = i; - /* we have another item */ - hp->items.more++; - - return 1; - } - return 0; -} - -/* - * hashitem() - find a record in the table, and optionally enter a new one - */ - -int -hashitem( - register struct hash *hp, - HASHDATA **data, - int enter ) -{ - register ITEM *i; - char *b = (*data)->key; - unsigned int keyval = hash_keyval(b); - - #ifdef HASH_DEBUG_PROFILE - profile_frame prof[1]; - if ( DEBUG_PROFILE ) - profile_enter( 0, prof ); - #endif - - if ( enter && !hp->items.more ) - hashrehash( hp ); - - if ( !enter && !hp->items.nel ) - { - #ifdef HASH_DEBUG_PROFILE - if ( DEBUG_PROFILE ) - profile_exit( prof ); - #endif - return 0; - } - - i = hash_search( hp, keyval, (*data)->key, 0 ); - if (i) - { - *data = &i->data; - #ifdef HASH_DEBUG_PROFILE - if ( DEBUG_PROFILE ) profile_exit( prof ); - #endif - return !0; - } - - if ( enter ) - { - ITEM * * base = hash_bucket(hp,keyval); - - /* try to grab one from the free list */ - if ( hp->items.free ) - { - i = hp->items.free; - hp->items.free = i->hdr.next; - assert( i->data.key == 0 ); - } - else - { - i = (ITEM *)hp->items.next; - hp->items.next += hp->items.size; - } - hp->items.more--; - memcpy( (char *)&i->data, (char *)*data, hp->items.datalen ); - i->hdr.keyval = keyval; - i->hdr.next = *base; - *base = i; - *data = &i->data; - #ifdef OPT_BOEHM_GC - if (sizeof(HASHDATA) == hp->items.datalen) - { - GC_REGISTER_FINALIZER(i->data.key,&hash_mem_finalizer,hp,0,0); - } - #endif - } - - #ifdef HASH_DEBUG_PROFILE - if ( DEBUG_PROFILE ) - profile_exit( prof ); - #endif - return 0; -} - -/* - * hashrehash() - resize and rebuild hp->tab, the hash table - */ - -static void hashrehash( register struct hash *hp ) -{ - int i = ++hp->items.list; - hp->items.more = i ? 2 * hp->items.nel : hp->inel; - hp->items.next = (char *)hash_mem_alloc( hp->items.datalen, hp->items.more * hp->items.size ); - hp->items.free = 0; - - hp->items.lists[i].nel = hp->items.more; - hp->items.lists[i].base = hp->items.next; - hp->items.nel += hp->items.more; - - if ( hp->tab.base ) - hash_mem_free( hp->items.datalen, (char *)hp->tab.base ); - - hp->tab.nel = hp->items.nel * hp->bloat; - hp->tab.base = (ITEM **)hash_mem_alloc( hp->items.datalen, hp->tab.nel * sizeof(ITEM **) ); - - memset( (char *)hp->tab.base, '\0', hp->tab.nel * sizeof( ITEM * ) ); - - for ( i = 0; i < hp->items.list; ++i ) - { - int nel = hp->items.lists[i].nel; - char *next = hp->items.lists[i].base; - - for ( ; nel--; next += hp->items.size ) - { - register ITEM *i = (ITEM *)next; - ITEM **ip = hp->tab.base + i->hdr.keyval % hp->tab.nel; - /* code currently assumes rehashing only when there are no free items */ - assert( i->data.key != 0 ); - - i->hdr.next = *ip; - *ip = i; - } - } -} - -void hashenumerate( struct hash * hp, void (* f)( void *, void * ), void * data ) -{ - int i; - for ( i = 0; i <= hp->items.list; ++i ) - { - char * next = hp->items.lists[i].base; - int nel = hp->items.lists[i].nel; - if ( i == hp->items.list ) - nel -= hp->items.more; - - for ( ; nel--; next += hp->items.size ) - { - ITEM * i = (ITEM *)next; - if ( i->data.key != 0 ) /* DO not enumerate freed items. */ - f( &i->data, data ); - } - } -} - -/* --- */ - -# define ALIGNED(x) ( ( x + sizeof( ITEM ) - 1 ) & ~( sizeof( ITEM ) - 1 ) ) - -/* - * hashinit() - initialize a hash table, returning a handle - */ - -struct hash * -hashinit( - int datalen, - char *name ) -{ - struct hash *hp = (struct hash *)hash_mem_alloc( datalen, sizeof( *hp ) ); - - hp->bloat = 3; - hp->tab.nel = 0; - hp->tab.base = (ITEM **)0; - hp->items.more = 0; - hp->items.free = 0; - hp->items.datalen = datalen; - hp->items.size = sizeof( struct hashhdr ) + ALIGNED( datalen ); - hp->items.list = -1; - hp->items.nel = 0; - hp->inel = 11 /* 47 */; - hp->name = name; - - return hp; -} - -/* - * hashdone() - free a hash table, given its handle - */ - -void -hashdone( struct hash *hp ) -{ - int i; - - if ( !hp ) - return; - - if ( DEBUG_MEM || DEBUG_PROFILE ) - hashstat( hp ); - - if ( hp->tab.base ) - hash_mem_free( hp->items.datalen, (char *)hp->tab.base ); - for ( i = 0; i <= hp->items.list; ++i ) - hash_mem_free( hp->items.datalen, hp->items.lists[i].base ); - hash_mem_free( hp->items.datalen, (char *)hp ); -} - -static void * hash_mem_alloc(size_t datalen, size_t size) -{ - if (sizeof(HASHDATA) == datalen) - { - return BJAM_MALLOC_RAW(size); - } - else - { - return BJAM_MALLOC(size); - } -} - -static void hash_mem_free(size_t datalen, void * data) -{ - if (sizeof(HASHDATA) == datalen) - { - BJAM_FREE_RAW(data); - } - else - { - BJAM_FREE(data); - } -} - -#ifdef OPT_BOEHM_GC -static void hash_mem_finalizer(char * key, struct hash * hp) -{ - HASHDATA d; - d.key = key; - hash_free(hp,&d); -} -#endif - - -/* ---- */ - -static void hashstat( struct hash * hp ) -{ - ITEM * * tab = hp->tab.base; - int nel = hp->tab.nel; - int count = 0; - int sets = 0; - int run = ( tab[ nel - 1 ] != (ITEM *)0 ); - int i; - int here; - - for ( i = nel; i > 0; --i ) - { - if ( ( here = ( *tab++ != (ITEM *)0 ) ) ) - count++; - if ( here && !run ) - sets++; - run = here; - } - - printf( "%s table: %d+%d+%d (%dK+%luK) items+table+hash, %f density\n", - hp->name, - count, - hp->items.nel, - hp->tab.nel, - hp->items.nel * hp->items.size / 1024, - (long unsigned)hp->tab.nel * sizeof( ITEM ** ) / 1024, - (float)count / (float)sets ); -} |