You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
407 lines
8.8 KiB
407 lines
8.8 KiB
/*
|
|
* Copyright 2006 Rob Kendrick <rjek@rjek.com>
|
|
* Copyright 2006 Richard Wilson <info@tinct.net>
|
|
*
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
*
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; version 2 of the License.
|
|
*
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/**
|
|
* \file
|
|
* Write-Once hash table for string to string mappings.
|
|
*
|
|
* This implementation is unit tested, if you make changes please
|
|
* ensure the tests continute to pass and if possible, through
|
|
* valgrind to make sure there are no memory leaks or invalid memory
|
|
* accesses. If you add new functionality, please include a test for
|
|
* it that has good coverage along side the other tests.
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <zlib.h>
|
|
#include <errno.h>
|
|
|
|
#include "utils/log.h"
|
|
#include "utils/hashtable.h"
|
|
|
|
|
|
struct hash_entry {
|
|
char *pairing; /**< block containing 'key\0value\0' */
|
|
unsigned int key_length; /**< length of key */
|
|
struct hash_entry *next; /**< next entry */
|
|
};
|
|
|
|
struct hash_table {
|
|
unsigned int nchains;
|
|
struct hash_entry **chain;
|
|
};
|
|
|
|
/** maximum length of line for file or inline add */
|
|
#define LINE_BUFFER_SIZE 512
|
|
|
|
/**
|
|
* Hash a string, returning a 32bit value. The hash algorithm used is
|
|
* Fowler Noll Vo - a very fast and simple hash, ideal for short strings.
|
|
* See http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash for more details.
|
|
*
|
|
* \param datum The string to hash.
|
|
* \param len Pointer to unsigned integer to record datum's length in.
|
|
* \return The calculated hash value for the datum.
|
|
*/
|
|
static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len)
|
|
{
|
|
unsigned int z = 0x811c9dc5;
|
|
const char *start = datum;
|
|
*len = 0;
|
|
|
|
if (datum == NULL)
|
|
return 0;
|
|
|
|
while (*datum) {
|
|
z *= 0x01000193;
|
|
z ^= *datum++;
|
|
}
|
|
*len = datum - start;
|
|
|
|
return z;
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* process a line of input.
|
|
*
|
|
* \param hash The hash table to add the line to
|
|
* \param ln The line to process
|
|
* \param lnlen The length of \ln
|
|
* \return NSERROR_OK on success else NSERROR_INVALID
|
|
*/
|
|
static nserror
|
|
process_line(struct hash_table *hash, uint8_t *ln, int lnlen)
|
|
{
|
|
uint8_t *key;
|
|
uint8_t *value;
|
|
uint8_t *colon;
|
|
|
|
key = ln; /* set key to start of line */
|
|
value = ln + lnlen; /* set value to end of line */
|
|
|
|
/* skip leading whitespace */
|
|
while ((key < value) &&
|
|
((*key == ' ') || (*key == '\t'))) {
|
|
key++;
|
|
}
|
|
|
|
/* empty or comment lines */
|
|
if ((*key == 0) || (*key == '#')) {
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
/* find first colon as key/value separator */
|
|
for (colon = key; colon < value; colon++) {
|
|
if (*colon == ':') {
|
|
break;
|
|
}
|
|
}
|
|
if (colon == value) {
|
|
/* no colon found */
|
|
return NSERROR_INVALID;
|
|
}
|
|
|
|
*colon = 0; /* terminate key */
|
|
value = colon + 1;
|
|
|
|
if (hash_add(hash, (char *)key, (char *)value) == false) {
|
|
NSLOG(netsurf, INFO,
|
|
"Unable to add %s:%s to hash table", ln, value);
|
|
return NSERROR_INVALID;
|
|
}
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/**
|
|
* adds key/value pairs to a hash from a memory area
|
|
*/
|
|
static nserror
|
|
hash_add_inline_plain(struct hash_table *ht, const uint8_t *data, size_t size)
|
|
{
|
|
uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */
|
|
unsigned int slen = 0;
|
|
nserror res = NSERROR_OK;
|
|
|
|
while (size > 0) {
|
|
s[slen] = *data;
|
|
|
|
if (s[slen] == '\n') {
|
|
s[slen] = 0; /* replace newline with null termination */
|
|
res = process_line(ht, s, slen);
|
|
slen = 0;
|
|
if (res != NSERROR_OK) {
|
|
break;
|
|
}
|
|
} else {
|
|
slen++;
|
|
if (slen > sizeof s) {
|
|
NSLOG(netsurf, INFO, "Overlength line\n");
|
|
slen = 0;
|
|
}
|
|
}
|
|
|
|
size--;
|
|
data++;
|
|
}
|
|
if (slen > 0) {
|
|
s[slen] = 0;
|
|
res = process_line(ht, s, slen);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* adds key/value pairs to a hash from a compressed memory area
|
|
*/
|
|
static nserror
|
|
hash_add_inline_gzip(struct hash_table *ht, const uint8_t *data, size_t size)
|
|
{
|
|
nserror res;
|
|
int ret; /* zlib return value */
|
|
z_stream strm;
|
|
uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */
|
|
size_t used = 0; /* number of bytes in buffer in use */
|
|
uint8_t *nl;
|
|
|
|
strm.zalloc = Z_NULL;
|
|
strm.zfree = Z_NULL;
|
|
strm.opaque = Z_NULL;
|
|
|
|
strm.next_in = (uint8_t *)data;
|
|
strm.avail_in = size;
|
|
|
|
ret = inflateInit2(&strm, 32 + MAX_WBITS);
|
|
if (ret != Z_OK) {
|
|
NSLOG(netsurf, INFO, "inflateInit returned %d", ret);
|
|
return NSERROR_INVALID;
|
|
}
|
|
|
|
do {
|
|
strm.next_out = s + used;
|
|
strm.avail_out = sizeof(s) - used;
|
|
|
|
ret = inflate(&strm, Z_NO_FLUSH);
|
|
if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
|
|
break;
|
|
}
|
|
|
|
used = sizeof(s) - strm.avail_out;
|
|
while (used > 0) {
|
|
/* find nl */
|
|
for (nl = &s[0]; nl < &s[used]; nl++) {
|
|
if (*nl == '\n') {
|
|
break;
|
|
}
|
|
}
|
|
if (nl == &s[used]) {
|
|
/* no nl found */
|
|
break;
|
|
}
|
|
/* found newline */
|
|
*nl = 0; /* null terminate line */
|
|
res = process_line(ht, &s[0], nl - &s[0]);
|
|
if (res != NSERROR_OK) {
|
|
inflateEnd(&strm);
|
|
return res;
|
|
}
|
|
|
|
/* move data down */
|
|
memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) );
|
|
used -= ((nl +1) - &s[0]);
|
|
}
|
|
if (used == sizeof(s)) {
|
|
/* entire buffer used and no newline */
|
|
NSLOG(netsurf, INFO, "Overlength line");
|
|
used = 0;
|
|
}
|
|
} while (ret != Z_STREAM_END);
|
|
|
|
inflateEnd(&strm);
|
|
|
|
if (ret != Z_STREAM_END) {
|
|
NSLOG(netsurf, INFO, "inflate returned %d", ret);
|
|
return NSERROR_INVALID;
|
|
}
|
|
return NSERROR_OK;
|
|
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
struct hash_table *hash_create(unsigned int chains)
|
|
{
|
|
struct hash_table *r = malloc(sizeof(struct hash_table));
|
|
|
|
if (r == NULL) {
|
|
NSLOG(netsurf, INFO, "Not enough memory for hash table.");
|
|
return NULL;
|
|
}
|
|
|
|
r->nchains = chains;
|
|
r->chain = calloc(chains, sizeof(struct hash_entry *));
|
|
|
|
if (r->chain == NULL) {
|
|
NSLOG(netsurf, INFO,
|
|
"Not enough memory for %d hash table chains.", chains);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
void hash_destroy(struct hash_table *ht)
|
|
{
|
|
unsigned int i;
|
|
|
|
if (ht == NULL)
|
|
return;
|
|
|
|
for (i = 0; i < ht->nchains; i++) {
|
|
if (ht->chain[i] != NULL) {
|
|
struct hash_entry *e = ht->chain[i];
|
|
while (e) {
|
|
struct hash_entry *n = e->next;
|
|
free(e->pairing);
|
|
free(e);
|
|
e = n;
|
|
}
|
|
}
|
|
}
|
|
|
|
free(ht->chain);
|
|
free(ht);
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
bool hash_add(struct hash_table *ht, const char *key, const char *value)
|
|
{
|
|
unsigned int h, c, v;
|
|
struct hash_entry *e;
|
|
|
|
if (ht == NULL || key == NULL || value == NULL)
|
|
return false;
|
|
|
|
e = malloc(sizeof(struct hash_entry));
|
|
if (e == NULL) {
|
|
NSLOG(netsurf, INFO, "Not enough memory for hash entry.");
|
|
return false;
|
|
}
|
|
|
|
h = hash_string_fnv(key, &(e->key_length));
|
|
c = h % ht->nchains;
|
|
|
|
v = strlen(value) ;
|
|
e->pairing = malloc(v + e->key_length + 2);
|
|
if (e->pairing == NULL) {
|
|
NSLOG(netsurf, INFO,
|
|
"Not enough memory for string duplication.");
|
|
free(e);
|
|
return false;
|
|
}
|
|
memcpy(e->pairing, key, e->key_length + 1);
|
|
memcpy(e->pairing + e->key_length + 1, value, v + 1);
|
|
|
|
e->next = ht->chain[c];
|
|
ht->chain[c] = e;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
const char *hash_get(struct hash_table *ht, const char *key)
|
|
{
|
|
unsigned int h, c, key_length;
|
|
struct hash_entry *e;
|
|
|
|
if (ht == NULL || key == NULL)
|
|
return NULL;
|
|
|
|
h = hash_string_fnv(key, &key_length);
|
|
c = h % ht->nchains;
|
|
|
|
for (e = ht->chain[c]; e; e = e->next) {
|
|
if ((key_length == e->key_length) &&
|
|
(memcmp(key, e->pairing, key_length) == 0)) {
|
|
return e->pairing + key_length + 1;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
nserror hash_add_file(struct hash_table *ht, const char *path)
|
|
{
|
|
nserror res = NSERROR_OK;
|
|
char s[LINE_BUFFER_SIZE]; /* line buffer */
|
|
gzFile fp; /* compressed file handle */
|
|
|
|
if (path == NULL) {
|
|
return NSERROR_BAD_PARAMETER;
|
|
}
|
|
|
|
fp = gzopen(path, "r");
|
|
if (!fp) {
|
|
NSLOG(netsurf, INFO,
|
|
"Unable to open file \"%.100s\": %s", path,
|
|
strerror(errno));
|
|
|
|
return NSERROR_NOT_FOUND;
|
|
}
|
|
|
|
while (gzgets(fp, s, sizeof s)) {
|
|
int slen = strlen(s);
|
|
s[--slen] = 0; /* remove \n at end */
|
|
|
|
res = process_line(ht, (uint8_t *)s, slen);
|
|
if (res != NSERROR_OK) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
gzclose(fp);
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/* exported interface documented in utils/hashtable.h */
|
|
nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size)
|
|
{
|
|
if ((data[0]==0x1f) && (data[1] == 0x8b)) {
|
|
/* gzip header detected */
|
|
return hash_add_inline_gzip(ht, data, size);
|
|
}
|
|
return hash_add_inline_plain(ht, data, size);
|
|
}
|