ELinks 0.18.0
fastfind.c File Reference

Very fast search_keyword_in_list. More...

#include <stdio.h>
#include <stdlib.h>
#include "elinks.h"
#include "util/conv.h"
#include "util/error.h"
#include "util/fastfind.h"
#include "util/memdebug.h"
#include "util/memory.h"
Include dependency graph for fastfind.c:

Data Structures

struct  ff_node_c
struct  ff_data
struct  fastfind_info

Macros

#define USE_32_BITS
 Define it to generate performance and memory usage statistics to stderr.
#define END_LEAF_BITS   1
#define COMPRESSED_BITS   1
#define POINTER_INDEX_BITS   10 /* 1024 */
#define LEAFSET_INDEX_BITS   13 /* 8192 */
#define COMP_CHAR_INDEX_BITS   7 /* 128 */
#define ff_node   ff_node_c /* Both are 32 bits long. */
#define FF_MAX_KEYS   (1 << POINTER_INDEX_BITS)
#define FF_MAX_LEAFSETS   ((1 << LEAFSET_INDEX_BITS) - 1)
#define FF_MAX_CHARS   (1 << COMP_CHAR_INDEX_BITS)
#define FF_MAX_KEYLEN   255
#define FF_DBG_mem(x, size)
#define FF_DBG_test(x)
#define FF_DBG_iter(x)
#define FF_DBG_cnode(x)
#define FF_DBG_found(x)
#define FF_DBG_comment(x, comment)
#define FF_DBG_search_stats(info, key_len)
#define FF_DBG_dump_stats(info)
#define ifcase(c)
#define FF_SEARCH(what)
 This macro searchs for the key in indexed list.

Functions

static struct fastfind_infoinit_fastfind (struct fastfind_index *index, fastfind_flags_T flags)
static int alloc_ff_data (struct fastfind_info *info)
static void add_to_ff_data (void *p, int key_len, struct fastfind_info *info)
 Add pointer and its key length to correspondant arrays, incrementing internal counter.
static int alloc_leafset (struct fastfind_info *info)
static int char2idx (unsigned char c, struct fastfind_info *info)
static void init_idxtab (struct fastfind_info *info)
static void compress_node (struct ff_node *leafset, struct fastfind_info *info, int i, int pos)
static void compress_tree (struct ff_node *leafset, struct fastfind_info *info)
struct fastfind_index * fastfind_index (struct fastfind_index *index, fastfind_flags_T flags)
void * fastfind_search (struct fastfind_index *index, const char *key, int key_len)
void fastfind_done (struct fastfind_index *index)

Detailed Description

Very fast search_keyword_in_list.

It replaces bsearch() + strcasecmp() + callback + ...

Following conditions should be met:

  • list keys are C strings.
  • keys should not be greater than 255 characters, and optimally < 20 characters. It can work with greater keys but then memory usage will grow a lot.
  • each key must be unique and non empty.
  • list do not have to be ordered.
  • total number of unique characters used in all keys should be <= 128
  • idealy total number of keys should be <= 512 (but see below)

    (c) 2003 Laurent MONIN (aka Zas) Feel free to do whatever you want with that code.

These routines use a tree search. First, a big tree is composed from the keys on input. Then, when searching we just go through the tree. If we will end up on an 'ending' node, we've got it.

Hm, okay. For keys { 'head', 'h1', 'body', 'bodyrock', 'bodyground' }, it would look like:

*             [root]
*          b          h
*          o        e   1
*          d        a
*          Y        D
*        g   r
*        r   o
*        o   c
*        u   K
*        D
* 

(the ending nodes are upcased just for this drawing, not in real)

To optimize this for speed, leafs of nodes are organized in per-node arrays (so-called 'leafsets'), indexed by symbol value of the key's next character. But to optimize that for memory, we first compose own alphabet consisting only from the chars we ever use in the key strings. fastfind_info.uniq_chars holds that alphabet and fastfind_info.idxtab is used to translate between it and ASCII.

Tree building: O((L+M)*N) (L: mean key length, M: alphabet size, N: number of items). String lookup: O(N) (N: string length).

Macro Definition Documentation

◆ COMP_CHAR_INDEX_BITS

#define COMP_CHAR_INDEX_BITS   7 /* 128 */

◆ COMPRESSED_BITS

#define COMPRESSED_BITS   1

◆ END_LEAF_BITS

#define END_LEAF_BITS   1

◆ FF_DBG_cnode

#define FF_DBG_cnode ( x)

◆ FF_DBG_comment

#define FF_DBG_comment ( x,
comment )

◆ FF_DBG_dump_stats

#define FF_DBG_dump_stats ( info)

◆ FF_DBG_found

#define FF_DBG_found ( x)

◆ FF_DBG_iter

#define FF_DBG_iter ( x)

◆ FF_DBG_mem

#define FF_DBG_mem ( x,
size )

◆ FF_DBG_search_stats

#define FF_DBG_search_stats ( info,
key_len )

◆ FF_DBG_test

#define FF_DBG_test ( x)

◆ FF_MAX_CHARS

#define FF_MAX_CHARS   (1 << COMP_CHAR_INDEX_BITS)

◆ FF_MAX_KEYLEN

#define FF_MAX_KEYLEN   255

◆ FF_MAX_KEYS

#define FF_MAX_KEYS   (1 << POINTER_INDEX_BITS)

◆ FF_MAX_LEAFSETS

#define FF_MAX_LEAFSETS   ((1 << LEAFSET_INDEX_BITS) - 1)

◆ ff_node

#define ff_node   ff_node_c /* Both are 32 bits long. */

◆ FF_SEARCH

#define FF_SEARCH ( what)

This macro searchs for the key in indexed list.

◆ ifcase

#define ifcase ( c)
Value:
( info->case_aware ? (c) : (info->locale_indep ? c_toupper(c) : toupper(c)) )
int c_toupper(int c)
Definition conv.c:567

◆ LEAFSET_INDEX_BITS

#define LEAFSET_INDEX_BITS   13 /* 8192 */

◆ POINTER_INDEX_BITS

#define POINTER_INDEX_BITS   10 /* 1024 */

◆ USE_32_BITS

#define USE_32_BITS

Define it to generate performance and memory usage statistics to stderr.

Define whether to use 32 or 64 bits per compressed element.

Function Documentation

◆ add_to_ff_data()

void add_to_ff_data ( void * p,
int key_len,
struct fastfind_info * info )
static

Add pointer and its key length to correspondant arrays, incrementing internal counter.

◆ alloc_ff_data()

int alloc_ff_data ( struct fastfind_info * info)
static
Returns
1 on success, 0 on allocation failure

◆ alloc_leafset()

int alloc_leafset ( struct fastfind_info * info)
static
Returns
1 on success, 0 on allocation failure

◆ char2idx()

int char2idx ( unsigned char c,
struct fastfind_info * info )
inlinestatic

◆ compress_node()

void compress_node ( struct ff_node * leafset,
struct fastfind_info * info,
int i,
int pos )
inlinestatic

◆ compress_tree()

void compress_tree ( struct ff_node * leafset,
struct fastfind_info * info )
static

◆ fastfind_done()

void fastfind_done ( struct fastfind_index * index)
related

◆ fastfind_index()

struct fastfind_index * fastfind_index ( struct fastfind_index * index,
fastfind_flags_T flags )
related

◆ fastfind_search()

void * fastfind_search ( struct fastfind_index * index,
const char * key,
int key_len )
related

◆ init_fastfind()

struct fastfind_info * init_fastfind ( struct fastfind_index * index,
fastfind_flags_T flags )
static

◆ init_idxtab()

void init_idxtab ( struct fastfind_info * info)
inlinestatic