JudyHS(3) | JudyHS(3) |
cc [flags] sourcefiles -lJudy #include <Judy.h> Word_t * PValue; // JudyHS array element int Rc_int; // return flag Word_t Rc_word; // full word return value Pvoid_t PJHSArray = (Pvoid_t) NULL; // initialize JudyHS array uint8_t * Index; // array-of-bytes pointer Word_t Length; // number of bytes in Index Pvoid_t PJHSIter = (Pvoid_t) NULL; // initialize JudyHSIter JHSI( PValue, PJHSArray, Index, Length); // JudyHSIns() JHSD( Rc_int, PJHSArray, Index, Length); // JudyHSDel() JHSG( PValue, PJHSArray, Index, Length); // JudyHSGet() JHSFA(Rc_word, PJHSArray); // JudyHSFreeArray() JHSIF( PValue, PJHSArray, PJHSIter, Index, Length); // JudyHSIterFirst() JHSIN( PValue, PJHSArray, PJHSIter, Index, Length); // JudyHSNIterext() JHSIL( PValue, PJHSArray, PJHSIter, Index, Length); // JudyHSLIterast() JHSIP( PValue, PJHSArray, PJHSIter, Index, Length); // JudyHSPIterrev() JHSFI(Rc_word, PJHSIter); // JudyHSFreeIter()
The Length of each array-of-bytes can be from 0 to the limits of malloc() (about 2GB). Since JudyHS is based on a hash method, Indexes are not stored in any externally useful order; therefore the JudyHSIterFirst(), JudyHSIterNext(), JudyHSIterPrev() and JudyHSIterLast() neighbor search functions should be considered as providing an unordered enumeration of keys and values.
The hallmark of JudyHS is speed with scalability, but memory efficiency is excellent. The speed is very competitive with the best hashing methods. The memory efficiency is similar to a linked list of the same Indexes and Values. JudyHS is designed to scale from 0 to billions of Indexes.
A JudyHS array is allocated with a NULL pointer
Pvoid_t PJHSArray = (Pvoid_t) NULL;
Because the macro forms of the API have a simpler error handling interface than the equivalent functions, they are the preferred way to use JudyHS.
Return PValue pointing to Value. Your program should use this pointer to read or modify the Value, for example:
Value = *PValue; *PValue = 1234;
Note: JHSI() and JHSD can reorganize the JudyHS array. Therefore, pointers returned from previous JudyHS calls become invalid and must be re-acquired (using JHSG()).
Return Rc_int set to 1 if successfully removed from the array. Return Rc_int set to 0 if Index was not present.
Return PValue pointing to Index's Value. Return PValue set to NULL if the Index was not present.
Return Rc_word set to the number of bytes freed and PJHSArray set to NULL.
If successful, Index is returned set to a pointer to the found index (in a read-only buffer owned by the iterator which will be overwritten by later search operations), Length is returned set to the length of the found index, and PValue is returned set to a pointer to Index's Value. If unsuccessful, PValue is returned set to NULL, and Index and Length are returned set to NULL and 0 respectively. PValue must be tested for non-NULL prior to using Index and Length, since a search failure is possible.
A JudyHS iterator is allocated with a NULL pointer
Pvoid_t PJHSIter = (Pvoid_t) NULL;and when freed is reset to a NULL pointer.
Example of use:
Pvoid_t PJHSIter; uint8_t *Index; Word_t Length; Word_t Rc_word; PJHSIter = (Pvoid_t) NULL; Index = (uint8_t) NULL; Length = 0; JHSIF(PValue, PJHSArray, PJHSIter, Index, Length); while (PValue) { use(Index, Length, PValue); JHSIN(PValue, PJHSArray, PJHSIter, Index, Length); } JHSFI(Rc_word, PJHSIter);
Return Rc_word set to the number of bytes freed and PJHSIter set to NULL.
#include <unistd.h> #include <stdio.h> #include <string.h> #include <Judy.h> // Compiled: // cc -O PrintDupLines.c -lJudy -o PrintDupLines #define MAXLINE 1000000 /* max fgets length of line */ uint8_t Index[MAXLINE]; // string to check int // Usage: PrintDupLines < file main() { Pvoid_t PJArray = (PWord_t)NULL; // Judy array. PWord_t PValue; // Judy array element pointer. Word_t Bytes; // size of JudyHS array. Word_t LineNumb = 0; // current line number Word_t Dups = 0; // number of duplicate lines while (fgets(Index, MAXLINE, stdin) != (char *)NULL) { LineNumb++; // line number // store string into array JHSI(PValue, PJArray, Index, strlen(Index)); if (PValue == PJERR) // See ERRORS section { fprintf(stderr, "Out of memory -- exit\n"); exit(1); } if (*PValue != 0) // check if duplicate { Dups++; printf("Duplicate lines %lu:%lu:%s", *PValue, LineNumb, Index); } else { *PValue = LineNumb; // store Line number } } printf("%lu Duplicates, JudyHS array of %lu Lines\n", Dups, LineNumb - Dups); // dump lines in hash order printf("Lines in hash order:\n"); { Pvoid_t PJHSIter = (Pvoid_t) NULL; // JudyHS iterator uint8_t *Index2 = (uint8_t) NULL; // JudyHS key: line Word_t Length2 = 0; // length of key PWord_t PValue2; // pointer to value Word_t IterBytes; // size of iterator JHSIF(PValue2, PJHSArray, PJHSIter, Index2, Length2); while (PValue2) { printf(" line %lu: %*.*s", *PValue2, Index2, Length2, Length2); JHSIN(PValue2, PJHSArray, PJHSIter, Index2, Length2); } JHSFI(IterBytes, PJHSIter); printf("JudyHSFreeIter() freed %lu bytes of memory\n", IterBytes); } JHSFA(Bytes, PJArray); // free JudyHS array printf("JudyHSFreeArray() freed %lu bytes of memory\n", Bytes); return (0); }