10 * This file implements a "database" of "bad" domains, loaded from
11 * ".acl" files of a fairly strict format; each domain to block is
12 * written on a line starting with a period, immediately followed by
13 * the domain to block, then an optional comment.
15 * The database is populated by using the call sequence:
16 * 1. start_domain_database_loading();
17 * 2. load_domains( filename ); // repeated
18 * N. end_domain_database_loading();
20 * The final call triggers a reordering of domains so as to support
21 * binary search in reverse text order, for matching domain suffixes.
22 * See the function `tail_compare` for details.
26 * This is the Entry type for the "database", which basically is an
27 * array of these. The domain pointer will point at a domain name in
28 * the loaded ".acl" file, and length is the domain name length.
30 typedef struct _Entry {
32 unsigned char *domain;
36 * This is the domain name database root structure. It holds a pointer
37 * to the array of Entry records, the fill of that array, and the
38 * allocated size for that array (no lesser than the fill, of course).
44 } database = { 0, 0, 0 };
47 * This function compares strings backwars; the last k bytes of string
48 * (a,na) versus string (b,nb). It also holds '.' as the least of
49 * characters, so as to ensure that refined/extended domain names are
50 * comparatively greater that their base domain names.
52 static int tail_compare(unsigned char *a,unsigned char *b,int k) {
54 int c = *(--a) - *(--b);
69 * Extend the domain name table to allow additions.
71 #define STARTSIZE 100000
73 if ( database.table ) {
74 Entry *old = database.table;
75 int s = database.size;
76 database.size += 100000;
77 database.table = (Entry*) calloc( database.size, sizeof( Entry ) );
78 memcpy( database.table, old, s * sizeof( Entry ) );
81 database.table = (Entry*) calloc( STARTSIZE, sizeof( Entry ) );
82 database.size = STARTSIZE;
87 * Determine the index for given domain. This matches computes a tail
88 * match between the given domain and the databse domains, returning
89 * the index for the matching database entry, or (-index-1) to
90 * indicate insertion point. In lookup mode, a database entry being a
91 * tail domain part of the given domain is also considered a match.
93 static int index_domain(unsigned char *domain,int n,int lookup) {
95 int hi = database.fill;
97 int m = ( lo + hi ) / 2;
98 Entry *p = &database.table[ m ];
103 int q = tail_compare( p->domain + p->length, domain + n, k );
105 fprintf( stderr, "%s %d %d %d\n", domain, k, m, q );
108 if ( p->length < n ) {
109 // table entry shorter => new entry after, or match on lookup
110 if ( lookup && *(domain+n-k-1) == '.' ) {
114 } else if ( p->length > n ) {
115 // table entry longer => new entry before
121 } else if ( q < 0 ) {
133 * Determine the length of a "word"
135 static int wordlen(unsigned char *p) {
136 unsigned char *q = p;
144 static void add_domain(char *domain) {
145 if ( database.fill >= database.size ) {
148 int length = wordlen( domain );
149 int i = index_domain( domain, length, 0 );
152 int tail = database.fill - i;
154 memmove( &database.table[ i+1 ],
156 tail * sizeof( Entry ) );
158 database.table[ i ].domain = domain;
159 database.table[ i ].length = length;
162 char *p1 = strndup( domain, length );
163 char *p2 = strndup( database.table[i].domain,
164 database.table[i].length );
165 fprintf( stderr, "fill = %d %d %s == %s\n",
166 i, database.fill, p1, p2 );
173 static void fast_add_domain(unsigned char *domain,int length) {
174 int fill = database.fill;
175 if ( fill >= database.size ) {
178 database.table[ fill ].length = length;
179 database.table[ fill ].domain = domain;
183 static int table_order(Entry *a,Entry *b) {
184 int k = ( a->length < b->length )? a->length : b->length;
185 int c = tail_compare( a->domain + a->length,
186 b->domain + b->length, k );
190 return a->length - b->length;
194 * External call to check a given domain.
196 unsigned int check_domain(unsigned char *domain) {
197 int i = index_domain( domain, wordlen( domain ), 1 );
198 return ( i < 0 )? 0 : ( i + 1 );
201 void start_domain_database_loading(void) {
205 static void dump_table() {
206 fprintf( stderr, "Table fill=%d size=%d\n", database.fill, database.size );
208 for ( ; i < database.fill; i++ ) {
209 char *p = strndup( database.table[i].domain,
210 database.table[i].length );
211 fprintf( stderr, "[%d] %d %p %s\n",
212 i, database.table[i].length, database.table[i].domain, p );
218 void end_domain_database_loading(void) {
219 qsort( database.table, database.fill, sizeof( Entry ),
220 (__compar_fn_t) table_order );
225 * Load BAD domain names from file. The file is line based where data
226 * lines consist of domain name starting with period and ending with
227 * space or newline, and other lines ignored.
229 void load_domains(char *file) {
232 //fprintf( stderr, "state(\"%s\",&info)\n", file );
233 if ( stat( file, &info ) ) {
237 int n = info.st_size;
238 data = (unsigned char *) malloc( n );
239 //fprintf( stderr, "open(\"%s\",)\n", file );
240 int fd = open( file, O_RDONLY );
245 //fprintf( stderr, "Loading %s\n", file );
246 unsigned char *end = data;
248 int k = read( fd, end, n );
250 fprintf( stderr, "Premature EOF for %s\n", file );
256 //fprintf( stderr, "processing %s %p %p\n", file, data, end );
257 unsigned char *p = data;
263 if ( ( ++count % 10000 ) == 0 ) {
264 fprintf( stderr, "%d rules\n", count );
268 unsigned char *domain = ++p;
272 fast_add_domain( domain, p - domain );
274 while ( p < end && *p != '\n' ) {