--- /dev/null
+# Distribution Makefile for blockdomains
+#
+# Typical use:
+#   make                          build the daemon and the man pages
+#   make install DESTDIR=/stage   install below an optional staging root
+#   make clean                    remove the generated files
+
+# Remove a half-written target when its recipe fails.
+.DELETE_ON_ERROR:
+
+# Tools and flags; override on the command line, e.g. `make CC=clang`.
+CC = gcc
+CFLAGS = -g -Wall
+LDLIBS = -lnetfilter_queue
+INSTALL = install
+ASCIIDOCTOR = asciidoctor
+
+# Generated files
+SBINFILES = blockdomains
+# NOTE(review): no rule in this Makefile produces blockdomainsctl; the
+# install rule below expects a file of that name in this directory.
+BINFILES = blockdomainsctl
+MANFILES = blockdomains.5 blockdomains.8 blockdomainsctl.8
+CFGDIRS = blocked/ acl/
+
+default: $(SBINFILES) $(MANFILES)
+
+.PHONY: default install clean
+
+# Building targets
+
+$(MANFILES): %: doc/%.adoc
+	$(ASCIIDOCTOR) -bmanpage -o $@ $<
+
+README.html: README.adoc
+	$(ASCIIDOCTOR) -bhtml -o $@ $<
+
+blockdomains: src/blockdomains.c src/cache.c src/database.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+# Installation
+
+SBINDIR = $(DESTDIR)/usr/sbin
+BINDIR = $(DESTDIR)/usr/bin
+CFGTOP = $(DESTDIR)/etc/blockdomains
+MAN5DIR = $(DESTDIR)/usr/share/man/man5
+MAN8DIR = $(DESTDIR)/usr/share/man/man8
+SYSVINIT = $(DESTDIR)/etc/init.d/blockdomains
+
+INSTALLED_SBIN = $(addprefix $(SBINDIR)/,$(SBINFILES))
+INSTALLED_BIN = $(addprefix $(BINDIR)/,$(BINFILES))
+INSTALLED_MAN5 = $(addprefix $(MAN5DIR)/,$(filter %.5,$(MANFILES)))
+INSTALLED_MAN8 = $(addprefix $(MAN8DIR)/,$(filter %.8,$(MANFILES)))
+
+install: $(INSTALLED_SBIN) $(INSTALLED_BIN)
+install: $(INSTALLED_MAN5) $(INSTALLED_MAN8)
+install: $(addprefix $(CFGTOP)/,$(CFGDIRS))
+install: $(SYSVINIT)
+
+$(SYSVINIT): init/blockdomains
+	$(INSTALL) -D $< $@
+
+$(CFGTOP)/%/:
+	mkdir -p $@
+
+# One static pattern rule per destination directory. A single pattern
+# rule listing several target patterns would tell make that one run of
+# the recipe creates all of them, which is not what `install` does.
+$(INSTALLED_SBIN): $(SBINDIR)/%: %
+	$(INSTALL) -D $< $@
+
+$(INSTALLED_BIN): $(BINDIR)/%: %
+	$(INSTALL) -D $< $@
+
+# Manual pages are data files, so they are installed non-executable.
+$(INSTALLED_MAN5): $(MAN5DIR)/%: %
+	$(INSTALL) -D -m 644 $< $@
+
+$(INSTALLED_MAN8): $(MAN8DIR)/%: %
+	$(INSTALL) -D -m 644 $< $@
+
+# Cleaning up
+
+clean:
+	$(RM) $(SBINFILES) $(MANFILES) README.html
--- /dev/null
+= Blocklist based domain name filtering
+
+The `blockdomains` utility is a blacklist based network traffic filter
+for `iptables` via `libnetfilter-queue`. It applies to HTTP and SSL
+traffic for recognizing and dropping packets that are directed to
+blacklisted domain names.
+
+== Dependencies
+
+Operationally `blockdomains` depends on the `libnetfilter-queue-dev` and
+`iptables` packages, and for building, you'll also need a C build
+environment including `make`.
+
+The blacklist format is that of squidblacklist.org, which you'll need
+to acquire separately.
+
+== Build and Install
+
+`blockdomains` is distributed in a tar file, which should be unpacked at
+its future residence; e.g., as /usr/local/src/blockdomains-1.0.0. Then
+`cd` into that directory and type:
+
+> `# make`
+
+This will build the binary filter, and install the control script as
+`/usr/local/sbin/blockdomains.sh`. Edit the Makefile to install
+elsewhere.
+
+== Setup and Configuration
+
+The utility has a configuration directory `acl` that is intended to
+hold all available access control lists, and a directory `blocked`
+that should be set up with links to the access control list files
+to use. For example:
+
+> `# ( cd blocked && ln -s ../acl/youtube-google-videos.acl )`
+
+That command will set up `youtube-google-videos.acl` to be an included
+blacklist. Do the opposite to remove; for example:
+
+> `# rm blocked/youtube-google-videos.acl`
+
+== Running
+
+The `blockdomains` is started with the following command:
+
+> `# blockdomains.sh start`
+
+With the `start` argument, the script adds appropriate `iptables`
+rules to direct certain traffic to net-filter queue 99, and it
+starts a background process for that filtering.
+
+> `# blockdomains.sh reload`
+
+With the `reload` argument, the control script stops and restarts the
+filter without changing `iptables` rules.
+
+> `# blockdomains.sh stop`
+
+With the `stop` argument, the control script removes the `iptables`
+rules and terminates the filtering process.
+
+== Technical Detail
+
+The filtering uses the given lists of domain names for rejecting
+packets. It recognizes HTTP message headers and SSL certificate
+requests, from which it picks out the targeted domain name. If that
+name is blacklisted or in a blacklisted domain, then the packet is
+rejected.
+
+The filtering also uses a fixed size decision cache, so that
+subsequent decisions for the same target can be made quickly.
--- /dev/null
+#!/bin/sh
+#
+# Control script for manual use of blockdomains.
+#
+# Usage: <script> start|reload|stop
+#   start   add the NFQUEUE rule and launch the filter in the background
+#   reload  stop, then start again
+#   stop    remove the NFQUEUE rule and terminate the filter
+
+# Rule specification that directs outbound TCP traffic to queue 99.
+# Deliberately expanded unquoted below so that it splits into words.
+QUEUE_RULE="OUTPUT -p tcp -j NFQUEUE --queue-num 99"
+
+do_start() {
+    # Insert the rule only when it is not there already, so that a
+    # repeated "start" does not stack up duplicates that a single
+    # "stop" would leave behind.
+    iptables -C $QUEUE_RULE 2>/dev/null || iptables -I $QUEUE_RULE
+    blockdomains /etc/blockdomains/blocked/*.acl &
+}
+
+do_stop() {
+    iptables -D $QUEUE_RULE
+    # -x matches the process name exactly. A plain pattern is a
+    # substring match and would also kill any process whose name merely
+    # contains "blockdomains", including this control script.
+    pkill -x blockdomains
+}
+
+case "$1" in
+    start) do_start ;;
+    reload) do_stop ; do_start ;;
+    stop) do_stop ;;
+    *) echo "Use start, stop or reload" >&2 ; exit 1 ;;
+esac
--- /dev/null
+= blockdomains(5)
+
+== NAME
+blockdomains - block list file format
+
+== SYNOPSIS
+
+/etc/blockdomains/acl/blocklist.acl
+
+ln -s ../acl/blocklist.acl /etc/blockdomains/blocked/
+
+== DESCRIPTION
+
+**blockdomains** uses one or more block list files which contain
+declarations of the domains to block, one domain per line that starts
+with any number of whitespace characters followed by a period (".")
+before the domain to block. The blocking applies to the domain and all
+its subdomains.
+
+====
+Anything not starting with a period (".") is a comment and leading
+whitespace is ignored. Block list domains start with optional
+whitespace and a period, followed by the domain name to block,
+optionally followed by a whitespace and a comment. Like the following:
+====
+
+.Example of block list
+----
+ .bad.domain.com -- domain name up to whitespace is blocked
+
+Blank lines are fine too; they are treated as comments. The block list
+domains don't need to be column aligned. So, here is another:
+
+.another.domain.to.block
+End of block list example.
+----
+
+== SEE ALSO
+
+blockdomains(8)
+
+== AUTHOR
+
+Ralph Ronnquist <rrq@rrq.au>
--- /dev/null
+= blockdomains(8)
+
+== NAME
+
+blockdomains - Firewall agent blocking selected HTTP and HTTPS
+connections
+
+== SYNOPSIS
+
+blockdomains __blocklistfile__+
+
+== DESCRIPTION
+
+The **blockdomains** utility is a blacklist based network traffic
+filter for iptables via libnetfilter-queue. It applies to HTTP and SSL
+traffic for recognizing and dropping packets that are directed to
+blacklisted domain names.
+
+== SEE ALSO
+
+blockdomains(5)
+
+== AUTHOR
+
+Ralph Ronnquist <rrq@rrq.au>
--- /dev/null
+= blockdomainsctl(8)
+
+== NAME
+
+blockdomainsctl - utility for manual start/reload/stop of blockdomains
+
+== SYNOPSIS
+
+blockdomainsctl __action__
+
+== DESCRIPTION
+
+blockdomainsctl is a utility for manual operation of blockdomains.
+
+== SEE ALSO
+
+blockdomains(8)
+
+== AUTHOR
+
+Ralph Ronnquist <rrq@rrq.au>
--- /dev/null
+// Sample TLS record holding a Client Hello whose Server Name extension
+// names "localhost". Bracketed numbers are byte offsets into the record.
+const unsigned char good_data_2[] = {
+ // TLS record
+ 0x16, // [0] Content Type: Handshake
+ 0x03, 0x01, // [1,2] Version: TLS 1.0
+ 0x00, 0x6c, // [3,4] Length (use for bounds checking)
+ // Handshake
+ 0x01, // [5] Handshake Type: Client Hello
+ 0x00, 0x00, 0x68, // [6,7,8] Length (use for bounds checking)
+ 0x03, 0x03, // [9,10] Version: TLS 1.2
+ // [11..42] Random (32 bytes fixed length)
+ 0xb6, 0xb2, 0x6a, 0xfb, 0x55, 0x5e, 0x03, 0xd5,
+ 0x65, 0xa3, 0x6a, 0xf0, 0x5e, 0xa5, 0x43, 0x02,
+ 0x93, 0xb9, 0x59, 0xa7, 0x54, 0xc3, 0xdd, 0x78,
+ 0x57, 0x58, 0x34, 0xc5, 0x82, 0xfd, 0x53, 0xd1,
+ 0x00, // [43] Session ID Length (skip past this much)
+ 0x00, 0x04, // [44,45] Cipher Suites Length (skip past this much)
+ 0x00, 0x01, // NULL-MD5
+ 0x00, 0xff, // RENEGOTIATION INFO SCSV
+ 0x01, // Compression Methods Length (skip past this much)
+ 0x00, // NULL
+ 0x00, 0x3b, // Extensions Length (use for bounds checking)
+ // Extension
+ 0x00, 0x00, // Extension Type: Server Name (check extension type)
+ 0x00, 0x0e, // Length (use for bounds checking)
+ 0x00, 0x0c, // Server Name Indication Length
+ 0x00, // Server Name Type: host_name (check server name type)
+ 0x00, 0x09, // Length (length of your data)
+ // "localhost" (the data you are after)
+ 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x68, 0x6f, 0x73, 0x74,
+ // Extension
+ 0x00, 0x0d, // Extension Type: Signature Algorithms (check extension type)
+ 0x00, 0x20, // Length (skip past since this is the wrong extension)
+ // Data
+ 0x00, 0x1e, 0x06, 0x01, 0x06, 0x02, 0x06, 0x03,
+ 0x05, 0x01, 0x05, 0x02, 0x05, 0x03, 0x04, 0x01,
+ 0x04, 0x02, 0x04, 0x03, 0x03, 0x01, 0x03, 0x02,
+ 0x03, 0x03, 0x02, 0x01, 0x02, 0x02, 0x02, 0x03,
+ // Extension
+ 0x00, 0x0f, // Extension Type: Heart Beat (check extension type)
+ 0x00, 0x01, // Length (skip past since this is the wrong extension)
+ 0x01 // Mode: Peer allows to send requests
+};
--- /dev/null
+#!/lib/init/init-d-script
+### BEGIN INIT INFO
+# Provides: blockdomains
+# Required-Start: mountkernfs $local_fs
+# X-Start-Before: $network
+# Required-Stop: mountkernfs $local_fs
+# Default-Start: S
+# Default-Stop: 0 6
+# Short-Description: Block selected HTTP and HTTPS connections
+# Description: Firewall agent that blocks outbound connections
+# for selected domains
+### END INIT INFO
+DAEMON=/usr/sbin/blockdomains
+DAEMON_ARGS="$(ls /etc/blockdomains/blocked/*.acl 2>/dev/null)"
+START_ARGS="-b -O /var/log/blockdomains.log"
+PIDFILE=none
+
+do_start_prepare() {
+ iptables -I OUTPUT -p tcp -j NFQUEUE --queue-num 99
+}
+
+do_stop_prepare() {
+ iptables -D OUTPUT -p tcp -j NFQUEUE --queue-num 99
+}
--- /dev/null
+#include <linux/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/netfilter.h> /* for NF_ACCEPT */
+
+#include <libnetfilter_queue/libnetfilter_queue.h>
+
+// Caching of verdicts.
+// lookup_cache returns the cached verdict index, or -1 when the domain
+// is not cached; it is therefore declared with a signed return type,
+// matching its definition.
+int lookup_cache(unsigned char *domain);
+void add_cache(unsigned char *domain,unsigned int ix);
+int hash_code(unsigned char *domain);
+
+// BAD domains database.
+// check_domain returns 0 for a domain that is not blocked, otherwise a
+// positive number identifying the matching database entry.
+unsigned int check_domain(unsigned char *domain);
+void load_domains(char *file);
+void start_domain_database_loading(void);
+void end_domain_database_loading(void);
+
+/**
+ * Tell the netfilter id of a queued packet, in host byte order.
+ * Returns 0 when the packet carries no packet header.
+ */
+static u_int32_t get_packet_id(struct nfq_data *tb) {
+    struct nfqnl_msg_packet_hdr *hdr = nfq_get_msg_packet_hdr( tb );
+    if ( hdr == 0 ) {
+        return 0;
+    }
+    return ntohl( hdr->packet_id );
+}
+
+// Overlay for an IPv4 packet: the fixed IP header directly followed by
+// the TCP header. This layout only fits packets whose IP header
+// carries no options.
+struct ipv4_pkt {
+ struct ip first; // .ip_dst[4 bytes]
+ struct tcphdr second;
+};
+
+// Overlay for an IPv6 packet: the fixed IPv6 header directly followed
+// by the TCP header, i.e., without any extension headers in between.
+struct ipv6_pkt {
+ struct ip6_hdr first; // .ip6_dst[16 bytes]
+ struct tcphdr second;
+};
+
+// Payload packet. The IP version field of pkt4.first tells which of
+// the two overlays applies.
+struct packet {
+ union {
+ struct ipv4_pkt pkt4;
+ struct ipv6_pkt pkt6;
+ } p;
+ //unsigned char pad[12]; // ??
+};
+
+// View the raw packet bytes through the struct packet overlay.
+static struct packet *get_headerP(unsigned char *data) {
+    struct packet *overlay = (struct packet *) data;
+    return overlay;
+}
+
+///////// Debugging
+//const char *inet_ntop(int af, const void *restrict src,
+// char dst[restrict .size], socklen_t size);
+
+/**
+ * Debugging aid: render the destination address of the packet as
+ * text. The result is held in a static buffer that the next call
+ * overwrites. For an IP version other than 4 and 6 the text is the
+ * version number followed by " ???".
+ */
+static const char *tell_ip(struct packet *ip) {
+    static char THEIP[200];
+    int version = ip->p.pkt4.first.ip_v;
+    if ( version == 4 ) {
+        return inet_ntop( AF_INET, &ip->p.pkt4.first.ip_dst, THEIP, 200 );
+    }
+    if ( version == 6 ) {
+        return inet_ntop( AF_INET6, &ip->p.pkt6.first.ip6_dst, THEIP, 200 );
+    }
+    snprintf( THEIP, 200, "%d ???", version );
+    return THEIP;
+}
+
+/**
+ * Debugging aid: dump the packet to stderr on one line, as the
+ * destination address, the size of the header overlay, the TCP
+ * destination port and the packet length, followed by at most END
+ * packet bytes in hexadecimal.
+ */
+static void view_payload(unsigned char *data,int length) {
+    struct packet *header = get_headerP( data );
+    u_int16_t port = 0;
+    u_int8_t syn = 0;
+    int version = header->p.pkt4.first.ip_v;
+    if ( version == 4 ) {
+        struct ipv4_pkt *v4 = (struct ipv4_pkt *) data;
+        port = ntohs( v4->second.th_dport );
+        syn = sizeof( struct ipv4_pkt );
+    } else if ( version == 6 ) {
+        struct ipv6_pkt *v6 = (struct ipv6_pkt *) data;
+        port = ntohs( v6->second.th_dport );
+        syn = sizeof( struct ipv6_pkt );
+    }
+#define END 400
+    int limit = ( length > END )? END : length;
+    fprintf( stderr, "%s %d %d %d ", tell_ip( header ), syn, port, length );
+    int at;
+    for ( at = 0; at < limit; at++ ) {
+        unsigned char c = data[ at ];
+        // The trailing "|| 1" forces hexadecimal output for every byte.
+        if ( c < ' ' || c >= 127 || 1 ) {
+            fprintf( stderr, "%02x ", c );
+        } else {
+            fprintf( stderr, "%c", c );
+        }
+    }
+    fprintf( stderr, "\n" );
+}
+
+//////////////////
+// Shared result buffer: ssl_host and http_host copy the host name they
+// find into it and return a pointer to it, so each result is only
+// valid until the next call of either function.
+static unsigned char buffer[1000];
+
+/**
+ * Find the SNI host name in a TLS Client Hello.
+ *
+ * p points at the TLS record and n is the number of bytes available
+ * from there. All the data comes from the network, so every index is
+ * checked against n before it is used. On success the name is copied
+ * into buffer, NUL terminated, and buffer is returned; otherwise the
+ * result is 0.
+ */
+static unsigned char *find_sni(unsigned char *p,unsigned int n) {
+    // Fixed layout: record header [0..4], handshake header [5..8],
+    // version [9,10], random [11..42], session id length [43].
+    unsigned int i = 43;
+    if ( n < 44 ) {
+        return 0;
+    }
+    i += 1 + p[i]; // skip past the session id
+    if ( i + 2 > n ) {
+        return 0;
+    }
+    i += 2 + ( 256 * p[i] ) + p[i+1]; // skip past the cipher suites
+    if ( i + 1 > n ) {
+        return 0;
+    }
+    i += 1 + p[i]; // skip past the compression methods
+    if ( i + 2 > n ) {
+        return 0;
+    }
+    unsigned int extensions_length = ( 256 * p[i] ) + p[i+1];
+    i += 2;
+    fprintf( stderr, "TLS 1.2 %d %d\n", i, extensions_length );
+    // The extensions may claim to reach beyond the bytes at hand.
+    unsigned int end = i + extensions_length;
+    if ( end > n ) {
+        end = n;
+    }
+    unsigned int k = i;
+    while ( k + 4 <= end ) {
+        unsigned int type = ( 256 * p[k] ) + p[k+1];
+        unsigned int length = ( 256 * p[k+2] ) + p[k+3];
+        fprintf( stderr, "Extension %d %d\n", k - i, type );
+        k += 4;
+        if ( type == 0 ) { // Server Name
+            // list length [k,k+1], name type [k+2], name length [k+3,k+4]
+            if ( k + 5 > end || p[k+2] ) {
+                break; // Name badness
+            }
+            unsigned int name_length = ( 256 * p[k+3] ) + p[k+4];
+            if ( name_length > end - ( k + 5 ) ||
+                 name_length >= sizeof( buffer ) ) {
+                break; // Name is truncated or too long to hold
+            }
+            memcpy( buffer, &p[k+5], name_length );
+            buffer[ name_length ] = '\0';
+            return buffer;
+        }
+        k += length;
+    }
+    return 0;
+}
+
+/**
+ * SSL traffic includes a data packet with a clear text host name.
+ * This is known as the SNI extension.
+ *
+ * data is the IP packet and length its size. Returns the host name,
+ * held in the shared buffer, or 0 when the packet is not a TLS 1.2
+ * Client Hello with a usable server name. The TCP payload is located
+ * via the header lengths stated in the packet itself, so IP and TCP
+ * options of any size are skipped. IPv6 extension headers are not
+ * handled.
+ */
+static unsigned char *ssl_host(unsigned char *data,int length) {
+    int offset;
+    if ( length < 1 ) {
+        return 0;
+    }
+    switch ( ((struct packet *) data)->p.pkt4.first.ip_v ) {
+    case 4:
+        offset = ( data[0] & 0x0f ) * 4; // header length in 32-bit words
+        break;
+    case 6:
+        offset = sizeof( struct ip6_hdr );
+        break;
+    default:
+        return 0;
+    }
+    // The high nibble of TCP header byte 12 is the TCP header length in
+    // 32-bit words.
+    if ( offset + 13 > length ) {
+        return 0;
+    }
+    offset += ( data[ offset + 12 ] >> 4 ) * 4;
+    if ( offset + 44 > length ) {
+        return 0;
+    }
+    unsigned char *p = data + offset;
+    // Check that it's a "Client Hello" message
+    if ( p[0] != 0x16 || p[1] != 0x03 || p[5] != 0x01 || p[6] != 0x00 ) {
+        return 0;
+    }
+    fprintf( stderr, "Client Hello\n" );
+    // Note minor version p[2] is not checked
+    // record_length = 256 * p[3] + p[4]
+    // handshake_message_length = 256 * p[7] + p[8]
+    if ( p[9] != 0x03 || p[10] != 0x03 ) { // TLS 1.2
+        return 0;
+    }
+    fprintf( stderr, "TLS 1.2\n" );
+    unsigned char *host = find_sni( p, length - offset );
+    if ( host == 0 ) {
+        // This point is only reached on "missing or bad SNI".
+        view_payload( data, length );
+    }
+    return host;
+}
+
+/**
+ * HTTP traffic includes a data packet with the host name as a
+ * "Host:" attribute.
+ *
+ * data is the IP packet and length its size. Returns the host name,
+ * held in the shared buffer, or 0 when the packet is not the start of
+ * a GET or POST request with a Host: header of at least 5 characters
+ * that fits the buffer. The TCP payload is located via the header
+ * lengths stated in the packet itself, so IP and TCP options of any
+ * size are skipped. IPv6 extension headers are not handled.
+ */
+static unsigned char *http_host(unsigned char *data,int length) {
+    int offset;
+    if ( length < 1 ) {
+        return 0;
+    }
+    switch ( ((struct packet *) data)->p.pkt4.first.ip_v ) {
+    case 4:
+        offset = ( data[0] & 0x0f ) * 4; // header length in 32-bit words
+        break;
+    case 6:
+        offset = sizeof( struct ip6_hdr );
+        break;
+    default:
+        return 0;
+    }
+    // The high nibble of TCP header byte 12 is the TCP header length in
+    // 32-bit words.
+    if ( offset + 13 > length ) {
+        return 0;
+    }
+    offset += ( data[ offset + 12 ] >> 4 ) * 4;
+    // The scan compares up to 5 bytes at a time and therefore stops 6
+    // bytes short of the packet end; there must be something to scan.
+    if ( offset + 6 >= length ) {
+        return 0;
+    }
+    unsigned char *body = data + offset;
+    unsigned char *end = data + length - 6;
+    if ( ( strncmp( (char*) body, "GET ", 4 ) != 0 ) &&
+         ( strncmp( (char*) body, "POST ", 5 ) != 0 ) ) {
+        return 0;
+    }
+    int check = 0; // set when body is at the start of a line
+    for ( ; body < end; body++ ) {
+        if ( check ) {
+            if ( strncmp( (char*) body, "Host:", 5 ) == 0 ) {
+                body += 5;
+                for( ; body < end; body++ ) if ( *body != ' ' ) break;
+                unsigned char *start = body;
+                int n = 0;
+                for( ; body < end; n++, body++ ) if ( *body <= ' ' ) break;
+                // Reject names that are too short, and names that
+                // would overflow the result buffer.
+                if ( n < 5 || n >= (int) sizeof( buffer ) ) {
+                    return 0;
+                }
+                memcpy( buffer, start, n );
+                buffer[ n ] = '\0';
+                return buffer;
+            }
+            if ( strncmp( (char*) body, "\r\n", 2 ) == 0 ) {
+                return 0; // end of the headers without any Host:
+            }
+            for( ; body < end; body++ ) if ( *body == '\n' ) break;
+            if ( body >= end ) {
+                return 0;
+            }
+        }
+        check = ( *body == '\n' );
+    }
+    return 0;
+}
+
+/**
+ * Callback function to handle a packet.
+ *
+ * Packets of at least 100 bytes are searched for a host name, first
+ * as an HTTP request and then as a TLS Client Hello. A host name that
+ * the verdict cache or, on a cache miss, the domain database reports
+ * as blocked makes the verdict NF_DROP; everything else is accepted.
+ */
+static int cb(
+    struct nfq_q_handle *qh,
+    struct nfgenmsg *nfmsg,
+    struct nfq_data *nfa, void *code )
+{
+    unsigned char *data;
+    u_int32_t id = get_packet_id( nfa );
+    int length = nfq_get_payload( nfa, &data);
+    struct packet *header = get_headerP( data );
+    unsigned char *host = 0;
+    int drop = 0;
+    if ( length >= 100 ) {
+        host = http_host( data, length );
+        if ( host ) {
+            fprintf( stderr, "HTTP HOST %s %s\n", tell_ip( header ), host );
+        } else {
+            host = ssl_host( data, length );
+            if ( host ) {
+                fprintf( stderr, "SSL HOST %s %s\n", tell_ip( header ), host );
+            }
+        }
+    }
+    if ( host ) {
+        int cached = lookup_cache( host );
+        if ( cached < 0 ) {
+            // Not cached: ask the database and remember the answer
+            unsigned int ix = check_domain( host );
+            add_cache( host, ix );
+            fprintf( stderr, "%s %d %s ** %d\n",
+                     tell_ip( header ), hash_code( host ), host, ix );
+            drop = ( ix > 0 );
+        } else {
+            drop = ( cached > 0 );
+        }
+    }
+    return nfq_set_verdict(qh, id, drop? NF_DROP : NF_ACCEPT, 0, NULL);
+}
+
+/**
+ * Program main function.
+ *
+ * The command line arguments are the block list files to load. Once
+ * they are loaded, the program binds to netfilter queue THEQUEUE and
+ * hands every queued packet to cb for a verdict, until the netlink
+ * socket is closed or fails.
+ */
+int main(int argc, char **argv) {
+    // Load the database
+    start_domain_database_loading();
+    int n = 1;
+    for ( ; n < argc; n++ ) {
+        fprintf( stderr, "Loading blacklist %s\n", argv[ n ] );
+        load_domains( argv[ n ] );
+    }
+    end_domain_database_loading();
+
+    struct nfq_handle *h;
+    struct nfq_q_handle *qh;
+    //struct nfnl_handle *nh;
+    int fd;
+    int rv;
+    char buf[4096] __attribute__ ((aligned));
+
+    fprintf( stderr, "opening library handle\n");
+    h = nfq_open();
+    if ( !h ) {
+        fprintf(stderr, "error during nfq_open()\n");
+        exit(1);
+    }
+
+    fprintf( stderr, "unbinding any existing nf_queue handler\n" );
+    if ( nfq_unbind_pf(h, AF_INET) < 0 ) {
+        fprintf(stderr, "error during nfq_unbind_pf()\n");
+        exit(1);
+    }
+
+    fprintf( stderr, "binding nfnetlink_queue as nf_queue handler\n" );
+    if ( nfq_bind_pf(h, AF_INET) < 0 ) {
+        fprintf(stderr, "error during nfq_bind_pf()\n");
+        exit(1);
+    }
+
+#define THEQUEUE 99
+    fprintf( stderr, "binding this socket to queue '%d'\n", THEQUEUE );
+    qh = nfq_create_queue( h, THEQUEUE, &cb, NULL );
+    if ( !qh ) {
+        fprintf(stderr, "error during nfq_create_queue()\n");
+        exit(1);
+    }
+
+    fprintf( stderr, "setting copy_packet mode\n" );
+    if ( nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff ) < 0) {
+        fprintf(stderr, "can't set packet_copy mode\n");
+        exit(1);
+    }
+
+    fd = nfq_fd( h );
+
+    for ( ;; ) {
+        rv = recv(fd, buf, sizeof(buf), 0);
+        if ( rv > 0 ) {
+            //printf( "pkt received\n" );
+            nfq_handle_packet(h, buf, rv);
+            continue;
+        }
+        // ENOBUFS tells that the kernel dropped queued packets because
+        // this program was too slow, and EINTR that a signal arrived.
+        // Neither is a reason to stop filtering.
+        if ( rv < 0 && errno == ENOBUFS ) {
+            fprintf( stderr, "losing packets!\n" );
+            continue;
+        }
+        if ( rv < 0 && errno == EINTR ) {
+            continue;
+        }
+        if ( rv < 0 ) {
+            perror( "recv" );
+        }
+        break;
+    }
+
+    fprintf( stderr, "unbinding from queue %d\n", THEQUEUE);
+    nfq_destroy_queue(qh);
+
+#ifdef INSANE
+    /* normally, applications SHOULD NOT issue this command, since it
+       detaches other programs/sockets from AF_INET, too ! */
+    fprintf( stderr, "unbinding from AF_INET\n");
+    nfq_unbind_pf(h, AF_INET);
+#endif
+
+    fprintf( stderr, "closing library handle\n");
+    nfq_close( h );
+
+    exit( 0 );
+}
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+// One cached verdict: a private copy of the domain name together with
+// the index value that was stored for it.
+typedef struct _CacheEntry {
+ unsigned char *domain;
+ unsigned int ix;
+} CacheEntry;
+
+// The verdict cache: a table with one slot per hash code. Both table
+// and size are zero until add_cache allocates the table.
+struct {
+ CacheEntry *table;
+ int size;
+} cache;
+
+/**
+ * Compute the cache slot for a NUL terminated domain name, as the sum
+ * of its bytes modulo the cache size. Returns 0 while the cache is
+ * not yet allocated, rather than dividing by zero.
+ */
+int hash_code(unsigned char *domain) {
+    if ( cache.size <= 0 ) {
+        return 0;
+    }
+    unsigned int sum = 0;
+    for ( ; *domain; domain++ ) {
+        sum += *domain;
+    }
+    return sum % cache.size;
+}
+
+/**
+ * Look up the cached index value for a domain. Returns -1 when there
+ * is no cache yet, or when the slot for the domain is empty or holds
+ * another domain.
+ */
+int lookup_cache(unsigned char *domain) {
+    if ( cache.table == 0 ) {
+        return -1;
+    }
+    CacheEntry *slot = &cache.table[ hash_code( domain ) ];
+    if ( slot->domain == 0 ) {
+        return -1;
+    }
+    if ( strcmp( (char*) domain, (char*) slot->domain ) != 0 ) {
+        return -1;
+    }
+    return slot->ix;
+}
+
+/**
+ * Remember the index value ix for a domain, replacing whatever the
+ * slot for the domain held before. The table of 1024 slots is
+ * allocated on first use. When memory runs out the value is simply
+ * not cached; the cache is an optimisation only.
+ */
+void add_cache(unsigned char *domain,unsigned int ix) {
+    if ( cache.table == 0 ) {
+        CacheEntry *table = (CacheEntry*) calloc( 1024, sizeof( CacheEntry ) );
+        if ( table == 0 ) {
+            return;
+        }
+        cache.table = table;
+        cache.size = 1024;
+    }
+    unsigned char *copy = (unsigned char*) strdup( (char*) domain );
+    if ( copy == 0 ) {
+        return; // leave the present slot content as it is
+    }
+    int i = hash_code( domain );
+    free( cache.table[i].domain ); // fine for an empty slot as well
+    cache.table[i].domain = copy;
+    cache.table[i].ix = ix;
+}
--- /dev/null
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/**
+ * This file implements a "database" of "bad" domains, loaded from
+ * ".acl" files of a fairly strict format; each domain to block is
+ * written on a line starting with a period, immediately followed by
+ * the domain to block, then an optional comment.
+ *
+ * The database is populated by using the call sequence:
+ * 1. start_domain_database_loading();
+ * 2. load_domains( filename ); // repeated
+ * N. end_domain_database_loading();
+ *
+ * The final call triggers a reordering of domains so as to support
+ * binary search in reverse text order, for matching domain suffixes.
+ * See the function `tail_compare` for details.
+ */
+
+/**
+ * This is the Entry type for the "database", which basically is an
+ * array of these. The domain pointer points at a domain name within
+ * the loaded ".acl" file data, and length is the domain name length.
+ * The name is not NUL terminated; it ends after length bytes.
+ */
+typedef struct _Entry {
+ int length;
+ unsigned char *domain;
+} Entry;
+
+/**
+ * This is the domain name database root structure. It holds a pointer
+ * to the array of Entry records, the fill of that array (the number
+ * of entries in use), and the allocated size of that array (no less
+ * than the fill, of course).
+ */
+static struct {
+ Entry *table;
+ int fill;
+ int size;
+} database = { 0, 0, 0 };
+
+/**
+ * Compare two strings backwards. The pointers a and b each point just
+ * past the end of their string, and the k bytes before them are
+ * compared, last byte first. The result is zero when those bytes are
+ * all equal, and otherwise negative or positive as decided by the
+ * first difference met. A '.' counts as less than any other byte, so
+ * that a refined/extended domain name compares greater than its base
+ * domain name.
+ */
+static int tail_compare(unsigned char *a,unsigned char *b,int k) {
+    for ( ; k > 0; k-- ) {
+        a--;
+        b--;
+        if ( *a == *b ) {
+            continue;
+        }
+        if ( *a == '.' ) {
+            return -1;
+        }
+        if ( *b == '.' ) {
+            return 1;
+        }
+        return *a - *b;
+    }
+    return 0;
+}
+
+/**
+ * Extend the domain name table by STARTSIZE entries to allow
+ * additions; the first call allocates the table. The added entries
+ * are zeroed. Running out of memory ends the program.
+ */
+#define STARTSIZE 100000
+static void grow() {
+    int size = database.size + STARTSIZE;
+    // realloc allocates afresh when there is no table yet
+    Entry *table = (Entry*) realloc( database.table, size * sizeof( Entry ) );
+    if ( table == 0 ) {
+        perror( "growing the domain table" );
+        exit( 1 );
+    }
+    memset( table + database.size, 0, STARTSIZE * sizeof( Entry ) );
+    database.table = table;
+    database.size = size;
+}
+
+/**
+ * Determine the index for the given domain of n bytes. This is a
+ * binary search that computes a tail match between the given domain
+ * and the database domains, returning the index of the matching
+ * database entry, or (-index-1) to indicate the insertion point when
+ * there is no match. In lookup mode, a database entry that is a tail
+ * domain part of the given domain, i.e., preceded by a '.' in it, is
+ * also considered a match.
+ *
+ * NOTE(review): in lookup mode a matching shorter entry looks like it
+ * can be missed when the search first probes a longer entry that
+ * shares that tail and compares less than the given domain, since
+ * the search then continues after that entry -- verify.
+ */
+static int index_domain(unsigned char *domain,int n,int lookup) {
+ int lo = 0;
+ int hi = database.fill;
+ while ( lo < hi ) {
+ int m = ( lo + hi ) / 2;
+ Entry *p = &database.table[ m ];
+ // Compare as many trailing bytes as the shorter of the two has
+ int k = p->length;
+ if ( n < k ) {
+ k = n;
+ }
+ int q = tail_compare( p->domain + p->length, domain + n, k );
+#if 0
+ fprintf( stderr, "%s %d %d %d\n", domain, k, m, q );
+#endif
+ if ( q == 0 ) {
+ if ( p->length < n ) {
+ // table entry shorter => new entry after, or match on lookup
+ if ( lookup && *(domain+n-k-1) == '.' ) {
+ return m;
+ }
+ lo = m + 1;
+ } else if ( p->length > n ) {
+ // table entry longer => new entry before
+ hi = m;
+ } else {
+ // equal
+ return m;
+ }
+ } else if ( q < 0 ) {
+ // new entry after
+ lo = m + 1;
+ } else {
+ // new entry before
+ hi = m;
+ }
+ }
+ return -lo - 1;
+}
+
+/**
+ * Determine the length of a "word": the number of leading bytes that
+ * are greater than the space character. A NUL byte ends the word too.
+ */
+static int wordlen(unsigned char *p) {
+    int n = 0;
+    while ( p[ n ] > ' ' ) {
+        n++;
+    }
+    return n;
+}
+
+#if 0
+static void add_domain(char *domain) {
+ if ( database.fill >= database.size ) {
+ grow();
+ }
+ int length = wordlen( domain );
+ int i = index_domain( domain, length, 0 );
+ if ( i < 0 ) {
+ i = -i-1;
+ int tail = database.fill - i;
+ if ( tail ) {
+ memmove( &database.table[ i+1 ],
+ &database.table[i],
+ tail * sizeof( Entry ) );
+ }
+ database.table[ i ].domain = domain;
+ database.table[ i ].length = length;
+ database.fill++;
+ } else {
+ char *p1 = strndup( domain, length );
+ char *p2 = strndup( database.table[i].domain,
+ database.table[i].length );
+ fprintf( stderr, "fill = %d %d %s == %s\n",
+ i, database.fill, p1, p2 );
+ free( p1 );
+ free( p2 );
+ }
+}
+#endif
+
+/**
+ * Append a domain name of the given length at the end of the table,
+ * growing the table first when it is full. The name is not copied,
+ * and nothing is kept in order; end_domain_database_loading sorts the
+ * table afterwards.
+ */
+static void fast_add_domain(unsigned char *domain,int length) {
+    if ( database.fill >= database.size ) {
+        grow();
+    }
+    Entry *slot = &database.table[ database.fill++ ];
+    slot->length = length;
+    slot->domain = domain;
+}
+
+/**
+ * Sort order of the table: entries compare by their common-length
+ * tails, and when those are equal the shorter entry comes first.
+ */
+static int table_order(Entry *a,Entry *b) {
+    int common = a->length;
+    if ( b->length < common ) {
+        common = b->length;
+    }
+    int c = tail_compare( a->domain + a->length,
+                          b->domain + b->length, common );
+    return c? c : ( a->length - b->length );
+}
+
+/**
+ * External call to check a given domain. Returns 0 when the domain is
+ * not blocked, and otherwise the index of the matching database entry
+ * plus one.
+ *
+ * The domain itself and then each of its parent domains is searched
+ * for as an exact match. A single search in tail matching mode is not
+ * enough: a blocked parent domain is missed when the table also holds
+ * one of its sub domains and the search happens to probe that entry
+ * first.
+ */
+unsigned int check_domain(unsigned char *domain) {
+    unsigned char *p = domain;
+    int n = wordlen( domain );
+    for ( ;; ) {
+        int i = index_domain( p, n, 0 );
+        if ( i >= 0 ) {
+            return i + 1;
+        }
+        // Drop the leading label to get the parent domain
+        while ( n > 0 && *p != '.' ) {
+            p++;
+            n--;
+        }
+        if ( n == 0 ) {
+            return 0; // no more parent domains to try
+        }
+        p++; // skip past the period
+        n--;
+    }
+}
+
+/**
+ * First call of the database loading sequence. It has nothing to do
+ * at present.
+ */
+void start_domain_database_loading(void) {
+}
+
+#if 0
+static void dump_table() {
+ fprintf( stderr, "Table fill=%d size=%d\n", database.fill, database.size );
+ int i = 0;
+ for ( ; i < database.fill; i++ ) {
+ char *p = strndup( database.table[i].domain,
+ database.table[i].length );
+ fprintf( stderr, "[%d] %d %p %s\n",
+ i, database.table[i].length, database.table[i].domain, p );
+ free( p );
+ }
+}
+#endif
+
+/**
+ * Adapter giving table_order the exact function type that qsort
+ * expects.
+ */
+static int table_order_qsort(const void *a,const void *b) {
+    return table_order( (Entry*) a, (Entry*) b );
+}
+
+/**
+ * Final call of the database loading sequence: sort the table into
+ * the order that index_domain searches by. A table of fewer than two
+ * entries, which includes the case of no table at all, needs no
+ * sorting.
+ */
+void end_domain_database_loading(void) {
+    if ( database.fill > 1 ) {
+        qsort( database.table, database.fill, sizeof( Entry ),
+               table_order_qsort );
+    }
+    //dump_table();
+}
+
+/**
+ * Load BAD domain names from file. The file is line based, where a
+ * data line consists of optional leading whitespace, a period, and
+ * then the domain name, which ends at whitespace or the end of the
+ * line. All other lines are ignored.
+ *
+ * The file content is read into memory and kept there, since the
+ * table entries point into it. Any failure to read the file ends the
+ * program.
+ */
+void load_domains(char *file) {
+    struct stat info;
+    //fprintf( stderr, "state(\"%s\",&info)\n", file );
+    if ( stat( file, &info ) ) {
+        perror( file );
+        exit( 1 );
+    }
+    int n = info.st_size;
+    // One byte extra, for a newline that ends the last line
+    unsigned char *data = (unsigned char *) malloc( n + 1 );
+    if ( data == 0 ) {
+        perror( file );
+        exit( 1 );
+    }
+    //fprintf( stderr, "open(\"%s\",)\n", file );
+    int fd = open( file, O_RDONLY );
+    if ( fd < 0 ) {
+        perror( file );
+        exit( 1 );
+    }
+    //fprintf( stderr, "Loading %s\n", file );
+    unsigned char *end = data;
+    while ( n > 0 ) {
+        int k = read( fd, end, n );
+        if ( k < 0 ) {
+            perror( file );
+            exit( 1 );
+        }
+        if ( k == 0 ) {
+            fprintf( stderr, "Premature EOF for %s\n", file );
+            exit( 1 );
+        }
+        end += k;
+        n -= k;
+    }
+    close( fd );
+    *end = '\n';
+    //fprintf( stderr, "processing %s %p %p\n", file, data, end );
+    unsigned char *p = data;
+    while( p < end ) {
+        // Skip leading whitespace, but stay on the line
+        while ( p < end && *p <= ' ' && *p != '\n' ) {
+            p++;
+        }
+        if ( p < end && *p == '.' ) {
+            unsigned char *domain = ++p;
+            while ( p < end && *p > ' ' ) {
+                p++;
+            }
+            if ( p > domain ) {
+                fast_add_domain( domain, p - domain );
+            }
+        }
+        // Move to the start of the next line
+        while ( p < end && *p != '\n' ) {
+            p++;
+        }
+        p++;
+    }
+}