/* * Copyright (c) 2012 Elias Norberg * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #define TLD_TREE_END_NODE 1 #define TLD_TREE_EXCEPTION 2 struct tld_tree_node { struct tld_tree_node *next; struct tld_tree_node *child; const char *lbl; char flags; }; struct tld_tree_node tld_tree_root = { NULL, NULL, "" }; #define TREE_INSERT_CHILD(n, data) \ n->child = g_malloc(sizeof *n); \ n->child->next = NULL; \ n->child->child = NULL; \ n->child->flags = 0; \ n->child->lbl = data; #define TREE_INSERT_NEXT(n, data) \ n->next = g_malloc(sizeof *n); \ n->next->next = NULL; \ n->next->child = NULL; \ n->next->flags = 0; \ n->next->lbl = data; #define P_BASE (36) #define P_TMIN (1) #define P_TMAX (26) #define P_SKEW (38) #define P_DAMP (700) #define INITIAL_BIAS (72) #define INITIAL_N (128) int adapt(int delta, int numpoints, int firsttime) { int k; if (firsttime) delta = delta / P_DAMP; else delta = delta / 2; delta += (delta / numpoints); k = 0; while (delta > (((P_BASE - P_TMIN) * P_TMAX) / 2)) { delta = delta / (P_BASE - P_TMIN); k += P_BASE; } k += (((P_BASE - P_TMIN + 1) * delta) / (delta + P_SKEW)); return (k); } int get_minimum_char(char *str, int n) { gunichar ch = 0; gunichar min = 0xffffff; for(; *str; str = g_utf8_next_char(str)) { ch = g_utf8_get_char(str); if (ch >= n && ch < min) min = ch; } return (min); } char encode_digit(int n) { if (n < 26) return n + 'a'; return (n - 26) + '0'; } char * punycode_encode(char *str) { char output[1024]; char *s; gunichar c; int need_coding = 0; int l, len, i; int n = INITIAL_N; int delta = 0; int bias = INITIAL_BIAS; int h, b, m, k, t, q; l = 0; for (s=str; *s; s = g_utf8_next_char(s)) { c = g_utf8_get_char(s); if (c < 128) output[l++] = *s; else need_coding = 1; } output[l] = '\0'; if (!need_coding) return g_strdup(output); h = b = strlen(output); if (l > 0) output[l++] = '-'; output[l] = '\0'; len = g_utf8_strlen(str, -1); while (h < len) { m = get_minimum_char(str, n); delta += (m - n) * (h + 1); n = m; for (s=str; *s; s = g_utf8_next_char(s)) { c = g_utf8_get_char(s); if (c < n) delta ++; if (c == n) { q = delta; for (k=P_BASE;; k+=P_BASE) { if (k <= bias) t = P_TMIN; else if(k >= bias + P_TMAX) t = P_TMAX; else t = k - bias; if (q < t) break; output[l++] = encode_digit(t+((q-t)%(P_BASE-t))); q = (q - t) / (P_BASE - t); } output[l++] = encode_digit(q); bias = adapt(delta, h + 1, h == b); delta = 0; h ++; } } delta ++; n ++; } output[l] = '\0'; for (i=l+4;i>=4;i--) output[i] = output[i-4]; l += 4; output[0] = 'x'; output[1] = 'n'; output[2] = '-'; output[3] = '-'; output[l] = '\0'; return g_strdup(output); } /* * strrchr2(str, saveptr, ch) * * Walk backwards through str, jumping to next 'ch' * On first call, *saveptr should be set to NULL. * On following calls, supply the same saveptr. * * Returns NULL when the whole string 'str' has been * looped through. Otherwise returns the position * before the next 'ch'. */ const char * strrchr2(const char *str, const char **saveptr, int ch) { const char *ptr; if (str != NULL && *saveptr == NULL) { *saveptr = str + strlen(str); } else if (str == *saveptr) { return (NULL); } for (ptr= *saveptr - 1; ptr != str && *ptr != ch; ptr--) ; *saveptr = ptr; if (ptr != str) return (ptr+1); return (ptr); } /* * tld_tree_add(rule) * * Adds a tld-rule to the tree */ void tld_tree_add(const char *rule) { struct tld_tree_node *n; const char *lbl; const char *saveptr; saveptr = NULL; lbl = strrchr2(rule, &saveptr, '.'); for (n = &tld_tree_root; lbl != NULL;) { if (strcmp(n->lbl, lbl) == 0) { lbl = strrchr2(rule, &saveptr, '.'); if (!n->child) break; n = n->child; continue; } if (n->next == NULL) { TREE_INSERT_NEXT(n, lbl); n = n->next; lbl = strrchr2(rule, &saveptr, '.'); break; } n = n->next; } while (lbl) { TREE_INSERT_CHILD(n, lbl); lbl = strrchr2(rule, &saveptr, '.'); n = n->child; } n->flags |= TLD_TREE_END_NODE; if (n->lbl[0] == '!') { n->flags |= TLD_TREE_EXCEPTION; n->lbl ++; } } void tld_tree_init() { FILE *fd; char buf[1024]; char file[PATH_MAX]; char *ptr, *next_lbl; char *enc_lbl; char *rule, *rp; char extra_ch; snprintf(file, sizeof file, "%s" PS "tld-rules", resource_dir); fd = fopen(file, "r"); if (fd == NULL) { /* a poor replacement for the real list - but it's * better than nothing. */ tld_tree_add("*"); startpage_add("Could not open %s: this file is required " "to handle TLD whitelisting properly", file); return; } for (;;) { ptr = fgets(buf, sizeof buf, fd); if (ptr == NULL || feof(fd)) break; /* skip comments */ if ((ptr = strstr(buf, "//")) != NULL) *ptr = '\0'; /* skip anything after space or tab */ for (ptr = buf; *ptr; ptr++) if (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == '\r') break; *ptr = '\0'; if (!strlen(buf)) continue; extra_ch = 0; ptr = buf; if (buf[0] == '!' && buf[0] == '*') { extra_ch = buf[0]; ptr ++; } rule = NULL; /* split into labels, and convert them one by one */ for (;;) { if ((next_lbl = strchr(ptr, '.'))) *next_lbl = '\0'; enc_lbl = punycode_encode(ptr); if (rule) { rp = rule; rule = g_strdup_printf("%s%s%s", rp, enc_lbl, next_lbl ? "." : ""); g_free(rp); g_free(enc_lbl); } else { rule = g_strdup_printf("%.1s%s%s", extra_ch ? buf:"", enc_lbl, next_lbl ? ".":""); g_free(enc_lbl); } if (!next_lbl) break; ptr = next_lbl + 1; } tld_tree_add(rule); } fclose(fd); } /* * tld_get_suffix(domain) * * Find the public suffix for domain. * * Returns a pointer to the suffix position * in domain, or NULL if no public suffix * was found. */ char * tld_get_suffix(const char *domain) { struct tld_tree_node *n; const char *suffix; const char *lbl, *saveptr; const char *tmp_saveptr, *tmp_lbl; if (domain == NULL) return (NULL); if (domain[0] == '.') return (NULL); saveptr = NULL; suffix = NULL; lbl = strrchr2(domain, &saveptr, '.'); for (n = &tld_tree_root; n != NULL && lbl != NULL;) { if (!strlen(n->lbl)) { n = n->next; continue; } if (n->lbl[0] == '*') { if (n->flags & TLD_TREE_END_NODE) { tmp_saveptr = saveptr; tmp_lbl = lbl; lbl = strrchr2(domain, &saveptr, '.'); /* Save possible public suffix */ suffix = lbl; saveptr = tmp_saveptr; lbl = tmp_lbl; } n = n->next; continue; } if (strcmp(n->lbl, lbl) == 0) { if (n->flags & TLD_TREE_EXCEPTION) { /* We're done looking */ suffix = lbl; break; } lbl = strrchr2(domain, &saveptr, '.'); /* Possible public suffix - other rules might * still apply */ if (n->flags & TLD_TREE_END_NODE) suffix = lbl; /* Domain too short */ if (lbl == NULL) { /* Check if we have a child that is '*' */ for (n = n->child; n; n = n->next) if (n->lbl[0] == '*') suffix = NULL; break; } if (n->child == NULL) break; n = n->child; continue; } if (n->next == NULL) break; n = n->next; } /* If we can't find a matching suffix, it can mean that either * a) the user is surfing a local prefix * b) the list is not properly updated * * In any case - in order not to break stuff while surfing * new TLD's, we return the public suffix as the top 2 labels * * www.abc.xyz therefore has public suffix 'abc.xyz' */ if (!suffix) { saveptr = NULL; strrchr2(domain, &saveptr, '.'); lbl = strrchr2(domain, &saveptr, '.'); suffix = lbl; } return ((char*)suffix); }