From 9479f490c2c84d7bad8cf205c3dee0fd359421e1 Mon Sep 17 00:00:00 2001 From: Josh Rickmar Date: Wed, 10 Oct 2012 12:54:26 -0400 Subject: Implement header changes as requested by jy-p This change introduces a new RB tree to keep track of identities (defined as being different combinations of modified HTTP headers, so far only User-Agent and Accept). Whenever a site is visited, this tree is checked to see whether the site has been accessed before, and if it has, the previously used Accept and User-Agent headers will be used. If the site has not been visited before during the browser's lifetime, a new entry will be created in this tree to keep track of which headers to use the next time. A site is defined as a FQDN, so requests made to cross-site resources or resources on a different subdomain will generate a new saved identity. The second change adds two new config files to the resource dir to read in additional user_agent and http_accept values scraped from the logs of www.bitrig.org. The idea is to rotate through these values on every new site visit, providing more anonymity and thwarting web tracking that relies on inspecting the headers being sent. 
--- Makefile | 2 ++ settings.c | 20 +++++++---- xombrero.1 | 15 ++++++++ xombrero.c | 114 ++++++++++++++++++++++++++++++------------------------------- xombrero.h | 15 ++++++-- 5 files changed, 100 insertions(+), 66 deletions(-) diff --git a/Makefile b/Makefile index f9b4c49..6b227b1 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,8 @@ beforeinstall: install -m 644 ${.CURDIR}/tld-rules ${PREFIX}/share/xombrero install -m 644 ${.CURDIR}/style.css ${PREFIX}/share/xombrero install -m 644 ${.CURDIR}/hsts-preload ${PREFIX}/share/xombrero + install -m 644 ${.CURDIR}/user-agent-headers ${PREFIX}/share/xombrero + install -m 644 ${.CURDIR}/http-accept-headers ${PREFIX}/share/xombrero ${PROG} ${OBJS} beforedepend: javascript.h tooltip.h diff --git a/settings.c b/settings.c index 30b1a94..5ad06f3 100644 --- a/settings.c +++ b/settings.c @@ -50,14 +50,13 @@ char *resource_dir = NULL; char download_dir[PATH_MAX]; int allow_volatile_cookies = 0; int save_global_history = 0; /* save global history to disk */ -struct user_agent *user_agent = NULL; -struct http_accept *http_accept = NULL; int save_rejected_cookies = 0; gint max_connections = 25; gint max_host_connections = 5; int history_autosave = 0; int edit_mode = XT_EM_HYBRID; char *include_config = NULL; +int anonymize_headers = 0; int tabless = 0; /* allow only 1 tab */ /* runtime settings */ @@ -216,6 +215,7 @@ int set_gnutls_priority_string(struct settings *, char *); int check_allow_insecure_content(char **); int check_allow_insecure_scripts(char **); int check_allow_volatile_cookies(char **); +int check_anonymize_headers(char **); int check_append_next(char **); int check_auto_load_images(char **); int check_autofocus_onload(char **); @@ -492,14 +492,15 @@ struct special s_gnutls_priority_string = { struct settings rs[] = { { "allow_insecure_content", XT_S_BOOL, 0, &allow_insecure_content, NULL, NULL, NULL, set_allow_insecure_content, check_allow_insecure_content, TT_ALLOW_INSECURE_CONTENT }, { "allow_insecure_scripts", 
XT_S_BOOL, 0, &allow_insecure_scripts, NULL, NULL, NULL, set_allow_insecure_scripts, check_allow_insecure_scripts, TT_ALLOW_INSECURE_SCRIPTS}, - { "allow_volatile_cookies", XT_S_BOOL, 0, &allow_volatile_cookies, NULL, NULL, NULL, NULL, check_allow_volatile_cookies, TT_ALLOW_VOLATILE_COOKIES}, - { "append_next", XT_S_BOOL, 0, &append_next, NULL, NULL, NULL, set_append_next, check_append_next, TT_APPEND_NEXT}, + { "allow_volatile_cookies", XT_S_BOOL, 0, &allow_volatile_cookies, NULL, NULL, NULL, NULL, check_allow_volatile_cookies, TT_ALLOW_VOLATILE_COOKIES }, + { "anonymize_headers", XT_S_BOOL, 0, &anonymize_headers, NULL, NULL, NULL, NULL, check_anonymize_headers, TT_ANONYMIZE_HEADERS }, + { "append_next", XT_S_BOOL, 0, &append_next, NULL, NULL, NULL, set_append_next, check_append_next, TT_APPEND_NEXT }, { "auto_load_images", XT_S_BOOL, 0, &auto_load_images, NULL, NULL, NULL, set_auto_load_images, check_auto_load_images, TT_AUTO_LOAD_IMAGES }, { "autofocus_onload", XT_S_BOOL, 0, &autofocus_onload, NULL, NULL, NULL, set_autofocus_onload, check_autofocus_onload, TT_AUTOFOCUS_ONLOAD }, { "browser_mode", XT_S_STR, 0, NULL, NULL,&s_browser_mode, NULL, NULL, check_browser_mode, TT_BROWSER_MODE }, - { "cmd_font", XT_S_STR, 0, NULL, &cmd_font_name, NULL, NULL, set_cmd_font, check_cmd_font, TT_CMD_FONT}, + { "cmd_font", XT_S_STR, 0, NULL, &cmd_font_name, NULL, NULL, set_cmd_font, check_cmd_font, TT_CMD_FONT }, { "color_visited_uris", XT_S_BOOL, 0, &color_visited_uris , NULL, NULL, NULL, set_color_visited_uris, check_color_visited_uris, TT_COLOR_VISITED_URIS }, - { "cookie_policy", XT_S_STR, 0, NULL, NULL,&s_cookie, NULL, set_cookie_policy_rt, check_cookie_policy, TT_COOKIE_POLICY}, + { "cookie_policy", XT_S_STR, 0, NULL, NULL,&s_cookie, NULL, set_cookie_policy_rt, check_cookie_policy, TT_COOKIE_POLICY }, { "cookies_enabled", XT_S_BOOL, 0, &cookies_enabled, NULL, NULL, NULL, set_cookies_enabled, check_cookies_enabled, TT_COOKIES_ENABLED }, { "ctrl_click_focus", XT_S_BOOL, 0, 
&ctrl_click_focus, NULL, NULL, NULL, set_ctrl_click_focus, check_ctrl_click_focus, TT_CTRL_CLICK_FOCUS }, { "default_script", XT_S_STR, 1, NULL, NULL,&s_default_script, NULL, set_default_script_rt, check_default_script, TT_DEFAULT_SCRIPT }, @@ -977,6 +978,13 @@ check_allow_volatile_cookies(char **tt) return (allow_volatile_cookies != 0); } +int +check_anonymize_headers(char **tt) +{ + *tt = g_strdup("Default: Disabled"); + return (anonymize_headers != 0); +} + int set_browser_mode(struct settings *s, char *val) { diff --git a/xombrero.1 b/xombrero.1 index 81a5422..4bbb5f9 100644 --- a/xombrero.1 +++ b/xombrero.1 @@ -1025,6 +1025,21 @@ If set cookies are stored in the session cache but will be discarded once .Nm exits. Unfortunately enabling this does allow for some limited tracking on the web. +.It Cm anonymize_headers +If set, HTTP headers taken from the +.Lk https://www.bitrig.org/ "Bitrig website" +will be used to feed additional values to the +.Nm xombrero +HTTP header settings. +Currently only the User-Agent and Accept headers are modified, +corresponding to the +.Cm user_agent +and +.Cm http_accept +settings, respectively. +These headers are set after any header settings are read in from your +.Cm ~/.xombrero.conf . +Default is 0. .It Cm append_next When set a new tab is appended after the current tab instead of being appended as the last tab. 
diff --git a/xombrero.c b/xombrero.c index 4d09a32..3602879 100644 --- a/xombrero.c +++ b/xombrero.c @@ -224,6 +224,7 @@ struct keybinding_list kbl; struct sp_list spl; struct user_agent_list ua_list; struct http_accept_list ha_list; +struct domain_id_list di_list; struct cmd_alias_list cal; struct custom_uri_list cul; struct command_list chl; @@ -713,6 +714,13 @@ http_accept_rb_cmp(struct http_accept *ha1, struct http_accept *ha2) } RB_GENERATE(http_accept_list, http_accept, entry, http_accept_rb_cmp); +int +domain_id_rb_cmp(struct domain_id *d1, struct domain_id *d2) +{ + return (strcmp(d1->domain, d2->domain)); +} +RB_GENERATE(domain_id_list, domain_id, entry, domain_id_rb_cmp); + struct valid_url_types { char *type; } vut[] = { @@ -4946,7 +4954,6 @@ webview_npd_cb(WebKitWebView *wv, WebKitWebFrame *wf, WebKitWebPolicyDecision *pd, struct tab *t) { WebKitWebNavigationReason reason; - struct user_agent ua_find, *ua; char *uri; if (t == NULL) { @@ -4996,30 +5003,6 @@ webview_npd_cb(WebKitWebView *wv, WebKitWebFrame *wf, return (TRUE); /* we made the decission */ } - /* Change user agent if more than one has been given. 
*/ - if (!RB_EMPTY(&ua_list)) { - ua_find.id = t->user_agent_id; - - if ((ua = RB_FIND(user_agent_list, &ua_list, &ua_find)) == NULL) { - ua_find.id = 0; - t->user_agent_id = 1; - user_agent = RB_FIND(user_agent_list, &ua_list, &ua_find); - } else { - ++t->user_agent_id; - user_agent = ua; - } - - g_free(t->user_agent); - t->user_agent = g_strdup(user_agent->value); - - DNPRINTF(XT_D_NAV, "user-agent: %s\n", t->user_agent); - - g_object_set(G_OBJECT(t->settings), - "user-agent", t->user_agent, (char *)NULL); - - webkit_web_view_set_settings(wv, t->settings); - } - /* * This is a little hairy but it comes down to this: * when we run in whitelist mode we have to assist the browser in @@ -5045,7 +5028,8 @@ webview_rrs_cb(WebKitWebView *wv, WebKitWebFrame *wf, WebKitWebResource *res, SoupMessage *msg = NULL; SoupURI *uri = NULL; struct http_accept ha_find, *ha = NULL; - const char *accept = NULL; + struct user_agent ua_find, *ua = NULL; + struct domain_id di_find, *di = NULL; char *uri_s = NULL; msg = webkit_network_request_get_message(request); @@ -5068,29 +5052,45 @@ webview_rrs_cb(WebKitWebView *wv, WebKitWebFrame *wf, WebKitWebResource *res, if (do_not_track) soup_message_headers_append(msg->request_headers, "DNT", "1"); - /* Round-robin through HTTP Accept headers if any have been set */ - if (!RB_EMPTY(&ha_list)) { - accept = soup_message_headers_get_list(msg->request_headers, - "Accept"); - if (accept == NULL || - strncmp(accept, "text/html", strlen("text/html"))) - goto done; + /* + * Check if resources on this domain have been loaded before. If + * not, add the current tab's http-accept and user-agent id's to a + * new domain_id and insert into the RB tree. Use these http headers + * for all resources loaded from this domain for the lifetime of the + * browser. 
+ */ + if ((di_find.domain = uri->host) == NULL) + goto done; + if ((di = RB_FIND(domain_id_list, &di_list, &di_find)) == NULL) { + di = g_malloc(sizeof *di); + di->domain = g_strdup(uri->host); + di->ua_id = t->user_agent_id++; + di->ha_id = t->http_accept_id++; + RB_INSERT(domain_id_list, &di_list, di); + + ua_find.id = t->user_agent_id; + ua = RB_FIND(user_agent_list, &ua_list, &ua_find); + if (ua == NULL) + t->user_agent_id = 0; ha_find.id = t->http_accept_id; ha = RB_FIND(http_accept_list, &ha_list, &ha_find); - if (ha == NULL) { - ha_find.id = 0; - t->http_accept_id = 1; - http_accept = RB_FIND(http_accept_list, &ha_list, - &ha_find); - } else { - ++t->http_accept_id; - http_accept = ha; - } - - soup_message_headers_replace(msg->request_headers, "Accept", - http_accept->value); + if (ha == NULL) + t->http_accept_id = 0; } + + ua_find.id = di->ua_id; + ua = RB_FIND(user_agent_list, &ua_list, &ua_find); + ha_find.id = di->ha_id; + ha = RB_FIND(http_accept_list, &ha_list, &ha_find); + + if (ua != NULL) + soup_message_headers_replace(msg->request_headers, + "User-Agent", ua->value); + if (ha != NULL) + soup_message_headers_replace(msg->request_headers, + "Accept", ha->value); + done: if (uri_s) g_free(uri_s); @@ -6998,8 +6998,6 @@ setup_webkit(struct tab *t) FALSE, (char *)NULL); else warnx("webkit does not have \"enable-dns-prefetching\" property"); - g_object_set(G_OBJECT(t->settings), - "user-agent", t->user_agent, (char *)NULL); g_object_set(G_OBJECT(t->settings), "enable-scripts", enable_scripts, (char *)NULL); g_object_set(G_OBJECT(t->settings), @@ -7094,7 +7092,6 @@ GtkWidget * create_browser(struct tab *t) { GtkWidget *w; - gchar *strval; GtkAdjustment *adjustment; if (t == NULL) { @@ -7122,14 +7119,6 @@ create_browser(struct tab *t) g_object_set(t->settings, "default-encoding", encoding, (char *)NULL); - if (user_agent == NULL) { - g_object_get(G_OBJECT(t->settings), "user-agent", &strval, - (char *)NULL); - t->user_agent = g_strdup_printf("%s %s+", 
strval, version); - g_free(strval); - } else - t->user_agent = g_strdup(user_agent->value); - t->stylesheet = g_strdup(stylesheet); t->load_images = auto_load_images; @@ -7495,7 +7484,6 @@ delete_tab(struct tab *t) gtk_widget_destroy(t->tab_elems.eventbox); gtk_widget_destroy(t->vbox); - g_free(t->user_agent); g_free(t->stylesheet); g_free(t->tmp_uri); g_free(t->status); @@ -8629,6 +8617,8 @@ main(int argc, char **argv) RB_INIT(&st_tree); RB_INIT(&svl); RB_INIT(&ua_list); + RB_INIT(&ha_list); + RB_INIT(&di_list); TAILQ_INIT(&sessions); TAILQ_INIT(&tabs); @@ -8763,6 +8753,16 @@ main(int argc, char **argv) config_parse(conf, 0); } + /* check whether to read in a crapton of additional http headers */ + if (anonymize_headers) { + snprintf(conf, sizeof conf, "%s" PS "%s", + resource_dir, XT_USER_AGENT_FILE); + config_parse(conf, 0); + snprintf(conf, sizeof conf, "%s" PS "%s", + resource_dir, XT_HTTP_ACCEPT_FILE); + config_parse(conf, 0); + } + /* init fonts */ cmd_font = pango_font_description_from_string(cmd_font_name); oops_font = pango_font_description_from_string(oops_font_name); diff --git a/xombrero.h b/xombrero.h index 5ab0623..42bb8e5 100644 --- a/xombrero.h +++ b/xombrero.h @@ -291,7 +291,6 @@ struct tab { /* settings */ WebKitWebSettings *settings; - gchar *user_agent; int user_agent_id; int http_accept_id; gboolean load_images; @@ -378,6 +377,15 @@ struct http_accept { RB_HEAD(http_accept_list, http_accept); RB_PROTOTYPE(http_accept_list, http_accept, entry, http_accept_rb_cmp); +struct domain_id { + RB_ENTRY(domain_id) entry; + int ua_id; /* user agent id key */ + int ha_id; /* http accept header id key */ + char *domain; +}; +RB_HEAD(domain_id_list, domain_id); +RB_PROTOTYPE(domain_id_list, domain_id, entry, domain_id_rb_cmp); + /* utility */ #define XT_NAME ("xombrero") #define XT_DIR (".xombrero") @@ -388,6 +396,8 @@ RB_PROTOTYPE(http_accept_list, http_accept, entry, http_accept_rb_cmp); #define XT_FAVS_FILE ("favorites") #define XT_SOD_FILE 
("startofday") #define XT_HSTS_PRELOAD_FILE ("hsts-preload") +#define XT_USER_AGENT_FILE ("user-agent-headers") +#define XT_HTTP_ACCEPT_FILE ("http-accept-headers") #define XT_RESERVED_CHARS "$&+,/:;=?@ \"<>#%%{}|^~[]`" int run_script(struct tab *, char *); @@ -898,8 +908,6 @@ extern char runtime_settings[PATH_MAX]; extern int allow_volatile_cookies; extern int color_visited_uris; extern int save_global_history; -extern struct user_agent *user_agent; -extern struct http_accept *http_accept; extern int save_rejected_cookies; extern int session_autosave; extern int guess_search; @@ -936,6 +944,7 @@ extern int allow_insecure_scripts; extern int do_not_track; extern int preload_strict_transport; extern char *gnutls_priority_string; +extern int anonymize_headers; /* globals */ extern void (*os_init)(void); -- cgit 1.4.1-2-gfad0