about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Josh Rickmar <jrick@devio.us> 2012-10-10 12:54:26 -0400
committer Josh Rickmar <jrick@devio.us> 2012-10-10 12:54:26 -0400
commit 9479f490c2c84d7bad8cf205c3dee0fd359421e1 (patch)
tree b5c8d80c0d0ff49605ce3b2d6f98ff8dd8d196c6
parent 8421b1728ffe8da4b288d76d48d46d96fb4d0dfe (diff)
download xombrero-9479f490c2c84d7bad8cf205c3dee0fd359421e1.tar.gz
Implement header changes as requested by jy-p
This change introduces a new RB tree to keep track of identities (an
identity being a particular combination of modified HTTP headers; so
far only User-Agent and Accept).  Whenever a site is visited, this
tree is checked to see whether the site has been accessed before, and
if it has, the previously used Accept and User-Agent headers are used
again.  If the site has not been visited before during the browser's
lifetime, a new entry is created in this tree to record which headers
to use the next time.  A site is defined as an FQDN, so requests made
to cross-site resources or to resources on a different subdomain will
generate a new saved identity.

The second change adds two new config files to the resource dir to
read in additional user_agent and http_accept values scraped from the
logs of www.bitrig.org.  The idea is to rotate through these values
on every new site visit, providing more anonymity and thwarting web
tracking that relies on inspecting the headers being sent.
-rw-r--r-- Makefile 2
-rw-r--r-- settings.c 20
-rw-r--r-- xombrero.1 15
-rw-r--r-- xombrero.c 114
-rw-r--r-- xombrero.h 15
5 files changed, 100 insertions, 66 deletions
diff --git a/Makefile b/Makefile
index f9b4c49..6b227b1 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,8 @@ beforeinstall:
 	install -m 644 ${.CURDIR}/tld-rules ${PREFIX}/share/xombrero
 	install -m 644 ${.CURDIR}/style.css ${PREFIX}/share/xombrero
 	install -m 644 ${.CURDIR}/hsts-preload ${PREFIX}/share/xombrero
+	install -m 644 ${.CURDIR}/user-agent-headers ${PREFIX}/share/xombrero
+	install -m 644 ${.CURDIR}/http-accept-headers ${PREFIX}/share/xombrero
 
 ${PROG} ${OBJS} beforedepend: javascript.h tooltip.h
 
diff --git a/settings.c b/settings.c
index 30b1a94..5ad06f3 100644
--- a/settings.c
+++ b/settings.c
@@ -50,14 +50,13 @@ char		*resource_dir = NULL;
 char		download_dir[PATH_MAX];
 int		allow_volatile_cookies = 0;
 int		save_global_history = 0; /* save global history to disk */
-struct user_agent	*user_agent = NULL;
-struct http_accept	*http_accept = NULL;
 int		save_rejected_cookies = 0;
 gint		max_connections = 25;
 gint		max_host_connections = 5;
 int		history_autosave = 0;
 int		edit_mode = XT_EM_HYBRID;
 char		*include_config = NULL;
+int		anonymize_headers = 0;
 int		tabless = 0;	/* allow only 1 tab */
 
 /* runtime settings */
@@ -216,6 +215,7 @@ int		set_gnutls_priority_string(struct settings *, char *);
 int		check_allow_insecure_content(char **);
 int		check_allow_insecure_scripts(char **);
 int		check_allow_volatile_cookies(char **);
+int		check_anonymize_headers(char **);
 int		check_append_next(char **);
 int		check_auto_load_images(char **);
 int		check_autofocus_onload(char **);
@@ -492,14 +492,15 @@ struct special		s_gnutls_priority_string = {
 struct settings		rs[] = {
 	{ "allow_insecure_content",	XT_S_BOOL, 0,		&allow_insecure_content, NULL, NULL, NULL, set_allow_insecure_content, check_allow_insecure_content, TT_ALLOW_INSECURE_CONTENT },
 	{ "allow_insecure_scripts",	XT_S_BOOL, 0,		&allow_insecure_scripts, NULL, NULL, NULL, set_allow_insecure_scripts, check_allow_insecure_scripts, TT_ALLOW_INSECURE_SCRIPTS},
-	{ "allow_volatile_cookies",	XT_S_BOOL, 0,		&allow_volatile_cookies, NULL, NULL, NULL, NULL, check_allow_volatile_cookies, TT_ALLOW_VOLATILE_COOKIES},
-	{ "append_next",		XT_S_BOOL, 0,		&append_next, NULL, NULL, NULL, set_append_next, check_append_next, TT_APPEND_NEXT},
+	{ "allow_volatile_cookies",	XT_S_BOOL, 0,		&allow_volatile_cookies, NULL, NULL, NULL, NULL, check_allow_volatile_cookies, TT_ALLOW_VOLATILE_COOKIES },
+	{ "anonymize_headers",		XT_S_BOOL, 0,		&anonymize_headers, NULL, NULL, NULL, NULL, check_anonymize_headers, TT_ANONYMIZE_HEADERS },
+	{ "append_next",		XT_S_BOOL, 0,		&append_next, NULL, NULL, NULL, set_append_next, check_append_next, TT_APPEND_NEXT },
 	{ "auto_load_images",		XT_S_BOOL, 0,		&auto_load_images, NULL, NULL, NULL, set_auto_load_images, check_auto_load_images, TT_AUTO_LOAD_IMAGES },
 	{ "autofocus_onload",		XT_S_BOOL, 0,		&autofocus_onload, NULL, NULL, NULL, set_autofocus_onload, check_autofocus_onload, TT_AUTOFOCUS_ONLOAD },
 	{ "browser_mode",		XT_S_STR, 0, NULL, NULL,&s_browser_mode, NULL, NULL, check_browser_mode, TT_BROWSER_MODE },
-	{ "cmd_font",			XT_S_STR, 0, NULL, &cmd_font_name, NULL, NULL, set_cmd_font, check_cmd_font, TT_CMD_FONT},
+	{ "cmd_font",			XT_S_STR, 0, NULL, &cmd_font_name, NULL, NULL, set_cmd_font, check_cmd_font, TT_CMD_FONT },
 	{ "color_visited_uris",		XT_S_BOOL, 0,		&color_visited_uris , NULL, NULL, NULL, set_color_visited_uris, check_color_visited_uris, TT_COLOR_VISITED_URIS },
-	{ "cookie_policy",		XT_S_STR, 0, NULL, NULL,&s_cookie, NULL, set_cookie_policy_rt, check_cookie_policy, TT_COOKIE_POLICY},
+	{ "cookie_policy",		XT_S_STR, 0, NULL, NULL,&s_cookie, NULL, set_cookie_policy_rt, check_cookie_policy, TT_COOKIE_POLICY },
 	{ "cookies_enabled",		XT_S_BOOL, 0,		&cookies_enabled, NULL, NULL, NULL, set_cookies_enabled, check_cookies_enabled, TT_COOKIES_ENABLED },
 	{ "ctrl_click_focus",		XT_S_BOOL, 0,		&ctrl_click_focus, NULL, NULL, NULL, set_ctrl_click_focus, check_ctrl_click_focus, TT_CTRL_CLICK_FOCUS },
 	{ "default_script",		XT_S_STR, 1, NULL, NULL,&s_default_script, NULL, set_default_script_rt, check_default_script, TT_DEFAULT_SCRIPT },
@@ -978,6 +979,13 @@ check_allow_volatile_cookies(char **tt)
 }
 
 int
+check_anonymize_headers(char **tt)
+{
+	*tt = g_strdup("Default: Disabled");
+	return (anonymize_headers != 0);
+}
+
+int
 set_browser_mode(struct settings *s, char *val)
 {
 	if (!strcmp(val, "whitelist")) {
diff --git a/xombrero.1 b/xombrero.1
index 81a5422..4bbb5f9 100644
--- a/xombrero.1
+++ b/xombrero.1
@@ -1025,6 +1025,21 @@ If set cookies are stored in the session cache but will be discarded once
 .Nm
 exits.
 Unfortunately enabling this does allow for some limited tracking on the web.
+.It Cm anonymize_headers
+If set, HTTP headers taken from the
+.Lk https://www.bitrig.org/ "Bitrig website"
+will be used to feed additional values to the
+.Nm xombrero
+HTTP header settings.
+Currently only the User-Agent and Accept headers are modified,
+corresponding to the
+.Cm user_agent
+and
+.Cm http_accept
+settings, respectively.
+These headers are set after any header settings are read in from your
+.Cm ~/.xombrero.conf .
+Default is 0.
 .It Cm append_next
 When set a new tab is appended after the current tab instead of being appended
 as the last tab.
diff --git a/xombrero.c b/xombrero.c
index 4d09a32..3602879 100644
--- a/xombrero.c
+++ b/xombrero.c
@@ -224,6 +224,7 @@ struct keybinding_list	kbl;
 struct sp_list		spl;
 struct user_agent_list	ua_list;
 struct http_accept_list	ha_list;
+struct domain_id_list	di_list;
 struct cmd_alias_list	cal;
 struct custom_uri_list	cul;
 struct command_list	chl;
@@ -713,6 +714,13 @@ http_accept_rb_cmp(struct http_accept *ha1, struct http_accept *ha2)
 }
 RB_GENERATE(http_accept_list, http_accept, entry, http_accept_rb_cmp);
 
+int
+domain_id_rb_cmp(struct domain_id *d1, struct domain_id *d2)
+{
+	return (strcmp(d1->domain, d2->domain));
+}
+RB_GENERATE(domain_id_list, domain_id, entry, domain_id_rb_cmp);
+
 struct valid_url_types {
 	char		*type;
 } vut[] = {
@@ -4946,7 +4954,6 @@ webview_npd_cb(WebKitWebView *wv, WebKitWebFrame *wf,
     WebKitWebPolicyDecision *pd, struct tab *t)
 {
 	WebKitWebNavigationReason	reason;
-	struct user_agent		ua_find, *ua;
 	char				*uri;
 
 	if (t == NULL) {
@@ -4996,30 +5003,6 @@ webview_npd_cb(WebKitWebView *wv, WebKitWebFrame *wf,
 		return (TRUE); /* we made the decission */
 	}
 
-	/* Change user agent if more than one has been given. */
-	if (!RB_EMPTY(&ua_list)) {
-		ua_find.id = t->user_agent_id;
-
-		if ((ua = RB_FIND(user_agent_list, &ua_list, &ua_find)) == NULL) {
-			ua_find.id = 0;
-			t->user_agent_id = 1;
-			user_agent = RB_FIND(user_agent_list, &ua_list, &ua_find);
-		} else {
-			++t->user_agent_id;
-			user_agent = ua;
-		}
-
-		g_free(t->user_agent);
-		t->user_agent = g_strdup(user_agent->value);
-
-		DNPRINTF(XT_D_NAV, "user-agent: %s\n", t->user_agent);
-
-		g_object_set(G_OBJECT(t->settings),
-			"user-agent", t->user_agent, (char *)NULL);
-
-		webkit_web_view_set_settings(wv, t->settings);
-	}
-
 	/*
 	 * This is a little hairy but it comes down to this:
 	 * when we run in whitelist mode we have to assist the browser in
@@ -5045,7 +5028,8 @@ webview_rrs_cb(WebKitWebView *wv, WebKitWebFrame *wf, WebKitWebResource *res,
 	SoupMessage		*msg = NULL;
 	SoupURI			*uri = NULL;
 	struct http_accept	ha_find, *ha = NULL;
-	const char		*accept = NULL;
+	struct user_agent	ua_find, *ua = NULL;
+	struct domain_id	di_find, *di = NULL;
 	char			*uri_s = NULL;
 
 	msg = webkit_network_request_get_message(request);
@@ -5068,29 +5052,45 @@ webview_rrs_cb(WebKitWebView *wv, WebKitWebFrame *wf, WebKitWebResource *res,
 	if (do_not_track)
 		soup_message_headers_append(msg->request_headers, "DNT", "1");
 
-	/* Round-robin through HTTP Accept headers if any have been set */
-	if (!RB_EMPTY(&ha_list)) {
-		accept = soup_message_headers_get_list(msg->request_headers,
-		    "Accept");
-		if (accept == NULL ||
-		    strncmp(accept, "text/html", strlen("text/html")))
-			goto done;
+	/*
+	 * Check if resources on this domain have been loaded before.  If
+	 * not, add the current tab's http-accept and user-agent id's to a
+	 * new domain_id and insert into the RB tree.  Use these http headers
+	 * for all resources loaded from this domain for the lifetime of the
+	 * browser.
+	 */
+	if ((di_find.domain = uri->host) == NULL)
+		goto done;
+	if ((di = RB_FIND(domain_id_list, &di_list, &di_find)) == NULL) {
+		di = g_malloc(sizeof *di);
+		di->domain = g_strdup(uri->host);
+		di->ua_id = t->user_agent_id++;
+		di->ha_id = t->http_accept_id++;
+		RB_INSERT(domain_id_list, &di_list, di);
+
+		ua_find.id = t->user_agent_id;
+		ua = RB_FIND(user_agent_list, &ua_list, &ua_find);
+		if (ua == NULL)
+			t->user_agent_id = 0;
 
 		ha_find.id = t->http_accept_id;
 		ha = RB_FIND(http_accept_list, &ha_list, &ha_find);
-		if (ha == NULL) {
-			ha_find.id = 0;
-			t->http_accept_id = 1;
-			http_accept = RB_FIND(http_accept_list, &ha_list,
-			    &ha_find);
-		} else {
-			++t->http_accept_id;
-			http_accept = ha;
-		}
-
-		soup_message_headers_replace(msg->request_headers, "Accept",
-		    http_accept->value);
+		if (ha == NULL)
+			t->http_accept_id = 0;
 	}
+
+	ua_find.id = di->ua_id;
+	ua = RB_FIND(user_agent_list, &ua_list, &ua_find);
+	ha_find.id = di->ha_id;
+	ha = RB_FIND(http_accept_list, &ha_list, &ha_find);
+
+	if (ua != NULL)
+		soup_message_headers_replace(msg->request_headers,
+		    "User-Agent", ua->value);
+	if (ha != NULL)
+		soup_message_headers_replace(msg->request_headers,
+		    "Accept", ha->value);
+
 done:
 	if (uri_s)
 		g_free(uri_s);
@@ -6999,8 +6999,6 @@ setup_webkit(struct tab *t)
 	else
 		warnx("webkit does not have \"enable-dns-prefetching\" property");
 	g_object_set(G_OBJECT(t->settings),
-	    "user-agent", t->user_agent, (char *)NULL);
-	g_object_set(G_OBJECT(t->settings),
 	    "enable-scripts", enable_scripts, (char *)NULL);
 	g_object_set(G_OBJECT(t->settings),
 	    "enable-plugins", enable_plugins, (char *)NULL);
@@ -7094,7 +7092,6 @@ GtkWidget *
 create_browser(struct tab *t)
 {
 	GtkWidget		*w;
-	gchar			*strval;
 	GtkAdjustment		*adjustment;
 
 	if (t == NULL) {
@@ -7122,14 +7119,6 @@ create_browser(struct tab *t)
 
 	g_object_set(t->settings, "default-encoding", encoding, (char *)NULL);
 
-	if (user_agent == NULL) {
-		g_object_get(G_OBJECT(t->settings), "user-agent", &strval,
-		    (char *)NULL);
-		t->user_agent = g_strdup_printf("%s %s+", strval, version);
-		g_free(strval);
-	} else
-		t->user_agent = g_strdup(user_agent->value);
-
 	t->stylesheet = g_strdup(stylesheet);
 	t->load_images = auto_load_images;
 
@@ -7495,7 +7484,6 @@ delete_tab(struct tab *t)
 	gtk_widget_destroy(t->tab_elems.eventbox);
 	gtk_widget_destroy(t->vbox);
 
-	g_free(t->user_agent);
 	g_free(t->stylesheet);
 	g_free(t->tmp_uri);
 	g_free(t->status);
@@ -8629,6 +8617,8 @@ main(int argc, char **argv)
 	RB_INIT(&st_tree);
 	RB_INIT(&svl);
 	RB_INIT(&ua_list);
+	RB_INIT(&ha_list);
+	RB_INIT(&di_list);
 
 	TAILQ_INIT(&sessions);
 	TAILQ_INIT(&tabs);
@@ -8763,6 +8753,16 @@ main(int argc, char **argv)
 		config_parse(conf, 0);
 	}
 
+	/* check whether to read in a crapton of additional http headers */
+	if (anonymize_headers) {
+		snprintf(conf, sizeof conf, "%s" PS "%s",
+		    resource_dir, XT_USER_AGENT_FILE);
+		config_parse(conf, 0);
+		snprintf(conf, sizeof conf, "%s" PS "%s",
+		    resource_dir, XT_HTTP_ACCEPT_FILE);
+		config_parse(conf, 0);
+	}
+
 	/* init fonts */
 	cmd_font = pango_font_description_from_string(cmd_font_name);
 	oops_font = pango_font_description_from_string(oops_font_name);
diff --git a/xombrero.h b/xombrero.h
index 5ab0623..42bb8e5 100644
--- a/xombrero.h
+++ b/xombrero.h
@@ -291,7 +291,6 @@ struct tab {
 
 	/* settings */
 	WebKitWebSettings	*settings;
-	gchar			*user_agent;
 	int			user_agent_id;
 	int			http_accept_id;
 	gboolean		load_images;
@@ -378,6 +377,15 @@ struct http_accept {
 RB_HEAD(http_accept_list, http_accept);
 RB_PROTOTYPE(http_accept_list, http_accept, entry, http_accept_rb_cmp);
 
+struct domain_id {
+	RB_ENTRY(domain_id)	entry;
+	int			ua_id;	/* user agent id key */
+	int			ha_id;	/* http accept header id key */
+	char			*domain;
+};
+RB_HEAD(domain_id_list, domain_id);
+RB_PROTOTYPE(domain_id_list, domain_id, entry, domain_id_rb_cmp);
+
 /* utility */
 #define XT_NAME			("xombrero")
 #define XT_DIR			(".xombrero")
@@ -388,6 +396,8 @@ RB_PROTOTYPE(http_accept_list, http_accept, entry, http_accept_rb_cmp);
 #define XT_FAVS_FILE		("favorites")
 #define XT_SOD_FILE		("startofday")
 #define XT_HSTS_PRELOAD_FILE	("hsts-preload")
+#define XT_USER_AGENT_FILE	("user-agent-headers")
+#define XT_HTTP_ACCEPT_FILE	("http-accept-headers")
 #define XT_RESERVED_CHARS	"$&+,/:;=?@ \"<>#%%{}|^~[]`"
 
 int			run_script(struct tab *, char *);
@@ -898,8 +908,6 @@ extern char	runtime_settings[PATH_MAX];
 extern int	allow_volatile_cookies;
 extern int	color_visited_uris;
 extern int	save_global_history;
-extern struct user_agent	*user_agent;
-extern struct http_accept	*http_accept;
 extern int	save_rejected_cookies;
 extern int	session_autosave;
 extern int	guess_search;
@@ -936,6 +944,7 @@ extern int	allow_insecure_scripts;
 extern int	do_not_track;
 extern int	preload_strict_transport;
 extern char	*gnutls_priority_string;
+extern int	anonymize_headers;
 
 /* globals */
 extern void		(*os_init)(void);