about summary refs log tree commit diff stats
path: root/WWW/Library
diff options
context:
space:
mode:
author Thomas E. Dickey <dickey@invisible-island.net> 2016-11-04 17:30:06 -0400
committer Thomas E. Dickey <dickey@invisible-island.net> 2016-11-04 17:30:06 -0400
commit c72875ff13e8237450e6a9b7c7c67835d974c429 (patch)
tree a003b95b4ece179055ed802069fa2a77f350696e /WWW/Library
parent 3cf187da658dde6b54df4aa335b281163083f782 (diff)
download lynx-snapshots-c72875ff13e8237450e6a9b7c7c67835d974c429.tar.gz
snapshot of project "lynx", label v2-8-9dev_9h
Diffstat (limited to 'WWW/Library')
-rw-r--r-- WWW/Library/Implementation/HTTP.c 96
1 files changed, 88 insertions, 8 deletions
diff --git a/WWW/Library/Implementation/HTTP.c b/WWW/Library/Implementation/HTTP.c
index bb457e07..a4cb9c0e 100644
--- a/WWW/Library/Implementation/HTTP.c
+++ b/WWW/Library/Implementation/HTTP.c
@@ -1,5 +1,5 @@
 /*
- * $LynxId: HTTP.c,v 1.155 2016/10/20 21:04:44 Kamil.Dudka Exp $
+ * $LynxId: HTTP.c,v 1.156 2016/11/04 13:27:29 tom Exp $
  *
  * HyperText Tranfer Protocol	- Client implementation		HTTP.c
  * ==========================
@@ -499,27 +499,107 @@ int ws_netread(int fd, char *buf, int len)
 #endif /* _WINDOWS */
 
 /*
+ * RFC-1738 says we can have user/password using these ASCII characters
+ *    safe           = "$" | "-" | "_" | "." | "+"
+ *    extra          = "!" | "*" | "'" | "(" | ")" | ","
+ *    hex            = digit | "A" | "B" | "C" | "D" | "E" | "F" |
+ *                             "a" | "b" | "c" | "d" | "e" | "f"
+ *    escape         = "%" hex hex
+ *    unreserved     = alpha | digit | safe | extra
+ *    uchar          = unreserved | escape
+ *    user           = *[ uchar | ";" | "?" | "&" | "=" ]
+ *    password       = *[ uchar | ";" | "?" | "&" | "=" ]
+ * and we cannot have a password without user, i.e., no leading ":"
+ * and ":", "@", "/" must be encoded, i.e., will not appear as such.
+ *
+ * However, in a URL
+ *    //<user>:<password>@<host>:<port>/<url-path>
+ * valid characters in the host are different, not allowing most of those
+ * punctuation characters.
+ */
+static char *skip_user_passwd(char *host)
+{
+    char *result = 0;
+    char *s = host;
+    int pass = 0;
+    int ch;
+    int last = -1;
+
+    while ((ch = UCH(*s)) != '\0') {
+	if (ch == '\0') {
+	    break;
+	} else if (ch == ':') {
+	    if (pass++)
+		break;
+	} else if (ch == '@') {
+	    if (s != host && last != ':')
+		result = s;
+	    break;
+	} else if (ch == '/') {
+	    break;
+	} else if (ch == '%') {
+	    if (!(isxdigit(UCH(s[1])) && isxdigit(UCH(s[2]))))
+		break;
+	} else if (!(isalnum(ch) || strchr(";?&=!*'(),$-_.+", ch))) {
+	    break;
+	}
+	++s;
+	last = ch;
+    }
+    return result;
+}
+
+/*
  * Strip any username from the given string so we retain only the host.
  */
 static void strip_userid(char *host)
 {
     char *p1 = host;
-    char *p2 = StrChr(host, '@');
+    char *p2 = skip_user_passwd(host);
     char *fake;
 
     if (p2 != 0) {
+	char *msg = NULL;
+	char *auth = NULL;
+	char *save = NULL;
+	char *p3 = p2;
+
 	*p2++ = '\0';
-	if ((fake = HTParse(host, "", PARSE_HOST)) != NULL) {
-	    char *msg = NULL;
 
-	    CTRACE((tfp, "parsed:%s\n", fake));
-	    HTSprintf0(&msg, gettext("Address contains a username: %s"), host);
-	    HTAlert(msg);
-	    FREE(msg);
+	StrAllocCopy(auth, host);
+
+	/*
+	 * Trim trailing characters that can be in a user name, but not in
+	 * a host name, to improve the warning.  For instance "?????" is
+	 * literally a legal user name.
+	 */
+	while ((p3 != host) && strchr(":;?&=!*'(),", p3[-1])) {
+	    *(--p3) = '\0';
 	}
+	CTRACE((tfp, "trimmed:%s\n", host));
+	StrAllocCopy(save, host);
+
+	if (*host == '\0') {
+	    HTSprintf0(&msg,
+		       gettext("User/password contains only punctuation: %s"),
+		       auth);
+	} else if ((fake = HTParse(host, "", PARSE_HOST)) != NULL && *fake) {
+	    HTSprintf0(&msg,
+		       gettext("User/password may be confused with hostname: '%s' (e.g, '%s')"),
+		       auth, fake);
+	} else if (strcmp(save, auth)) {
+	    HTSprintf0(&msg,
+		       gettext("User/password may appear to be a hostname: '%s' (e.g, '%s')"),
+		       auth, save);
+	}
+	if (msg != 0)
+	    HTAlert(msg);
 	while ((*p1++ = *p2++) != '\0') {
 	    ;
 	}
+	FREE(save);
+	FREE(auth);
+	FREE(msg);
     }
 }