about summary refs log tree commit diff stats
path: root/WWW
diff options
context:
space:
mode:
Diffstat (limited to 'WWW')
-rw-r--r--WWW/Library/Implementation/HTFile.c4
-rw-r--r--WWW/Library/Implementation/HTParse.c245
-rw-r--r--WWW/Library/Implementation/HTParse.h18
-rw-r--r--WWW/Library/Implementation/HTUtils.h3
4 files changed, 129 insertions, 141 deletions
diff --git a/WWW/Library/Implementation/HTFile.c b/WWW/Library/Implementation/HTFile.c
index 90745749..1624e5c2 100644
--- a/WWW/Library/Implementation/HTFile.c
+++ b/WWW/Library/Implementation/HTFile.c
@@ -1,5 +1,5 @@
 /*
- * $LynxId: HTFile.c,v 1.151 2018/05/11 23:20:35 tom Exp $
+ * $LynxId: HTFile.c,v 1.152 2019/08/16 22:53:10 tom Exp $
  *
  *			File Access				HTFile.c
  *			===========
@@ -2139,7 +2139,7 @@ static int print_local_dir(DIR *dp, char *localname,
 	     * Append the current entry's filename to the path.
 	     */
 	    StrAllocCat(tmpfilename, entry->file_name);
-	    HTSimplify(tmpfilename);
+	    HTSimplify(tmpfilename, LYIsPathSep(*tmpfilename));
 	    /*
 	     * Output the directory entry.
 	     */
diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c
index c5d947f8..1e6a3d56 100644
--- a/WWW/Library/Implementation/HTParse.c
+++ b/WWW/Library/Implementation/HTParse.c
@@ -1,5 +1,5 @@
 /*
- * $LynxId: HTParse.c,v 1.78 2016/11/24 15:29:50 tom Exp $
+ * $LynxId: HTParse.c,v 1.88 2019/08/17 00:58:20 tom Exp $
  *
  *		Parse HyperText Document Address		HTParse.c
  *		================================
@@ -326,6 +326,8 @@ static void convert_to_idna(char *host)
  *	This returns those parts of a name which are given (and requested)
  *	substituting bits from the related name where necessary.
  *
+ *	Originally based on RFC 1808, some details in RFC 3986 are used.
+ *
  * On entry,
  *	aName		A filename given
  *	relatedName	A name relative to which aName is to be parsed
@@ -613,9 +615,12 @@ char *HTParse(const char *aName,
 	}
 
 	if (given.absolute) {	/* All is given */
+	    char *base = tail;
+
 	    if (wanted & PARSE_PUNCTUATION)
 		*tail++ = '/';
 	    strcpy(tail, given.absolute);
+	    HTSimplify(base, TRUE);
 	    CTRACE((tfp, "HTParse: (ABS)\n"));
 	} else if (related.absolute) {	/* Adopt path not name */
 	    char *base = tail;
@@ -641,16 +646,20 @@ char *HTParse(const char *aName,
 		    p[1] = '\0';	/* Remove filename */
 		    strcat(p, given.relative);	/* Add given one */
 		}
-		HTSimplify(base);
+		HTSimplify(base, FALSE);
 		if (*base == '\0')
 		    strcpy(base, "/");
+	    } else {
+		HTSimplify(base, TRUE);
 	    }
 	    CTRACE((tfp, "HTParse: (Related-ABS)\n"));
 	} else if (given.relative) {
 	    strcpy(tail, given.relative);	/* what we've got */
+	    HTSimplify(tail, FALSE);
 	    CTRACE((tfp, "HTParse: (REL)\n"));
 	} else if (related.relative) {
 	    strcpy(tail, related.relative);
+	    HTSimplify(tail, FALSE);
 	    CTRACE((tfp, "HTParse: (Related-REL)\n"));
 	} else {		/* No inheritance */
 	    if (!isLYNXCGI(aName) &&
@@ -658,6 +667,8 @@ char *HTParse(const char *aName,
 		!isLYNXPROG(aName)) {
 		*tail++ = '/';
 		*tail = '\0';
+	    } else {
+		HTSimplify(tail, FALSE);
 	    }
 	    if (!strcmp(result, "news:/"))
 		result[5] = '*';
@@ -805,143 +816,127 @@ const char *HTParseAnchor(const char *aName)
  *  be replaced by "" , and the sequence "/./" which may be replaced by "/".
  *  Simplification helps us recognize duplicate filenames.
  *
- *	Thus,	/etc/junk/../fred	becomes /etc/fred
- *		/etc/junk/./fred	becomes /etc/junk/fred
- *
- *	but we should NOT change
- *		http://fred.xxx.edu/../..
- *
- *	or	../../albert.html
+ *  RFC 3986 section 5.2.4 says to do this whether or not the path was relative.
  */
-void HTSimplify(char *filename)
+void HTSimplify(char *filename, BOOL absolute)
 {
+#define MY_FMT "HTParse HTSimplify\t(%s)"
+#ifdef NO_LYNX_TRACE
+#define debug_at()		/* nothing */
+#else
+    const char *atln;
+
+#define debug_at(at)	atln = at
+#endif
+    char *mark;
     char *p;
-    char *q, *q1;
+    size_t limit;
 
-    if (filename == NULL)
-	return;
+    CTRACE2(TRACE_HTPARSE,
+	    (tfp, MY_FMT " %s\n",
+	     filename,
+	     absolute ? "ABS" : "REL"));
 
-    if (!(filename[0] && filename[1]) ||
-	filename[0] == '?' || filename[1] == '?' || filename[2] == '?')
-	return;
+    if (LYIsPathSep(*filename) && !absolute)
+	++filename;
+    mark = filename;
+    limit = strlen(filename);
 
-    if (StrChr(filename, '/') != NULL) {
-	for (p = (filename + 2); *p; p++) {
-	    if (*p == '?') {
-		/*
-		 * We're still treating a ?searchpart as part of the path in
-		 * HTParse() and scan(), but if we encounter a '?' here, assume
-		 * it's the delimiter and break.  We also could check for a
-		 * parameter delimiter (';') here, but the current Fielding
-		 * draft (wisely or ill-advisedly :) says that it should be
-		 * ignored and collapsing be allowed in it's value).  The only
-		 * defined parameter at present is ;type=[A, I, or D] for ftp
-		 * URLs, so if there's a "/..", "/../", "/./", or terminal '.'
-		 * following the ';', it must be due to the ';' being an
-		 * unescaped path character and not actually a parameter
-		 * delimiter.  - FM
-		 */
-		break;
-	    }
-	    if (*p == '/') {
-		if ((p[1] == '.') && (p[2] == '.') &&
-		    (p[3] == '/' || p[3] == '?' || p[3] == '\0')) {
-		    /*
-		     * Handle "../", "..?" or "..".
-		     */
-		    for (q = (p - 1); (q >= filename) && (*q != '/'); q--)
-			/*
-			 * Back up to previous slash or beginning of string.
-			 */
-			;
-		    if ((q[0] == '/') &&
-			(StrNCmp(q, "/../", 4) &&
-			 StrNCmp(q, "/..?", 4)) &&
-			!((q - 1) > filename && q[-1] == '/')) {
-			/*
-			 * Not at beginning of string or in a host field, so
-			 * remove the "/xxx/..".
-			 */
-			q1 = (p + 3);
-			p = q;
-			while (*q1 != '\0')
-			    *p++ = *q1++;
-			*p = '\0';	/* terminate */
-			/*
-			 * Start again with previous slash.
-			 */
-			p = (q - 1);
-		    }
-		} else if (p[1] == '.' && p[2] == '/') {
-		    /*
-		     * Handle "./" by removing both characters.
-		     */
-		    q = p;
-		    q1 = (p + 2);
-		    while (*q1 != '\0')
-			*q++ = *q1++;
-		    *q = '\0';	/* terminate */
-		    p--;
-		} else if (p[1] == '.' && p[2] == '?') {
-		    /*
-		     * Handle ".?" by removing the dot.
-		     */
-		    q = (p + 1);
-		    q1 = (p + 2);
-		    while (*q1 != '\0')
-			*q++ = *q1++;
-		    *q = '\0';	/* terminate */
-		    p--;
-		} else if (p[1] == '.' && p[2] == '\0') {
-		    /*
-		     * Handle terminal "." by removing the character.
-		     */
-		    p[1] = '\0';
+    for (p = filename; *p; ++p) {
+	if (*p == '?' || *p == '#') {
+	    limit = (size_t)(p - filename);
+	    break;
+	}
+    }
+    while ((limit != 0) && (*filename != '\0')) {
+	size_t trim = 0;
+	size_t skip = 0;
+	size_t last = 0;
+
+	debug_at("?");
+	p = filename;
+	if (limit >= 2 && !memcmp(p, "./", 2)) {	/* 2A */
+	    debug_at("2A");
+	    trim = 2;
+	} else if (limit >= 3 && !memcmp(p, "../", 3)) {
+	    debug_at("2A2");
+	    trim = 3;
+	} else if (limit >= 3 && !memcmp(p, "/./", 3)) {	/* 2B */
+	    debug_at("2B");
+	    trim = 2;
+	    skip = 1;
+	} else if (limit == 2 && !memcmp(p, "/.", 2)) {
+	    debug_at("2B2");
+	    trim = 1;
+	    skip = 1;
+	} else if (limit >= 4 && !memcmp(p, "/../", 4)) {	/* 2C */
+	    debug_at("2C");
+	    trim = 3;
+	    skip = 1;
+	    last = 1;
+	} else if (limit == 3 && !memcmp(p, "/..", 3)) {
+	    debug_at("2C2");
+	    trim = 2;
+	    skip = 1;
+	    last = 1;
+	} else if (limit == 2 && !memcmp(p, "..", 2)) {		/* 2D */
+	    debug_at("2D");
+	    trim = 2;
+	} else if (limit == 1 && !memcmp(p, ".", 1)) {
+	    debug_at("2D2");
+	    trim = 1;
+	}
+	if (trim) {
+	    CTRACE2(TRACE_HTPARSE,
+		    (tfp, MY_FMT " trim %lu/%lu (%.*s) '%.*s' @%s\n",
+		     mark, (unsigned long)trim, (unsigned long)limit,
+		     (int) trim, p + skip, (int) limit, p, atln));
+	}
+	if (last) {
+	    char *prior = filename;
+
+	    if (prior != mark) {
+		--prior;
+		while (prior != mark && *prior != '/') {
+		    --prior;
 		}
 	    }
+	    if (prior != filename) {
+		trim += (size_t)(filename - prior);
+		limit += (size_t)(filename - prior);
+		filename = p = prior;
+		CTRACE2(TRACE_HTPARSE,
+			(tfp, MY_FMT " TRIM %lu/%lu (%.*s)\n",
+			 mark, (unsigned long) trim, (unsigned long) limit,
+			 (int) trim, filename + skip));
+	    }
 	}
-	if (p >= filename + 2 && *p == '?' && *(p - 1) == '.') {
-	    if (*(p - 2) == '/') {
-		/*
-		 * Handle "/.?" by removing the dot.
-		 */
-		q = p - 1;
-		q1 = p;
-		while (*q1 != '\0')
-		    *q++ = *q1++;
-		*q = '\0';
-	    } else if (*(p - 2) == '.' &&
-		       p >= filename + 4 && *(p - 3) == '/' &&
-		       (*(p - 4) != '/' ||
-			(p > filename + 4 && *(p - 5) != ':'))) {
-		/*
-		 * Handle "xxx/..?"
-		 */
-		for (q = (p - 4); (q > filename) && (*q != '/'); q--)
-		    /*
-		     * Back up to previous slash or beginning of string.
-		     */
-		    ;
-		if (*q == '/') {
-		    if (q > filename && *(q - 1) == '/' &&
-			!(q > filename + 1 && *(q - 1) != ':'))
-			return;
-		    q++;
+	if (trim) {
+	    limit -= trim;
+	    for (p = filename;; ++p) {
+		if ((p[0] = p[trim]) == '\0') {
+		    break;
 		}
-		if (StrNCmp(q, "../", 3) && StrNCmp(q, "./", 2)) {
-		    /*
-		     * Not after "//" at beginning of string or after "://",
-		     * and xxx is not ".." or ".", so remove the "xxx/..".
-		     */
-		    q1 = p;
-		    p = q;
-		    while (*q1 != '\0')
-			*p++ = *q1++;
-		    *p = '\0';	/* terminate */
+		if (skip) {
+		    p[0] = '/';
+		    skip = 0;
 		}
 	    }
+	    CTRACE2(TRACE_HTPARSE,
+		    (tfp, MY_FMT " loop %lu\n", mark, (unsigned long) limit));
+	} else {
+	    if (*filename == '/') {
+		++filename;
+		--limit;
+	    }
+	    while ((limit != 0) && (*filename != '/')) {
+		++filename;
+		--limit;
+	    }
 	}
     }
+    CTRACE2(TRACE_HTPARSE, (tfp, MY_FMT " done\n", mark));
+#undef MY_FMT
 }
 
 /*	Make Relative Name.					HTRelative()
diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h
index ce1bff61..a1aa2e25 100644
--- a/WWW/Library/Implementation/HTParse.h
+++ b/WWW/Library/Implementation/HTParse.h
@@ -1,5 +1,5 @@
 /*
- * $LynxId: HTParse.h,v 1.22 2016/11/23 21:06:50 tom Exp $
+ * $LynxId: HTParse.h,v 1.23 2019/08/16 22:42:06 tom Exp $
  *				HTParse:  URL parsing in the WWW Library
  *				HTPARSE
  *
@@ -99,26 +99,18 @@ extern "C" {
 /*	Simplify a filename.				HTSimplify()
  *	--------------------
  *
- *  A unix-style file is allowed to contain the seqeunce xxx/../ which may
- *  be replaced by "" , and the seqeunce "/./" which may be replaced by "/".
+ *  A unix-style file is allowed to contain the sequence xxx/../ which may
+ *  be replaced by "" , and the sequence "/./" which may be replaced by "/".
  *  Simplification helps us recognize duplicate filenames.
- *
- *	Thus,	/etc/junk/../fred	becomes /etc/fred
- *		/etc/junk/./fred	becomes	/etc/junk/fred
- *
- *      but we should NOT change
- *		http://fred.xxx.edu/../..
- *
- *	or	../../albert.html
  */
-    extern void HTSimplify(char *filename);
+    extern void HTSimplify(char *filename, BOOL absolute);
 
 /*	Make Relative Name.					HTRelative()
  *	-------------------
  *
  * This function creates and returns a string which gives an expression of
  * one address as related to another.  Where there is no relation, an absolute
- * address is retured.
+ * address is returned.
  *
  *  On entry,
  *	Both names must be absolute, fully qualified names of nodes
diff --git a/WWW/Library/Implementation/HTUtils.h b/WWW/Library/Implementation/HTUtils.h
index cffa3b6e..cdd84eff 100644
--- a/WWW/Library/Implementation/HTUtils.h
+++ b/WWW/Library/Implementation/HTUtils.h
@@ -1,5 +1,5 @@
 /*
- * $LynxId: HTUtils.h,v 1.129 2018/05/16 22:16:05 tom Exp $
+ * $LynxId: HTUtils.h,v 1.130 2019/08/17 00:37:51 tom Exp $
  *
  * Utility macros for the W3 code library
  * MACROS FOR GENERAL USE
@@ -573,6 +573,7 @@ extern int WWW_TraceMask;
 #define TRACE_CHARSETS  (TRACE_bit(6))
 #define TRACE_GRIDTEXT  (TRACE_bit(7))
 #define TRACE_TIMING    (TRACE_bit(8))
+#define TRACE_HTPARSE   (TRACE_bit(9))
 
 /*
  * Get printing/scanning formats.