summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorAli Fardan <raiz@stellarbound.space>2020-11-22 17:02:51 +0300
committerAli Fardan <raiz@stellarbound.space>2020-11-22 17:02:51 +0300
commit5a66aa1bbace4775025cd99a8ab0a8a18487dfe9 (patch)
treea40b9c727001661fb2200f06b87155a9e5a8db98
parent7ea4eb3fe07564f605731b4b4eef09a47c0bfb08 (diff)
downloadlibyuri-5a66aa1bbace4775025cd99a8ab0a8a18487dfe9.tar.gz
- implement percent encoding in encode.c
- move syntax macros from decode.c to separate types.h header
- fix normalizer (sorta, needs testing)
- TODO: more testing...
-rw-r--r--decode.c118
-rw-r--r--encode.c362
-rw-r--r--normalize.c36
-rw-r--r--test.c7
-rw-r--r--types.h113
5 files changed, 498 insertions, 138 deletions
diff --git a/decode.c b/decode.c
index de546ba..805ad3f 100644
--- a/decode.c
+++ b/decode.c
@@ -3,123 +3,7 @@
 
 #include "yuri.h"
 
-/*
- * for an explanation of this hell please refer to
- * RFC 3986 (Appendix A)
- */
-#define _is_alpha(c)\
-	((c == 'A') ||\
-	 (c == 'B') ||\
-	 (c == 'C') ||\
-	 (c == 'D') ||\
-	 (c == 'E') ||\
-	 (c == 'F') ||\
-	 (c == 'G') ||\
-	 (c == 'H') ||\
-	 (c == 'I') ||\
-	 (c == 'J') ||\
-	 (c == 'K') ||\
-	 (c == 'L') ||\
-	 (c == 'M') ||\
-	 (c == 'N') ||\
-	 (c == 'O') ||\
-	 (c == 'P') ||\
-	 (c == 'Q') ||\
-	 (c == 'R') ||\
-	 (c == 'S') ||\
-	 (c == 'T') ||\
-	 (c == 'U') ||\
-	 (c == 'V') ||\
-	 (c == 'W') ||\
-	 (c == 'X') ||\
-	 (c == 'Y') ||\
-	 (c == 'Z') ||\
-	 (c == 'a') ||\
-	 (c == 'b') ||\
-	 (c == 'c') ||\
-	 (c == 'd') ||\
-	 (c == 'e') ||\
-	 (c == 'f') ||\
-	 (c == 'g') ||\
-	 (c == 'h') ||\
-	 (c == 'i') ||\
-	 (c == 'j') ||\
-	 (c == 'k') ||\
-	 (c == 'l') ||\
-	 (c == 'm') ||\
-	 (c == 'n') ||\
-	 (c == 'o') ||\
-	 (c == 'p') ||\
-	 (c == 'q') ||\
-	 (c == 'r') ||\
-	 (c == 's') ||\
-	 (c == 't') ||\
-	 (c == 'u') ||\
-	 (c == 'v') ||\
-	 (c == 'w') ||\
-	 (c == 'x') ||\
-	 (c == 'y') ||\
-	 (c == 'z'))
-
-#define _is_digit(c)\
-	((c == '0') ||\
-	 (c == '1') ||\
-	 (c == '2') ||\
-	 (c == '3') ||\
-	 (c == '4') ||\
-	 (c == '5') ||\
-	 (c == '6') ||\
-	 (c == '7') ||\
-	 (c == '8') ||\
-	 (c == '9'))
-
-#define _is_gen_delim(c)\
-	((c == ':') ||\
-	 (c == '/') ||\
-	 (c == '?') ||\
-	 (c == '#') ||\
-	 (c == '[') ||\
-	 (c == ']') ||\
-	 (c == '@'))
-
-#define _is_sub_delim(c)\
-	((c == '!') ||\
-	 (c == '$') ||\
-	 (c == '&') ||\
-	 (c == '\'') ||\
-	 (c == '(') ||\
-	 (c == ')') ||\
-	 (c == '*') ||\
-	 (c == '+') ||\
-	 (c == ',') ||\
-	 (c == ';') ||\
-	 (c == '='))
-
-#define _is_unreserved(c)\
-	(_is_alpha(c) ||\
-	 _is_digit(c) ||\
-	 (c == '-')   ||\
-	 (c == '.')   ||\
-	 (c == '_')   ||\
-	 (c == '~'))
-
-#define _is_reserved(c)\
-	(_is_gen_delim(c) ||\
-	 _is_sub_delim(c))
-
-#define _is_pchar(c)\
-	(_is_unreserved(c) ||\
-	 _is_sub_delim(c)  ||\
-	 (c == ':')        ||\
-	 (c == '@'))
-
-#define _is_segment(c)\
-	_is_pchar(c)
-
-#define _is_segment_nc(c)\
-	(_is_unreserved(c) ||\
-	 _is_sub_delim(c)  ||\
-	 (c == '@'))
+#include "types.h"
 
 /*
  * TODO: I have written code for conversion to and
diff --git a/encode.c b/encode.c
index 75b7f90..4806136 100644
--- a/encode.c
+++ b/encode.c
@@ -4,9 +4,327 @@
 
 #include "yuri.h"
 
-/*
- * TODO: percent encode appropriate characters
- */
+#include "types.h"
+
+/* Append raw to *str, percent-encoding every byte that is not unreserved,
+ * a sub-delim or ':' (RFC 3986 userinfo).  *plen is treated as the size of
+ * *str and grows by the encoded length.  Returns 0 on success, -1 on
+ * allocation failure (*str and *plen are then left unchanged). */
+static int
+pct_encode_authority_user(char **str, size_t *plen, const char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':')
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':') {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
+/* Append raw to *str, percent-encoding every byte that is not unreserved
+ * or a sub-delim (RFC 3986 reg-name).  *plen is treated as the size of *str
+ * and grows by the encoded length.  Returns 0 on success, -1 on allocation
+ * failure.  NOTE(review): '[', ']' and ':' of an IP-literal get encoded too. */
+static int
+pct_encode_authority_host(char **str, size_t *plen, const char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_unreserved(*ptr) || _is_sub_delim(*ptr))
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_unreserved(*ptr) || _is_sub_delim(*ptr)) {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
+
+/* Append raw to *str, percent-encoding every byte that _is_segment()
+ * rejects, i.e. anything that is not a pchar.  *plen is treated as the size
+ * of *str and grows by the encoded length.  Returns 0 on success, -1 on
+ * allocation failure (*str and *plen are then left unchanged). */
+static int
+pct_encode_segment(char **str, size_t *plen, char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_segment(*ptr))
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_segment(*ptr)) {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
+/* Append raw to *str, percent-encoding every byte that _is_segment_nc()
+ * rejects; unlike pct_encode_segment() this also encodes ':'.  *plen is
+ * treated as the size of *str and grows by the encoded length.  Returns 0
+ * on success, -1 on allocation failure (*str and *plen left unchanged). */
+static int
+pct_encode_segment_nc(char **str, size_t *plen, char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_segment_nc(*ptr))
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_segment_nc(*ptr)) {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
+/* Append raw to *str, percent-encoding every byte that is not a pchar,
+ * '/' or '?' (RFC 3986 query).  *plen is treated as the size of *str and
+ * grows by the encoded length.  Returns 0 on success, -1 on allocation
+ * failure (*str and *plen are then left unchanged). */
+static int
+pct_encode_query(char **str, size_t *plen, char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?')
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
+/* Append raw to *str, percent-encoding every byte that is not a pchar,
+ * '/' or '?' (RFC 3986 fragment).  *plen is treated as the size of *str
+ * and grows by the encoded length.  Returns 0 on success, -1 on allocation
+ * failure (*str and *plen are then left unchanged). */
+static int
+pct_encode_fragment(char **str, size_t *plen, char *raw)
+{
+	static const char hex[] = "0123456789ABCDEF";
+	const char *ptr;
+	char *ret, *buf;
+	size_t i, len;
+
+	len = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?')
+			len++;
+		else
+			len += 3; /* "%XX" */
+	}
+	if (len == 0)
+		return 0;
+
+	buf = malloc(len+1);
+	if (buf == NULL)
+		return -1;
+	i = 0;
+	for (ptr = raw; *ptr != '\0'; ptr++) {
+		if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') {
+			buf[i++] = *ptr;
+		} else {
+			/* always emit exactly two hex digits of the unsigned
+			 * byte value: formatting a plain char with "%X" gives
+			 * one digit for bytes < 0x10 and eight digits for
+			 * bytes >= 0x80 where char is signed */
+			buf[i++] = '%';
+			buf[i++] = hex[((unsigned char)*ptr >> 4) & 0xF];
+			buf[i++] = hex[(unsigned char)*ptr & 0xF];
+		}
+	}
+	buf[i] = '\0';
+
+	ret = realloc(*str, (*plen)+len);
+	if (ret == NULL) {
+		free(buf);
+		return -1;
+	}
+	(*plen) += len;
+	*str = ret;
+	strlcat(*str, buf, *plen);
+	free(buf);
+	return 0;
+}
+
 char *
 uri_encode(struct uri *u)
 {
@@ -46,23 +364,24 @@ uri_encode(struct uri *u)
 
 		if (u->authority.user) {
 			len += strlen(u->authority.user);
+			if (pct_encode_authority_user(&ret, &len, u->authority.user) == -1) {
+				free(ret);
+				return NULL;
+			}
+			len++; /* @ */
 			dup = realloc(ret, len);
 			if (dup == NULL) {
 				free(ret);
 				return NULL;
 			}
 			ret = dup;
-			strlcat(ret, u->authority.user, len);
+			strlcat(ret, "@", len);
 		}
 
-		len += strlen(u->authority.host);
-		dup = realloc(ret, len);
-		if (dup == NULL) {
+		if (pct_encode_authority_host(&ret, &len, u->authority.host) == -1) {
 			free(ret);
 			return NULL;
 		}
-		ret = dup;
-		strlcat(ret, u->authority.host, len);
 
 		if (u->authority.port) {
 			memset(portbuf, 0, sizeof(portbuf));
@@ -91,20 +410,21 @@ uri_encode(struct uri *u)
 				}
 				ret = dup;
 				strlcat(ret, "/", len);
+				if (pct_encode_segment_nc(&ret, &len, u->path[i]) == -1) {
+					free(ret);
+					return NULL;
+				}
+				continue;
 			}
-			len += strlen(u->path[i]);
-			dup = realloc(ret, len);
-			if (dup == NULL) {
+			if (pct_encode_segment(&ret, &len, u->path[i]) == -1) {
 				free(ret);
 				return NULL;
 			}
-			ret = dup;
-			strlcat(ret, u->path[i], len);
 		}
 	}
 
 	if (u->query) {
-		len += strlen(u->query)+1; /* ?query */
+		len++; /* ? */
 		dup = realloc(ret, len);
 		if (dup == NULL) {
 			free(ret);
@@ -112,11 +432,14 @@ uri_encode(struct uri *u)
 		}
 		ret = dup;
 		strlcat(ret, "?", len);
-		strlcat(ret, u->query, len);
+		if (pct_encode_query(&ret, &len, u->query) == -1) {
+			free(ret);
+			return NULL;
+		}
 	}
 
 	if (u->fragment) {
-		len += strlen(u->fragment)+1; /* #fragment */
+		len++; /* # */
 		dup = realloc(ret, len);
 		if (dup == NULL) {
 			free(ret);
@@ -124,7 +447,10 @@ uri_encode(struct uri *u)
 		}
 		ret = dup;
 		strlcat(ret, "#", len);
-		strlcat(ret, u->fragment, len);
+		if (pct_encode_fragment(&ret, &len, u->fragment) == -1) {
+			free(ret);
+			return NULL;
+		}
 	}
 
 	return ret;
diff --git a/normalize.c b/normalize.c
index f8e8dbe..821df35 100644
--- a/normalize.c
+++ b/normalize.c
@@ -32,6 +32,7 @@ int
 uri_normalize(struct uri *u)
 {
 	int i;
+	int therewaspath;
 
 	if (u->scheme) {
 		for (i = 0; i < strlen(u->scheme); i++) {
@@ -47,20 +48,49 @@ uri_normalize(struct uri *u)
 		}
 	}
 
+	if (u->npath != 0)
+		therewaspath = 1;
+	else
+		therewaspath = 0;
+
+	for (i = 0; i < u->npath; i++) {
+		if (i != u->npath-1 || u->npath == 1) {
+			if (strcmp(u->path[i], "") == 0) {
+				if (_eat(u, i) == -1)
+					return -1;
+				i = 0; /* count altered, reset back */
+			}
+		}
+	}
+
 	for (i = 0; i < u->npath; i++) {
 		if (strcmp(u->path[i], ".") == 0) {
 			if (_eat(u, i) == -1)
 				return -1;
+			i = 0; /* count altered, reset back */
 		}
+	}
+
+	for (i = 0; i < u->npath; i++) {
 		if (strcmp(u->path[i], "..") == 0) {
-			if (u->npath >= 2 && i-1 >= 0) {
-				if (_eat(u, i-1) == -1)
-					return -1;
+			if (_eat(u, i) == -1)
+				return -1;
+			if (i-1 >= 0) {
 				if (_eat(u, i-1) == -1)
 					return -1;
 			}
+			i = 0; /* count altered, reset back */
 		}
 	}
 
+	/* if there was a path and all redundant segments were removed,
+	 * we'd be left with no path and the path list would be set to NULL,
+	 * indicating that a path never existed; we don't want that,
+	 * so we just add an empty path back */
+	if (therewaspath && u->npath == 0) {
+		if (uri_append_path(u, "") == -1)
+			return -1;
+	}
+
 	return 0;
 }
diff --git a/test.c b/test.c
index c1cdd72..ff6b137 100644
--- a/test.c
+++ b/test.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <stdlib.h>
 
 #include "yuri.h"
 
@@ -22,6 +23,7 @@ int
 main(int argc, char *argv[])
 {
 	struct uri *u;
+	char *p;
 
 	if (argc < 2) {
 		fprintf(stderr, "Usage %s url\n", argv[0]);
@@ -33,6 +35,11 @@ main(int argc, char *argv[])
 	if (uri_normalize(u) == -1)
 		return 1;
 	_print_uri(u);
+	p = uri_encode(u);
+	if (p == NULL)
+		return 1;
+	printf("%s\n", p);
+	free(p);
 	uri_free(u);
 	return 0;
 }
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..03208d4
--- /dev/null
+++ b/types.h
@@ -0,0 +1,113 @@
+#define _is_alpha(c) /* ASCII letter A-Z / a-z; c is evaluated more than once */\
+	(((c) == 'A') ||\
+	 ((c) == 'B') ||\
+	 ((c) == 'C') ||\
+	 ((c) == 'D') ||\
+	 ((c) == 'E') ||\
+	 ((c) == 'F') ||\
+	 ((c) == 'G') ||\
+	 ((c) == 'H') ||\
+	 ((c) == 'I') ||\
+	 ((c) == 'J') ||\
+	 ((c) == 'K') ||\
+	 ((c) == 'L') ||\
+	 ((c) == 'M') ||\
+	 ((c) == 'N') ||\
+	 ((c) == 'O') ||\
+	 ((c) == 'P') ||\
+	 ((c) == 'Q') ||\
+	 ((c) == 'R') ||\
+	 ((c) == 'S') ||\
+	 ((c) == 'T') ||\
+	 ((c) == 'U') ||\
+	 ((c) == 'V') ||\
+	 ((c) == 'W') ||\
+	 ((c) == 'X') ||\
+	 ((c) == 'Y') ||\
+	 ((c) == 'Z') ||\
+	 ((c) == 'a') ||\
+	 ((c) == 'b') ||\
+	 ((c) == 'c') ||\
+	 ((c) == 'd') ||\
+	 ((c) == 'e') ||\
+	 ((c) == 'f') ||\
+	 ((c) == 'g') ||\
+	 ((c) == 'h') ||\
+	 ((c) == 'i') ||\
+	 ((c) == 'j') ||\
+	 ((c) == 'k') ||\
+	 ((c) == 'l') ||\
+	 ((c) == 'm') ||\
+	 ((c) == 'n') ||\
+	 ((c) == 'o') ||\
+	 ((c) == 'p') ||\
+	 ((c) == 'q') ||\
+	 ((c) == 'r') ||\
+	 ((c) == 's') ||\
+	 ((c) == 't') ||\
+	 ((c) == 'u') ||\
+	 ((c) == 'v') ||\
+	 ((c) == 'w') ||\
+	 ((c) == 'x') ||\
+	 ((c) == 'y') ||\
+	 ((c) == 'z'))
+
+#define _is_digit(c) /* decimal digit 0-9; c is evaluated more than once */\
+	(((c) == '0') ||\
+	 ((c) == '1') ||\
+	 ((c) == '2') ||\
+	 ((c) == '3') ||\
+	 ((c) == '4') ||\
+	 ((c) == '5') ||\
+	 ((c) == '6') ||\
+	 ((c) == '7') ||\
+	 ((c) == '8') ||\
+	 ((c) == '9'))
+
+#define _is_gen_delim(c) /* RFC 3986 gen-delims; c is evaluated more than once */\
+	(((c) == ':') ||\
+	 ((c) == '/') ||\
+	 ((c) == '?') ||\
+	 ((c) == '#') ||\
+	 ((c) == '[') ||\
+	 ((c) == ']') ||\
+	 ((c) == '@'))
+
+#define _is_sub_delim(c) /* RFC 3986 sub-delims; c is evaluated more than once */\
+	(((c) == '!') ||\
+	 ((c) == '$') ||\
+	 ((c) == '&') ||\
+	 ((c) == '\'') ||\
+	 ((c) == '(') ||\
+	 ((c) == ')') ||\
+	 ((c) == '*') ||\
+	 ((c) == '+') ||\
+	 ((c) == ',') ||\
+	 ((c) == ';') ||\
+	 ((c) == '='))
+
+#define _is_unreserved(c) /* letter, digit, '-', '.', '_' or '~'; c is evaluated more than once */\
+	(_is_alpha((c)) ||\
+	 _is_digit((c)) ||\
+	 ((c) == '-')   ||\
+	 ((c) == '.')   ||\
+	 ((c) == '_')   ||\
+	 ((c) == '~'))
+
+#define _is_reserved(c) /* reserved = gen-delims / sub-delims; c is evaluated more than once */\
+	(_is_gen_delim((c)) ||\
+	 _is_sub_delim((c)))
+
+#define _is_pchar(c) /* unreserved, sub-delim, ':' or '@'; c is evaluated more than once */\
+	(_is_unreserved((c)) ||\
+	 _is_sub_delim((c))  ||\
+	 ((c) == ':')        ||\
+	 ((c) == '@'))
+
+#define _is_segment(c) /* a path segment character is any pchar */\
+	_is_pchar((c))
+
+#define _is_segment_nc(c) /* like _is_pchar but without ':'; c is evaluated more than once */\
+	(_is_unreserved((c)) ||\
+	 _is_sub_delim((c))  ||\
+	 ((c) == '@'))