diff options
author | Ali Fardan <raiz@stellarbound.space> | 2020-11-22 17:02:51 +0300 |
---|---|---|
committer | Ali Fardan <raiz@stellarbound.space> | 2020-11-22 17:02:51 +0300 |
commit | 5a66aa1bbace4775025cd99a8ab0a8a18487dfe9 (patch) | |
tree | a40b9c727001661fb2200f06b87155a9e5a8db98 | |
parent | 7ea4eb3fe07564f605731b4b4eef09a47c0bfb08 (diff) | |
download | libyuri-5a66aa1bbace4775025cd99a8ab0a8a18487dfe9.tar.gz |
- implement percent encoding in encode.c
- move syntax macros from decode.c to a separate types.h header - fix normalizer (partially, needs testing) - TODO: more testing...
-rw-r--r-- | decode.c | 118 | ||||
-rw-r--r-- | encode.c | 362 | ||||
-rw-r--r-- | normalize.c | 36 | ||||
-rw-r--r-- | test.c | 7 | ||||
-rw-r--r-- | types.h | 113 |
5 files changed, 498 insertions, 138 deletions
diff --git a/decode.c b/decode.c index de546ba..805ad3f 100644 --- a/decode.c +++ b/decode.c @@ -3,123 +3,7 @@ #include "yuri.h" -/* - * for an explanation of this hell please refer to - * RFC 3986 (Appendix A) - */ -#define _is_alpha(c)\ - ((c == 'A') ||\ - (c == 'B') ||\ - (c == 'C') ||\ - (c == 'D') ||\ - (c == 'E') ||\ - (c == 'F') ||\ - (c == 'G') ||\ - (c == 'H') ||\ - (c == 'I') ||\ - (c == 'J') ||\ - (c == 'K') ||\ - (c == 'L') ||\ - (c == 'M') ||\ - (c == 'N') ||\ - (c == 'O') ||\ - (c == 'P') ||\ - (c == 'Q') ||\ - (c == 'R') ||\ - (c == 'S') ||\ - (c == 'T') ||\ - (c == 'U') ||\ - (c == 'V') ||\ - (c == 'W') ||\ - (c == 'X') ||\ - (c == 'Y') ||\ - (c == 'Z') ||\ - (c == 'a') ||\ - (c == 'b') ||\ - (c == 'c') ||\ - (c == 'd') ||\ - (c == 'e') ||\ - (c == 'f') ||\ - (c == 'g') ||\ - (c == 'h') ||\ - (c == 'i') ||\ - (c == 'j') ||\ - (c == 'k') ||\ - (c == 'l') ||\ - (c == 'm') ||\ - (c == 'n') ||\ - (c == 'o') ||\ - (c == 'p') ||\ - (c == 'q') ||\ - (c == 'r') ||\ - (c == 's') ||\ - (c == 't') ||\ - (c == 'u') ||\ - (c == 'v') ||\ - (c == 'w') ||\ - (c == 'x') ||\ - (c == 'y') ||\ - (c == 'z')) - -#define _is_digit(c)\ - ((c == '0') ||\ - (c == '1') ||\ - (c == '2') ||\ - (c == '3') ||\ - (c == '4') ||\ - (c == '5') ||\ - (c == '6') ||\ - (c == '7') ||\ - (c == '8') ||\ - (c == '9')) - -#define _is_gen_delim(c)\ - ((c == ':') ||\ - (c == '/') ||\ - (c == '?') ||\ - (c == '#') ||\ - (c == '[') ||\ - (c == ']') ||\ - (c == '@')) - -#define _is_sub_delim(c)\ - ((c == '!') ||\ - (c == '$') ||\ - (c == '&') ||\ - (c == '\'') ||\ - (c == '(') ||\ - (c == ')') ||\ - (c == '*') ||\ - (c == '+') ||\ - (c == ',') ||\ - (c == ';') ||\ - (c == '=')) - -#define _is_unreserved(c)\ - (_is_alpha(c) ||\ - _is_digit(c) ||\ - (c == '-') ||\ - (c == '.') ||\ - (c == '_') ||\ - (c == '~')) - -#define _is_reserved(c)\ - (_is_gen_delim(c) ||\ - _is_sub_delim(c)) - -#define _is_pchar(c)\ - (_is_unreserved(c) ||\ - _is_sub_delim(c) ||\ - (c == ':') ||\ - (c == '@')) - -#define 
_is_segment(c)\ - _is_pchar(c) - -#define _is_segment_nc(c)\ - (_is_unreserved(c) ||\ - _is_sub_delim(c) ||\ - (c == '@')) +#include "types.h" /* * TODO: I have written code for conversion to and diff --git a/encode.c b/encode.c index 75b7f90..4806136 100644 --- a/encode.c +++ b/encode.c @@ -4,9 +4,327 @@ #include "yuri.h" -/* - * TODO: percent encode appropriate characters - */ +#include "types.h" + +static int +pct_encode_authority_user(char **str, size_t *plen, const char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':') + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':') { + buf[i] = *ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, *plen); + free(buf); + + return 0; +} + +static int +pct_encode_authority_host(char **str, size_t *plen, const char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_unreserved(*ptr) || _is_sub_delim(*ptr)) + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_unreserved(*ptr) || _is_sub_delim(*ptr)) { + buf[i] = *ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, 
*plen); + free(buf); + + return 0; +} + + +static int +pct_encode_segment(char **str, size_t *plen, char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_segment(*ptr)) + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_segment(*ptr)) { + buf[i] = *ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, *plen); + free(buf); + + return 0; +} + +static int +pct_encode_segment_nc(char **str, size_t *plen, char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_segment_nc(*ptr)) + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_segment_nc(*ptr)) { + buf[i] = *ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, *plen); + free(buf); + + return 0; +} + +static int +pct_encode_query(char **str, size_t *plen, char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') { + buf[i] = 
*ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, *plen); + free(buf); + + return 0; +} + +static int +pct_encode_fragment(char **str, size_t *plen, char *raw) +{ + int i; + char *ret; + char *buf; + char *ptr; + size_t len; + + len = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') + len++; + else + len += 3; + ptr++; + } + if (len == 0) + return 0; + + buf = malloc(len+1); + if (buf == NULL) + return -1; + i = 0; + ptr = raw; + while (*ptr != '\0') { + if (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?') { + buf[i] = *ptr; + i++; + } else { + buf[i] = '%'; + sprintf(buf+i+1, "%X", (unsigned int)*ptr); + i += 3; + } + ptr++; + } + buf[i] = '\0'; + + ret = realloc(*str, (*plen)+len); + if (ret == NULL) { + free(buf); + return -1; + } + (*plen) += len; + *str = ret; + + strlcat(*str, buf, *plen); + free(buf); + + return 0; +} + char * uri_encode(struct uri *u) { @@ -46,23 +364,24 @@ uri_encode(struct uri *u) if (u->authority.user) { len += strlen(u->authority.user); + if (pct_encode_authority_user(&ret, &len, u->authority.user) == -1) { + free(ret); + return NULL; + } + len++; /* @ */ dup = realloc(ret, len); if (dup == NULL) { free(ret); return NULL; } ret = dup; - strlcat(ret, u->authority.user, len); + strlcat(ret, "@", len); } - len += strlen(u->authority.host); - dup = realloc(ret, len); - if (dup == NULL) { + if (pct_encode_authority_host(&ret, &len, u->authority.host) == -1) { free(ret); return NULL; } - ret = dup; - strlcat(ret, u->authority.host, len); if (u->authority.port) { memset(portbuf, 0, sizeof(portbuf)); @@ -91,20 +410,21 @@ uri_encode(struct uri *u) } ret = dup; strlcat(ret, "/", len); + if (pct_encode_segment_nc(&ret, &len, u->path[i]) == -1) { + free(ret); + return NULL; + } + 
continue; } - len += strlen(u->path[i]); - dup = realloc(ret, len); - if (dup == NULL) { + if (pct_encode_segment(&ret, &len, u->path[i]) == -1) { free(ret); return NULL; } - ret = dup; - strlcat(ret, u->path[i], len); } } if (u->query) { - len += strlen(u->query)+1; /* ?query */ + len++; /* ? */ dup = realloc(ret, len); if (dup == NULL) { free(ret); @@ -112,11 +432,14 @@ uri_encode(struct uri *u) } ret = dup; strlcat(ret, "?", len); - strlcat(ret, u->query, len); + if (pct_encode_query(&ret, &len, u->query) == -1) { + free(ret); + return NULL; + } } if (u->fragment) { - len += strlen(u->fragment)+1; /* #fragment */ + len++; /* # */ dup = realloc(ret, len); if (dup == NULL) { free(ret); @@ -124,7 +447,10 @@ uri_encode(struct uri *u) } ret = dup; strlcat(ret, "#", len); - strlcat(ret, u->fragment, len); + if (pct_encode_fragment(&ret, &len, u->fragment) == -1) { + free(ret); + return NULL; + } } return ret; diff --git a/normalize.c b/normalize.c index f8e8dbe..821df35 100644 --- a/normalize.c +++ b/normalize.c @@ -32,6 +32,7 @@ int uri_normalize(struct uri *u) { int i; + int therewaspath; if (u->scheme) { for (i = 0; i < strlen(u->scheme); i++) { @@ -47,20 +48,49 @@ uri_normalize(struct uri *u) } } + if (u->npath != 0) + therewaspath = 1; + else + therewaspath = 0; + + for (i = 0; i < u->npath; i++) { + if (i != u->npath-1 || u->npath == 1) { + if (strcmp(u->path[i], "") == 0) { + if (_eat(u, i) == -1) + return -1; + i = 0; /* count altered, reset back */ + } + } + } + for (i = 0; i < u->npath; i++) { if (strcmp(u->path[i], ".") == 0) { if (_eat(u, i) == -1) return -1; + i = 0; /* count altered, reset back */ } + } + + for (i = 0; i < u->npath; i++) { if (strcmp(u->path[i], "..") == 0) { - if (u->npath >= 2 && i-1 >= 0) { - if (_eat(u, i-1) == -1) - return -1; + if (_eat(u, i) == -1) + return -1; + if (i-1 >= 0) { if (_eat(u, i-1) == -1) return -1; } + i = 0; /* count altered, reset back */ } } + /* if there was a path and all redundant segments were removed * we'd
be left with no path and path list would be set to NULL * indicating that path has never existed, we don't want that, * so we just add an empty path back */ + if (therewaspath && u->npath == 0) { + if (uri_append_path(u, "") == -1) + return -1; + } + return 0; } diff --git a/test.c b/test.c index c1cdd72..ff6b137 100644 --- a/test.c +++ b/test.c @@ -1,4 +1,5 @@ #include <stdio.h> +#include <stdlib.h> #include "yuri.h" @@ -22,6 +23,7 @@ int main(int argc, char *argv[]) { struct uri *u; + char *p; if (argc < 2) { fprintf(stderr, "Usage %s url\n", argv[0]); @@ -33,6 +35,11 @@ main(int argc, char *argv[]) if (uri_normalize(u) == -1) return 1; _print_uri(u); + p = uri_encode(u); + if (p == NULL) + return 1; + printf("%s\n", p); + free(p); uri_free(u); return 0; } diff --git a/types.h b/types.h new file mode 100644 index 0000000..03208d4 --- /dev/null +++ b/types.h @@ -0,0 +1,113 @@ +#define _is_alpha(c)\ + ((c == 'A') ||\ + (c == 'B') ||\ + (c == 'C') ||\ + (c == 'D') ||\ + (c == 'E') ||\ + (c == 'F') ||\ + (c == 'G') ||\ + (c == 'H') ||\ + (c == 'I') ||\ + (c == 'J') ||\ + (c == 'K') ||\ + (c == 'L') ||\ + (c == 'M') ||\ + (c == 'N') ||\ + (c == 'O') ||\ + (c == 'P') ||\ + (c == 'Q') ||\ + (c == 'R') ||\ + (c == 'S') ||\ + (c == 'T') ||\ + (c == 'U') ||\ + (c == 'V') ||\ + (c == 'W') ||\ + (c == 'X') ||\ + (c == 'Y') ||\ + (c == 'Z') ||\ + (c == 'a') ||\ + (c == 'b') ||\ + (c == 'c') ||\ + (c == 'd') ||\ + (c == 'e') ||\ + (c == 'f') ||\ + (c == 'g') ||\ + (c == 'h') ||\ + (c == 'i') ||\ + (c == 'j') ||\ + (c == 'k') ||\ + (c == 'l') ||\ + (c == 'm') ||\ + (c == 'n') ||\ + (c == 'o') ||\ + (c == 'p') ||\ + (c == 'q') ||\ + (c == 'r') ||\ + (c == 's') ||\ + (c == 't') ||\ + (c == 'u') ||\ + (c == 'v') ||\ + (c == 'w') ||\ + (c == 'x') ||\ + (c == 'y') ||\ + (c == 'z')) + +#define _is_digit(c)\ + ((c == '0') ||\ + (c == '1') ||\ + (c == '2') ||\ + (c == '3') ||\ + (c == '4') ||\ + (c == '5') ||\ + (c == '6') ||\ + (c == '7') ||\ + (c == '8') ||\ + (c == '9')) + 
+#define _is_gen_delim(c)\ + ((c == ':') ||\ + (c == '/') ||\ + (c == '?') ||\ + (c == '#') ||\ + (c == '[') ||\ + (c == ']') ||\ + (c == '@')) + +#define _is_sub_delim(c)\ + ((c == '!') ||\ + (c == '$') ||\ + (c == '&') ||\ + (c == '\'') ||\ + (c == '(') ||\ + (c == ')') ||\ + (c == '*') ||\ + (c == '+') ||\ + (c == ',') ||\ + (c == ';') ||\ + (c == '=')) + +#define _is_unreserved(c)\ + (_is_alpha(c) ||\ + _is_digit(c) ||\ + (c == '-') ||\ + (c == '.') ||\ + (c == '_') ||\ + (c == '~')) + +#define _is_reserved(c)\ + (_is_gen_delim(c) ||\ + _is_sub_delim(c)) + +#define _is_pchar(c)\ + (_is_unreserved(c) ||\ + _is_sub_delim(c) ||\ + (c == ':') ||\ + (c == '@')) + +#define _is_segment(c)\ + _is_pchar(c) + +#define _is_segment_nc(c)\ + (_is_unreserved(c) ||\ + _is_sub_delim(c) ||\ + (c == '@')) |