#include #include #include "yuri.h" #include "types.h" /* * TODO: I have written code for conversion to and * from strings to abstract numbers representation * that allows base conversion directly to any base * efficiently, I could not find where I kept it, * when I do this hack will be replaced with proper * function. * * This is here because we want to make sure that * when parsing numbers from strings to specify how * long that number is represented in the string, it * uses dynamic memory allocation for the temporary * string which is inefficient. */ static int _strtoi(const char *str, int n, int b) { int ret; char *strbuf; strbuf = strndup(str, n); if (strbuf == NULL) return -1; ret = strtol(strbuf, NULL, b); free(strbuf); return ret; } /* * This function could be optimized too. */ static const char * pct_decode(const char *text) { int i, x; int buflen; char *buf; char *reallocbuf; if (text == NULL) return NULL; buflen = strlen(text)+1; buf = strdup(text); if (buf == NULL) return NULL; x = 0; i = 0; while (i < buflen) { if (text[i] == '%') { i++; buf[x] = _strtoi(text+i, 2, 16); if (buf[x] == -1) { free(buf); return NULL; } i += 2; x++; continue; } buf[x] = text[i]; i++; x++; } reallocbuf = realloc(buf, strlen(buf)+1); if (reallocbuf == NULL) { free(buf); return NULL; } return reallocbuf; } static int _uri_append_path(struct uri *uri, const char *item, int len) { char **path; int npath; if (uri->npath == 0) npath = 1; else npath = uri->npath + 1; path = realloc(uri->path, sizeof(*uri->path)*npath); if (path == NULL) return -1; uri->path = path; uri->path[npath-1] = strndup(item, len); if (uri->path[npath-1] == NULL) return -1; uri->npath = npath; return 0; } struct uri * uri_decode(const char *text) { struct uri *ret; const char *ptr; const char *cpy; const char *dup; int dotctr; int i; ret = uri_new(); if (ret == NULL) return NULL; ptr = text; /* look for scheme */ if (_is_alpha(*ptr)) { cpy = ptr; ptr++; while (*ptr != '\0' && (_is_alpha(*ptr) || _is_digit(*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.')) ptr++; if (*ptr == ':') { ret->scheme = strndup(cpy, ptr-cpy); if (ret->scheme == NULL) { free(ret); return NULL; } ptr++; } else { /* not found, rewind */ ptr = cpy; } } /* there is authority */ if (strncmp(ptr, "//", 2) == 0) { ptr += 2; /* scan for userinfo */ cpy = ptr; while (*ptr != '\0' && _is_userinfo(*ptr)) { /* skip pct-encoded */ if (*ptr == '%') ptr += 2; else ptr++; } if (*ptr == '@') { ret->authority.user = strndup(cpy, ptr-cpy); if (ret->authority.user == NULL) { uri_free(ret); return NULL; } ptr++; } else { /* not found, reset back */ ptr = cpy; } /* try IP6 */ if (*ptr == '[') { ptr++; cpy = ptr; while (*ptr != '\0' && (_is_digit(*ptr) || _is_alpha(*ptr) || *ptr == ':')) ptr++; if (*ptr != ']') { uri_free(ret); return NULL; } ret->authority.host = strndup(cpy, ptr-cpy); if (ret->authority.host == NULL) { uri_free(ret); return NULL; } ret->authority.type = YURI_HOST_IP6; ptr++; } /* not found? try IP4 */ if (ret->authority.type == 0) { dotctr = 0; cpy = ptr; while (*ptr != '\0' && (_is_digit(*ptr) || *ptr == '.')) { if (*ptr == '.') dotctr++; ptr++; } if (dotctr == 3) { if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#') { uri_free(ret); return NULL; } ret->authority.host = strndup(cpy, ptr-cpy); if (ret->authority.host == NULL) { uri_free(ret); return NULL; } ret->authority.type = YURI_HOST_IP4; } else { /* not and IP4 rewind and try again */ ptr = cpy; } } /* not found? try IPFuture (not gonna happen) */ if (ret->authority.type == 0) { if (*ptr == 'v') { if ((_is_digit(*(ptr+1)) || _is_alpha(*(ptr+1))) && (_is_digit(*(ptr+2)) || _is_alpha(*(ptr+2))) && *(ptr+3) == '.') { ptr += 4; cpy = ptr; while (*ptr != '\0' && (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':')) ptr++; if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#') { uri_free(ret); return NULL; } ret->authority.host = strndup(cpy, ptr-cpy); if (ret->authority.host == NULL) { uri_free(ret); return NULL; } ret->authority.type = YURI_HOST_IPFUTURE; } } } /* not found? try name */ if (ret->authority.type == 0) { while (*ptr != '\0' && (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == '%')) { /* skip pct-encoded */ if (*ptr == '%') ptr += 2; else ptr++; } if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#') { uri_free(ret); return NULL; } ret->authority.host = strndup(cpy, ptr-cpy); if (ret->authority.host == NULL) { uri_free(ret); return NULL; } ret->authority.type = YURI_HOST_NAME; } /* host is set, check if there's alternative port */ if (ret->authority.host != 0 && *ptr == ':') { ptr++; cpy = ptr; while (*ptr != '\0' && _is_digit(*ptr)) ptr++; ret->authority.port = _strtoi(cpy, ptr-cpy, 10); if (ret->authority.port == -1) { uri_free(ret); return NULL; } } } /* look for path */ if ((ret->authority.host && *ptr == '/') || _is_segment_nc(*ptr)) { do { if (*ptr == '/') ptr++; cpy = ptr; while (*ptr != '\0' && ((ret->npath == 0 && ret->scheme == NULL) ? _is_segment_nc(*ptr) : _is_segment(*ptr))) { /* skip pct-encoded */ if (*ptr == '%') ptr += 2; else ptr++; } if (_uri_append_path(ret, cpy, ptr-cpy) == -1) { uri_free(ret); return NULL; } } while (*ptr != '\0' && *ptr == '/'); } /* look for query */ if (*ptr == '?') { ptr++; cpy = ptr; while (*ptr != '\0' && (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?')) { /* skip pct-encoded */ if (*ptr == '%') ptr += 2; else ptr++; } ret->query = strndup(cpy, ptr-cpy); if (ret->query == NULL) { uri_free(ret); return NULL; } } /* look for fragment */ if (*ptr == '#') { ptr++; cpy = ptr; while (*ptr != '\0' && (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?')) { /* skip pct-encoded */ if (*ptr == '%') ptr += 2; else ptr++; } ret->fragment = strndup(cpy, ptr-cpy); if (ret->fragment == NULL) { uri_free(ret); return NULL; } } /* if there is still some trailing text, this is a bug, fail */ if (*ptr != '\0') { uri_free(ret); return NULL; } /* decode percent encoded characters */ if (ret->authority.user) { dup = pct_decode(ret->authority.user); if (dup == NULL) { uri_free(ret); return NULL; } free(ret->authority.user); ret->authority.user = dup; } if (ret->authority.host) { dup = pct_decode(ret->authority.host); if (dup == NULL) { uri_free(ret); return NULL; } free(ret->authority.host); ret->authority.host = dup; } if (ret->npath != 0) { for (i = 0; i < ret->npath; i++) { dup = pct_decode(ret->path[i]); if (dup == NULL) { uri_free(ret); return NULL; } free(ret->path[i]); ret->path[i] = dup; } } if (ret->query) { dup = pct_decode(ret->query); if (dup == NULL) { uri_free(ret); return NULL; } free(ret->query); ret->query = dup; } if (ret->fragment) { dup = pct_decode(ret->fragment); if (dup == NULL) { uri_free(ret); return NULL; } free(ret->fragment); ret->fragment = dup; } if (uri_normalize(ret) == -1) { uri_free(ret); return NULL; } return ret; }