summary refs log blame commit diff stats
path: root/decode.c
blob: 39f6f097548e51beb21273eea925f65c800a2cf5 (plain) (tree)
1
2
3
4
5
6




                   
                  













































































































































                                                                                                                         
                                                            


                                              

                                      



















































































                                                                                                                            
                                                                                                              


                                                      

                                              
































                                                                                                       
                                                                                                                                       


                                                      

                                              












                                                                                         


                                              

                                      












                                                                                         


                                              

                                      





































































                                                                       
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "yuri.h"

#include "types.h"

/*
 * Parse at most n characters of str as a non-negative number in base b.
 *
 * The digits are read straight out of the caller's buffer, so no
 * temporary NUL-terminated copy (and therefore no heap allocation) is
 * needed to limit how much of the string is consumed.
 *
 * Parsing stops early at a NUL byte.  Digits above 9 are the letters
 * a-z or A-Z, compared case-insensitively.  No sign, whitespace or
 * base prefix is accepted.
 *
 * Returns the parsed value, or 0 when there is nothing to parse
 * (n <= 0 or str starts with NUL).  Returns -1 when str is NULL, b is
 * outside 2..36, a character is not a digit of base b, or the value
 * does not fit in an int.
 */
static int
_strtoi(const char *str, int n, int b)
{
	int ret;
	int digit;
	int i;
	char c;

	if (str == NULL || b < 2 || b > 36)
		return -1;

	ret = 0;
	for (i = 0; i < n && str[i] != '\0'; i++) {
		c = str[i];

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'z')
			digit = c - 'a' + 10;
		else if (c >= 'A' && c <= 'Z')
			digit = c - 'A' + 10;
		else
			return -1;

		if (digit >= b)
			return -1;

		/* refuse to overflow instead of silently wrapping around */
		if (ret > (INT_MAX - digit) / b)
			return -1;

		ret = ret * b + digit;
	}

	return ret;
}

/*
 * Value of a single hexadecimal digit, or -1 if c is not one.
 */
static int
pct_hexval(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Return a newly allocated copy of text in which every percent-encoded
 * triplet ("%" followed by two hexadecimal digits) is replaced by the
 * byte it encodes.  All other characters are copied unchanged.
 *
 * Returns NULL when text is NULL, when memory cannot be allocated, or
 * when a '%' is not followed by two hexadecimal digits (this includes a
 * '%' too close to the end of the string).
 *
 * The result is allocated with malloc and must be released with free().
 * A "%00" triplet decodes to a NUL byte, so the returned C string ends
 * at that position.
 */
static const char *
pct_decode(const char *text)
{
	size_t len;
	size_t in, out;
	int hi, lo;
	char *buf;
	char *shrunk;

	if (text == NULL)
		return NULL;

	/* decoding never makes the text longer, so len+1 bytes always fit */
	len = strlen(text);
	buf = malloc(len + 1);
	if (buf == NULL)
		return NULL;

	in = 0;
	out = 0;
	while (in < len) {
		if (text[in] != '%') {
			buf[out++] = text[in++];
			continue;
		}

		/*
		 * text[in+1] is at worst the terminating NUL, which is not
		 * a hex digit, so text[in+2] is only read when it exists.
		 */
		hi = pct_hexval(text[in+1]);
		lo = (hi == -1) ? -1 : pct_hexval(text[in+2]);
		if (lo == -1) {
			free(buf);
			return NULL;
		}

		buf[out++] = (char)(unsigned char)(hi * 16 + lo);
		in += 3;
	}
	buf[out] = '\0';

	/* give back the bytes saved by decoding; keep buf if that fails */
	shrunk = realloc(buf, strlen(buf)+1);
	if (shrunk == NULL)
		return buf;

	return shrunk;
}

/*
 * Append a copy of the first len bytes of item as a new last entry of
 * uri->path, growing the array by one slot.
 *
 * Returns 0 on success.  Returns -1 if the array cannot be grown or the
 * copy cannot be allocated; uri->npath is left unchanged in both cases.
 */
static int
_uri_append_path(struct uri *uri, const char *item, int len)
{
	char **grown;
	char *segment;
	int count;

	count = uri->npath + 1;

	grown = realloc(uri->path, sizeof(*uri->path)*count);
	if (grown == NULL)
		return -1;
	uri->path = grown;

	/* the new slot is written even when the copy fails (it is then NULL) */
	segment = strndup(item, len);
	uri->path[count-1] = segment;
	if (segment == NULL)
		return -1;

	uri->npath = count;
	return 0;
}

/*
 * Step over one character while scanning a URI component.  The caller
 * must have checked that *ptr is not the terminating NUL.
 *
 * A '%' starts a percent-encoded triplet: the '%' and the first digit
 * are skipped and the second digit is left for the caller's character
 * class test.  When the '%' is the last character of the string the
 * pointer only advances by one, so it never moves past the terminator.
 */
static const char *
_uri_scan_next(const char *ptr)
{
	if (ptr[0] == '%' && ptr[1] != '\0')
		return ptr + 2;
	return ptr + 1;
}

/*
 * Parse text into a newly created struct uri.
 *
 * The components are recognised in this order: scheme, authority
 * (userinfo, then host as IPv6 literal / IPv4 address / IPFuture /
 * registered name, then port), path segments, query and fragment.
 * Afterwards userinfo, host, every path segment, query and fragment are
 * passed through pct_decode() and the result is handed to
 * uri_normalize().
 *
 * Returns the new object on success.  Returns NULL when text is NULL,
 * when an allocation fails, when text is not consumed completely by the
 * steps above, or when decoding or normalisation fails; everything
 * allocated so far is released through uri_free() in that case.
 */
struct uri *
uri_decode(const char *text)
{
	struct uri *ret;
	const char *ptr;
	const char *cpy;
	const char *dup;
	int dotctr;
	int i;

	if (text == NULL)
		return NULL;

	ret = uri_new();
	if (ret == NULL)
		return NULL;

	ptr = text;

	/* look for scheme */
	if (_is_alpha(*ptr)) {
		cpy = ptr;
		ptr++;
		while (*ptr != '\0' && (_is_alpha(*ptr) || _is_digit(*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
			ptr++;
		if (*ptr == ':') {
			ret->scheme = strndup(cpy, ptr-cpy);
			if (ret->scheme == NULL)
				goto fail;
			ptr++;
		} else {
			/* not found, rewind */
			ptr = cpy;
		}
	}

	/* there is authority */
	if (strncmp(ptr, "//", 2) == 0) {
		ptr += 2;

		/* scan for userinfo */
		cpy = ptr;
		while (*ptr != '\0' && _is_userinfo(*ptr))
			ptr = _uri_scan_next(ptr);
		if (*ptr == '@') {
			ret->authority.user = strndup(cpy, ptr-cpy);
			if (ret->authority.user == NULL)
				goto fail;
			ptr++;
		} else {
			/* not found, reset back */
			ptr = cpy;
		}

		/* try IP6 */
		if (*ptr == '[') {
			ptr++;
			cpy = ptr;
			while (*ptr != '\0' && (_is_digit(*ptr) || _is_alpha(*ptr) || *ptr == ':'))
				ptr++;
			if (*ptr != ']')
				goto fail;
			ret->authority.host = strndup(cpy, ptr-cpy);
			if (ret->authority.host == NULL)
				goto fail;
			ret->authority.type = YURI_HOST_IP6;
			ptr++;
		}

		/* not found? try IP4 */
		if (ret->authority.type == 0) {
			dotctr = 0;
			cpy = ptr;
			while (*ptr != '\0' && (_is_digit(*ptr) || *ptr == '.')) {
				if (*ptr == '.')
					dotctr++;
				ptr++;
			}
			if (dotctr == 3) {
				if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#')
					goto fail;
				ret->authority.host = strndup(cpy, ptr-cpy);
				if (ret->authority.host == NULL)
					goto fail;
				ret->authority.type = YURI_HOST_IP4;
			} else {
				/* not an IP4, rewind and try again */
				ptr = cpy;
			}
		}

		/* not found? try IPFuture (not gonna happen) */
		if (ret->authority.type == 0) {
			if (*ptr == 'v') {
				if ((_is_digit(*(ptr+1)) || _is_alpha(*(ptr+1))) &&
					(_is_digit(*(ptr+2)) || _is_alpha(*(ptr+2))) &&
					*(ptr+3) == '.') {
					ptr += 4;
					cpy = ptr;
					while (*ptr != '\0' && (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == ':'))
						ptr++;
					if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#')
						goto fail;
					ret->authority.host = strndup(cpy, ptr-cpy);
					if (ret->authority.host == NULL)
						goto fail;
					ret->authority.type = YURI_HOST_IPFUTURE;
				}
			}
		}

		/* not found? try name */
		if (ret->authority.type == 0) {
			/*
			 * ptr was rewound to the start of the host above;
			 * remember that position explicitly instead of
			 * relying on what an earlier branch left in cpy.
			 */
			cpy = ptr;
			while (*ptr != '\0' && (_is_unreserved(*ptr) || _is_sub_delim(*ptr) || *ptr == '%'))
				ptr = _uri_scan_next(ptr);
			if (*ptr != '\0' && *ptr != ':' && *ptr != '/' && *ptr != '?' && *ptr != '#')
				goto fail;
			ret->authority.host = strndup(cpy, ptr-cpy);
			if (ret->authority.host == NULL)
				goto fail;
			ret->authority.type = YURI_HOST_NAME;
		}

		/* host is set, check if there's alternative port */
		if (ret->authority.host != NULL && *ptr == ':') {
			ptr++;
			cpy = ptr;
			while (*ptr != '\0' && _is_digit(*ptr))
				ptr++;
			ret->authority.port = _strtoi(cpy, ptr-cpy, 10);
			if (ret->authority.port == -1)
				goto fail;
		}
	}

	/* look for path */
	if ((ret->authority.host && *ptr == '/') || _is_segment_nc(*ptr)) {
		do {
			if (*ptr == '/')
				ptr++;
			cpy = ptr;
			/*
			 * the first segment of a URI without a scheme is
			 * checked with _is_segment_nc, all others with
			 * _is_segment
			 */
			while (*ptr != '\0' && ((ret->npath == 0 && ret->scheme == NULL) ? _is_segment_nc(*ptr) : _is_segment(*ptr)))
				ptr = _uri_scan_next(ptr);
			if (_uri_append_path(ret, cpy, ptr-cpy) == -1)
				goto fail;
		} while (*ptr != '\0' && *ptr == '/');
	}

	/* look for query */
	if (*ptr == '?') {
		ptr++;
		cpy = ptr;
		while (*ptr != '\0' && (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?'))
			ptr = _uri_scan_next(ptr);
		ret->query = strndup(cpy, ptr-cpy);
		if (ret->query == NULL)
			goto fail;
	}

	/* look for fragment */
	if (*ptr == '#') {
		ptr++;
		cpy = ptr;
		while (*ptr != '\0' && (_is_pchar(*ptr) || *ptr == '/' || *ptr == '?'))
			ptr = _uri_scan_next(ptr);
		ret->fragment = strndup(cpy, ptr-cpy);
		if (ret->fragment == NULL)
			goto fail;
	}

	/* if there is still some trailing text, this is a bug, fail */
	if (*ptr != '\0')
		goto fail;

	/* decode percent encoded characters */
	if (ret->authority.user) {
		dup = pct_decode(ret->authority.user);
		if (dup == NULL)
			goto fail;
		free(ret->authority.user);
		ret->authority.user = dup;
	}
	if (ret->authority.host) {
		dup = pct_decode(ret->authority.host);
		if (dup == NULL)
			goto fail;
		free(ret->authority.host);
		ret->authority.host = dup;
	}
	for (i = 0; i < ret->npath; i++) {
		dup = pct_decode(ret->path[i]);
		if (dup == NULL)
			goto fail;
		free(ret->path[i]);
		ret->path[i] = dup;
	}
	if (ret->query) {
		dup = pct_decode(ret->query);
		if (dup == NULL)
			goto fail;
		free(ret->query);
		ret->query = dup;
	}
	if (ret->fragment) {
		dup = pct_decode(ret->fragment);
		if (dup == NULL)
			goto fail;
		free(ret->fragment);
		ret->fragment = dup;
	}

	if (uri_normalize(ret) == -1)
		goto fail;

	return ret;

fail:
	/* single exit for every error: release whatever was built so far */
	uri_free(ret);
	return NULL;
}