1 /**
2  * ae.net.ietf.url
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.net.ietf.url;
15 
16 import std.exception;
17 import std.string;
18 
19 import ae.utils.array;
20 
/// Returns the length of the scheme name if `url` begins with a
/// syntactically valid `scheme://` prefix, or -1 otherwise.
/// A valid scheme is a letter followed by any number of letters,
/// digits, `'+'`, `'-'` or `'.'` (RFC 3986, section 3.1).
private ptrdiff_t urlSchemeLength(string url)
{
	import std.ascii : isAlpha, isAlphaNum;

	auto p = url.indexOf("://");
	if (p <= 0 || !isAlpha(url[0]))
		return -1;
	foreach (c; url[1 .. p])
		if (!isAlphaNum(c) && c != '+' && c != '-' && c != '.')
			return -1;
	return p;
}

/// Calculate the full URL given a base and a target relative URL.
///
/// Params:
///   base = URL of the document the link was found in. Its query
///          string and fragment are ignored.
///   rel  = Link target. May be an absolute URL (returned as is),
///          a network-path reference (`//host/path`), an absolute
///          path (`/path`), or a path relative to the directory of
///          `base`, optionally starting with one or more `../`.
///
/// Returns: The resolved URL.
///
/// Throws: `Exception` ("Bad relative URL") if `rel` contains more
/// leading `../` segments than `base` has directories to leave.
string applyRelativeURL(string base, string rel)
{
	// An absolute target replaces the base entirely. Requiring a valid
	// scheme before "://" keeps targets such as "go?to=http://host/"
	// from being mistaken for absolute URLs.
	if (urlSchemeLength(rel) >= 0)
		return rel;

	// Neither the query string nor the fragment of the base
	// takes part in the resolution.
	foreach (char delimiter; "?#")
	{
		auto p = base.indexOf(delimiter);
		if (p >= 0)
			base = base[0 .. p];
	}

	// Find where the authority (host/port) starts, if the base has one.
	auto schemeLength = urlSchemeLength(base);
	ptrdiff_t authorityStart = -1;
	if (schemeLength >= 0)
		authorityStart = schemeLength + 3;
	else
	if (base.startsWith("//"))
		authorityStart = 2;

	// rootLength is the length of the "scheme://authority" prefix,
	// i.e. the index of the slash which begins the path.
	// It stays 0 for a base which is just a path.
	size_t rootLength = 0;
	if (authorityStart >= 0)
	{
		auto pathStart = base[authorityStart .. $].indexOf('/');
		if (pathStart < 0)
		{
			// "http://example.com" - treat the missing path as "/",
			// so that the slashes of "://" are never taken for a directory.
			rootLength = base.length;
			base ~= '/';
		}
		else
			rootLength = authorityStart + pathStart;
	}

	// Network-path reference: only the scheme is inherited from the base.
	if (rel.startsWith("//"))
		return schemeLength >= 0 ? base[0 .. schemeLength + 1] ~ rel : rel;

	// Reduce the base to its directory (empty, or ending with a slash).
	base = base[0 .. base.lastIndexOf('/') + 1];

	while (rel.startsWith("../"))
	{
		// There must be a directory left to leave; this also keeps
		// "../" from eating into the authority or an empty base.
		enforce(base.length > rootLength + 1, "Bad relative URL");
		rel = rel[3 .. $];
		base = base[0 .. base[0 .. $ - 1].lastIndexOf('/') + 1];
		enforce(base.length > rootLength, "Bad relative URL");
	}

	if (rel.startsWith("/"))
		return base[0 .. rootLength] ~ rel;
	return base ~ rel;
}

///
unittest
{
	assert(applyRelativeURL("http://example.com/", "index.html") == "http://example.com/index.html");
	assert(applyRelativeURL("http://example.com/index.html", "page.html") == "http://example.com/page.html");
	assert(applyRelativeURL("http://example.com/dir/index.html", "page.html") == "http://example.com/dir/page.html");
	assert(applyRelativeURL("http://example.com/dir/index.html", "/page.html") == "http://example.com/page.html");
	assert(applyRelativeURL("http://example.com/dir/index.html", "../page.html") == "http://example.com/page.html");
	assert(applyRelativeURL("http://example.com/script.php?path=a/b/c", "page.html") == "http://example.com/page.html");
	assert(applyRelativeURL("http://example.com/index.html", "http://example.org/page.html") == "http://example.org/page.html");
	assert(applyRelativeURL("http://example.com/http://archived.website", "/http://archived.website/2") == "http://example.com/http://archived.website/2");

	// Base without a path
	assert(applyRelativeURL("http://example.com", "page.html") == "http://example.com/page.html");
	// Fragment of the base is ignored
	assert(applyRelativeURL("http://example.com/index.html#a/b", "page.html") == "http://example.com/page.html");
	// Network-path reference
	assert(applyRelativeURL("https://example.com/dir/index.html", "//cdn.example.org/lib.js") == "https://cdn.example.org/lib.js");
	// "://" inside the query string of a relative target
	assert(applyRelativeURL("http://example.com/index.html", "go?to=http://example.org/") == "http://example.com/go?to=http://example.org/");
	// Several levels up
	assert(applyRelativeURL("http://example.com/a/b/c.html", "../../x.html") == "http://example.com/x.html");
	// Climbing above the root is an error
	assertThrown(applyRelativeURL("http://example.com/index.html", "../page.html"));
	assertThrown(applyRelativeURL("", "../page.html"));
}
60 
/// Return a likely base file name given a URL,
/// stripping the host/port/path, query string and fragment.
///
/// Params:
///   url = The URL to examine. May be empty.
///
/// Returns: The text between the last `'/'` and the first `'?'` or
/// `'#'` (or the end of the URL). The result is empty when `url` is
/// empty or when its path ends with a slash.
string fileNameFromURL(string url)
{
	// Cut off the query string and the fragment, whichever comes first.
	foreach (char delimiter; "?#")
	{
		auto p = url.indexOf(delimiter);
		if (p >= 0)
			url = url[0 .. p];
	}

	// lastIndexOf returns -1 when there is no slash,
	// in which case the entire remainder is the file name.
	return url[url.lastIndexOf('/') + 1 .. $];
}

///
unittest
{
	assert(fileNameFromURL("http://example.com/index.html") == "index.html");
	assert(fileNameFromURL("http://example.com/dir/index.html") == "index.html");
	assert(fileNameFromURL("http://example.com/script.php?path=a/b/c") == "script.php");
	assert(fileNameFromURL("http://example.com/index.html#top") == "index.html");
	assert(fileNameFromURL("http://example.com/dir/") == "");
	assert(fileNameFromURL("") == "");
}
75 
76 // ***************************************************************************
77 
/// Output-range adapter which escapes parts of an URL.
/// `isCharAllowed` is a predicate deciding, for each possible `char`
/// value, whether it is forwarded unchanged; all other characters are
/// written as `escape` followed by two hexadecimal digits.
template UrlEncoder(alias isCharAllowed, char escape = '%')
{
	/// Evaluates the predicate once for each of the 256 `char` values.
	private bool[256] genCharAllowed()
	{
		bool[256] table;
		foreach (code, ref allowed; table)
			allowed = isCharAllowed(cast(char)code);
		return table;
	}

	/// Lookup table, so that `put` does not need to call the predicate.
	private immutable bool[256] charAllowed = genCharAllowed();

	/// The encoder itself. `Sink` must have a `put` method
	/// which accepts a single character.
	struct UrlEncoder(Sink)
	{
		Sink sink; /// Output will go here.

		/// Feed input here.
		void put(in char[] s)
		{
			foreach (c; s)
			{
				if (charAllowed[c])
				{
					sink.put(c);
					continue;
				}

				// Not allowed - emit the escape character
				// followed by the high and the low nibble.
				immutable code = cast(ubyte)c;
				sink.put(escape);
				sink.put(hexDigits[code >> 4]);
				sink.put(hexDigits[code & 15]);
			}
		}
	}
}
111 
112 import ae.utils.textout : countCopy;
113 
/// Escape the string `s` for use in a part of an URL.
/// Characters for which `isCharAllowed` returns `true` are copied as
/// they are; each of the others is replaced with `escape` followed by
/// two hexadecimal digits. The encoded text is returned as a new string.
string encodeUrlPart(alias isCharAllowed, char escape = '%')(string s) pure
{
	alias PartEncoder = UrlEncoder!(isCharAllowed, escape);

	// Callable handed to countCopy; it writes the encoded form
	// of `text` to whatever sink it is invoked with.
	static struct Writer
	{
		string text;

		void opCall(Sink)(Sink sink)
		{
			auto output = PartEncoder!Sink(sink);
			output.put(text);
		}
	}

	// An initializer list is used here because the struct defines opCall.
	Writer writer = { s };
	return countCopy!char(writer);
}
133 
134 import std.ascii;
135 
/// Encode a single URL parameter (a name or a value).
/// ASCII letters, digits, `'-'` and `'_'` are kept as they are;
/// everything else is escaped with `'%'` and two hexadecimal digits.
alias encodeUrlParameter = encodeUrlPart!(c => c == '-' || c == '_' || isAlphaNum(c));

unittest
{
	// '?' (0x3F) is not in the allowed set, so it must be escaped.
	immutable encoded = encodeUrlParameter("abc?123");
	assert(encoded == "abc%3F123");
}
144 
145 // ***************************************************************************
146 
147 import ae.utils.aa : MultiAA;
148 
/// Type to hold decoded URL query string parameters.
/// Both the parameter names and their values are strings.
/// NOTE(review): `MultiAA` comes from `ae.utils.aa`; presumably it allows
/// several values to be stored under one name - confirm against its definition.
alias UrlParameters = MultiAA!(string, string);
151 
/// Builds a query string (without a leading `'?'`) from URL parameters.
/// Every name and value is escaped with `encodeUrlParameter`; a name
/// is joined to its value with `'='`, and the pairs are joined with `'&'`.
string encodeUrlParameters(UrlParameters dic)
{
	string[] pairs;
	foreach (key, val; dic)
	{
		auto encodedKey = encodeUrlParameter(key);
		auto encodedVal = encodeUrlParameter(val);
		pairs ~= encodedKey ~ '=' ~ encodedVal;
	}
	return pairs.join("&");
}

/// ditto
string encodeUrlParameters(string[string] dic)
{
	// Wrap the associative array and defer to the overload above.
	return encodeUrlParameters(UrlParameters(dic));
}
162 
163 import ae.utils.text;
164 
/// Decodes a single URL parameter.
///
/// Params:
///   plusToSpace = Whether `'+'` is decoded as a space.
///   escape      = The character which introduces an escape sequence.
///   encoded     = The encoded text.
///
/// Returns: `encoded` with every `escape` + two hexadecimal digits
/// replaced by the byte they denote. An `escape` character with fewer
/// than two characters after it is copied through unchanged.
///
/// NOTE(review): the two characters after `escape` are passed to
/// `fromHex` without validation; presumably it throws if they are
/// not hexadecimal digits - confirm against `ae.utils.text`.
string decodeUrlParameter(bool plusToSpace=true, char escape = '%')(string encoded)
{
	string s;
	// The index is a size_t, so it has the same width and signedness
	// as encoded.length and cannot wrap around before reaching it.
	for (size_t i = 0; i < encoded.length; i++)
		if (encoded[i] == escape && i+3 <= encoded.length)
		{
			s ~= cast(char)fromHex!ubyte(encoded[i+1..i+3]);
			i += 2; // skip the two digits; the loop itself skips the escape
		}
		else
		if (plusToSpace && encoded[i] == '+')
			s ~= ' ';
		else
			s ~= encoded[i];
	return s;
}
182 
/// Decodes URL parameters from a query string. (Do not include the leading `'?'`).
///
/// The query string is split on `'&'`. Each segment is split on its
/// first `'='` into a name and a value, and both are decoded with
/// `decodeUrlParameter`. A segment without `'='` is added as a name
/// with a `null` value. Empty segments (as produced by `"a=1&&b=2"`
/// or by a trailing `'&'`) are skipped.
UrlParameters decodeUrlParameters(string qs)
{
	UrlParameters dic;
	if (!qs.length)
		return dic;
	foreach (pair; split(qs, "&"))
	{
		// An empty segment carries no parameter; do not
		// record it as a parameter with an empty name.
		if (!pair.length)
			continue;

		auto p = pair.indexOf('=');
		if (p < 0)
			dic.add(decodeUrlParameter(pair), null);
		else
			dic.add(decodeUrlParameter(pair[0..p]), decodeUrlParameter(pair[p+1..$]));
	}
	return dic;
}

unittest
{
	assert(decodeUrlParameters("").length == 0);
	// Separators alone do not produce any parameters.
	assert(decodeUrlParameters("&").length == 0);
}