1 /**
2  * RFC 2646. May be upgraded to RFC 3676 for international text.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  */
13 
14 module ae.net.ietf.wrap;
15 
16 import std.string;
17 import std.utf;
18 
19 import ae.utils.text;
20 
/// One paragraph of message text, as produced by unwrapText and consumed by wrapText.
struct Paragraph
{
	/// Quote markers (">" or "> ", one per quoting level) taken from the start
	/// of the paragraph's lines. Empty for unquoted text.
	dstring quotePrefix;

	/// The paragraph's content, with the quote prefix removed.
	dstring text;
}
25 
/**
 * Splits message text into paragraphs, undoing soft line wraps.
 *
 * Leading quote markers are stripped from every line and collected into the
 * paragraph's quote prefix. A line is appended to the previous paragraph
 * (instead of starting a new one) when all of the following hold:
 * the text is flowed or the line is quoted; both have the same quote prefix;
 * the previous paragraph ends with a space; the line is non-empty and does
 * not start with a space; and neither of the two is "-- " (the signature
 * separator).
 *
 * Params:
 *   text   = the message body
 *   flowed = treat the body as flowed text: strip space-stuffing, and join
 *            soft-wrapped lines even when they are not quoted
 *   delsp  = when joining a line, first drop the trailing space of the
 *            paragraph it is joined to
 *
 * Returns: the paragraphs, in order of appearance.
 */
Paragraph[] unwrapText(string text, bool flowed, bool delsp)
{
	Paragraph[] result;

	foreach (current; splitLines(toUTF32(text)))
	{
		// Peel off the quote markers, one quoting level per iteration.
		dstring prefix;
		while (current.startsWith(">"d))
		{
			// The standard says a marker is a bare ">", but many clients
			// (incl. Web-News and Outlook) emit "> " instead, so a space
			// directly after the marker is taken as part of it.
			immutable size_t markerLength = current.startsWith("> "d) ? 2 : 1;
			prefix ~= current[0 .. markerLength];
			current = current[markerLength .. $];
		}

		// Undo space-stuffing
		if (flowed && current.startsWith(" "d))
			current = current[1 .. $];

		// Decide whether this line continues the previous paragraph.
		bool continuesPrevious = false;
		if (result.length > 0)
		{
			auto previous = result[$ - 1];
			continuesPrevious =
				(flowed || prefix.length != 0)
				&& previous.quotePrefix == prefix
				&& previous.text.endsWith(" "d)
				&& previous.text != "-- "d
				&& current.length != 0
				&& !current.startsWith(" "d)
				&& current != "-- ";
		}

		if (!continuesPrevious)
		{
			result ~= Paragraph(prefix, current);
			continue;
		}

		auto tail = &result[$ - 1];
		if (delsp)
			tail.text = tail.text[0 .. $ - 1]; // drop the soft-break space
		tail.text ~= current;
	}

	return result;
}
70 
/// Margin that wrapText wraps to when the caller does not specify one.
enum int DEFAULT_WRAP_LENGTH = 66;
72 
/**
 * Wraps paragraphs into lines and joins them with "\n".
 *
 * Every output line consists of the paragraph's quote prefix followed by a
 * piece of its text. Text is broken just after a space, so that prefix plus
 * piece fit within `margin` where possible; a word that does not fit is never
 * split, the line simply runs past the margin up to the next space. Trailing
 * spaces of a paragraph are removed, and a paragraph without text yields a
 * line holding only the quote prefix. Lines that would start with a space or
 * "From ", and unquoted lines starting with ">", are space-stuffed (one extra
 * leading space, which is not counted against the margin).
 *
 * If the quote prefix leaves no room at all (its length is `margin` or more),
 * the paragraph is emitted on a single line, unwrapped.
 *
 * Params:
 *   paragraphs = the paragraphs to wrap, e.g. as returned by unwrapText
 *   margin     = maximum line length including the quote prefix, counted in
 *                UTF-32 code units
 *
 * Returns: the wrapped text, encoded as UTF-8.
 */
string wrapText(Paragraph[] paragraphs, int margin = DEFAULT_WRAP_LENGTH)
{
	dstring[] lines;

	// Appends one output line, space-stuffing it when necessary.
	void addLine(dstring quotePrefix, dstring line)
	{
		line = quotePrefix ~ line;
		// Add space-stuffing
		if (line.startsWith(" "d) ||
			line.startsWith("From "d) ||
			(line.startsWith(">"d) && quotePrefix.length==0))
		{
			line = " " ~ line;
		}
		lines ~= line;
	}

	foreach (paragraph; paragraphs)
	{
		dstring line = paragraph.text;

		// Width left for text after the quote prefix. This is computed with
		// signed arithmetic on purpose: `margin - length` would be evaluated
		// as size_t and wrap around to a huge value for long prefixes.
		immutable ptrdiff_t room =
			cast(ptrdiff_t) margin - cast(ptrdiff_t) paragraph.quotePrefix.length;

		// NOTE(review): this also turns a "-- " signature separator into
		// "--" - confirm that this is intended.
		while (line.length && line[$-1] == ' ')
			line = line[0..$-1];

		if (!line.length)
		{
			addLine(paragraph.quotePrefix, null);
			continue;
		}

		// With no room at all there is no sensible cut point, so the line is
		// left unwrapped. (A width of exactly zero used to loop forever.)
		if (room > 0)
		{
			immutable size_t cutPoint = room;

			while (line.length > cutPoint)
			{
				// Prefer the last space that still fits within the margin...
				ptrdiff_t i = line[0..cutPoint].lastIndexOf(' ');
				if (i < 0)
				{
					// ...otherwise overflow up to the next space, if there is one.
					immutable ptrdiff_t next = line[cutPoint..$].indexOf(' ');
					if (next < 0)
						break;
					i = cutPoint + next;
				}

				// Keep the space at the end of the line being emitted.
				i++;
				addLine(paragraph.quotePrefix, line[0..i]);
				line = line[i..$];
			}
		}

		if (line.length)
			addLine(paragraph.quotePrefix, line);
	}

	return lines.join("\n"d).toUTF8();
}
125 
unittest
{
	import std.array : replicate;

	// Unwraps the input as fixed (non-flowed, no DelSp) text, then wraps it
	// again using the default margin.
	static string rewrap(string input)
	{
		return wrapText(unwrapText(input, false, false));
	}

	// A line starting with a space gets space-stuffed
	assert(rewrap(" Hello") == "  Hello");

	// Unquoted lines are user input and must not be joined...
	assert(rewrap("Line 1 \nLine 2 ") == "Line 1\nLine 2");
	// ...whereas quoted lines are
	assert(rewrap("> Line 1 \n> Line 2 ") == "> Line 1 Line 2");

	// Lines longer than the margin get wrapped
	assert(rewrap(replicate("abcde ", 20)).split("\n").length > 1);

	// The margin is measured in characters, not in UTF-8 code units.
	// TODO: take into account surrogates and composite characters.
	enum str = "Это очень очень очень очень очень очень очень длинная строка";
	static assert(str.toUTF32().length < DEFAULT_WRAP_LENGTH);
	static assert(str.length > DEFAULT_WRAP_LENGTH);
	assert(rewrap(str).split("\n").length == 1);
}