/**
 * RFC 2646. May be upgraded to RFC 3676 for international text.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <vladimir@thecybershadow.net>
 */

module ae.net.ietf.wrap;

import std.string;
import std.utf;

import ae.utils.text;

/// A run of message text together with the quote prefix
/// (a sequence of ">" / "> " markers) that was stripped from it.
struct Paragraph
{
	dstring quotePrefix, text;
}

/**
 * Splits `text` into lines and merges continuation lines into paragraphs.
 *
 * A line is appended to the preceding paragraph only when all of the
 * following hold: both carry the same quote prefix, the preceding text
 * ends with a space, the line is non-empty and does not itself start
 * with a space, neither side is the signature separator "-- ", and
 * either `flowed` is set or the line is quoted.
 *
 * Params:
 *   text   = message body
 *   flowed = when set, one leading space (space-stuffing) is removed
 *            from each line, and unquoted lines may be joined as well
 *   delsp  = when set, the trailing space of the preceding text is
 *            dropped before a continuation line is appended
 *
 * Returns: the paragraphs, in input order.
 */
Paragraph[] unwrapText(string text, bool flowed, bool delsp)
{
	auto lines = text.toUTF32().splitLines();

	Paragraph[] paragraphs;

	foreach (line; lines)
	{
		// Peel off the quote markers one level at a time.
		dstring quotePrefix;
		while (line.startsWith(">"d))
		{
			int l = 1;
			// Accepting "> " as a single quote level is against the
			// standard, but many clients (incl. Web-News and
			// M$ Outlook) emit it anyway:
			if (line.startsWith("> "d))
				l = 2;

			quotePrefix ~= line[0..l];
			line = line[l..$];
		}

		// Remove space-stuffing
		if (flowed && line.startsWith(" "d))
			line = line[1..$];

		if (paragraphs.length>0
		 && paragraphs[$-1].quotePrefix==quotePrefix
		 && paragraphs[$-1].text.endsWith(" "d)
		 && !line.startsWith(" "d)
		 && line.length
		 && line != "-- "d
		 && paragraphs[$-1].text != "-- "d
		 && (flowed || quotePrefix.length))
		{
			// Continuation of the previous paragraph.
			if (delsp)
				paragraphs[$-1].text = paragraphs[$-1].text[0..$-1];
			paragraphs[$-1].text ~= line;
		}
		else
			paragraphs ~= Paragraph(quotePrefix, line);
	}

	return paragraphs;
}

/// Margin used by `wrapText` when the caller does not specify one.
enum DEFAULT_WRAP_LENGTH = 66;

/**
 * Renders paragraphs back into text, breaking long paragraphs after
 * spaces so that quote prefix plus text fits within `margin` characters
 * where possible.
 *
 * Trailing spaces of each paragraph are removed. Lines produced by a
 * break keep the space they were broken after. A word that does not fit
 * is left intact and the break is made after the next space, if any.
 * Output lines that would start with a space, with "From ", or (when
 * unquoted) with ">" are space-stuffed with one extra leading space.
 *
 * When the quote prefix leaves no room within `margin` (including a
 * non-positive `margin`), the paragraph is emitted without wrapping.
 *
 * Params:
 *   paragraphs = paragraphs to render
 *   margin     = maximum line width, counted in UTF-32 code units and
 *                including the quote prefix
 *
 * Returns: the lines joined with "\n", encoded as UTF-8.
 */
string wrapText(Paragraph[] paragraphs, int margin = DEFAULT_WRAP_LENGTH)
{
	dstring[] lines;

	// Emits one output line, applying space-stuffing where required.
	void addLine(dstring quotePrefix, dstring line)
	{
		line = quotePrefix ~ line;
		// Add space-stuffing
		if (line.startsWith(" "d) ||
			line.startsWith("From "d) ||
			(line.startsWith(">"d) && quotePrefix.length==0))
		{
			line = " " ~ line;
		}
		lines ~= line;
	}

	foreach (paragraph; paragraphs)
	{
		dstring line = paragraph.text;

		// Width left for text after the quote prefix. Computed in signed
		// arithmetic: mixing the int margin with the unsigned length
		// would wrap around for long prefixes, and a width of exactly
		// zero used to make the loop below spin forever.
		immutable ptrdiff_t room =
			cast(ptrdiff_t)margin - cast(ptrdiff_t)paragraph.quotePrefix.length;

		while (line.length && line[$-1] == ' ')
			line = line[0..$-1];

		if (!line.length)
		{
			addLine(paragraph.quotePrefix, null);
			continue;
		}

		if (room > 0)
		{
			immutable size_t cutPoint = room;

			while (line.length > cutPoint)
			{
				// Prefer the last space that still fits ...
				auto i = line[0..cutPoint].lastIndexOf(' ');
				if (i < 0)
				{
					// ... otherwise overflow up to the next space.
					immutable next = line[cutPoint..$].indexOf(' ');
					if (next < 0)
						break; // no break opportunity left
					i = cutPoint + next;
				}

				i++; // keep the space at the end of the emitted line
				addLine(paragraph.quotePrefix, line[0..i]);
				line = line[i..$];
			}
		}

		if (line.length)
			addLine(paragraph.quotePrefix, line);
	}

	return lines.join("\n"d).toUTF8();
}

unittest
{
	// Space-stuffing: a line starting with a space gets one more
	assert(wrapText(unwrapText(" Hello", false, false)) == "  Hello");

	// Don't rewrap user input
	assert(wrapText(unwrapText("Line 1 \nLine 2 ", false, false)) == "Line 1\nLine 2");
	// ...but rewrap quoted text
	assert(wrapText(unwrapText("> Line 1 \n> Line 2 ", false, false)) == "> Line 1 Line 2");
	// Wrap long lines
	import std.array : replicate;
	assert(wrapText(unwrapText(replicate("abcde ", 20), false, false)).split("\n").length > 1);

	// Wrap by character count, not UTF-8 code-unit count. TODO: take into account surrogates and composite characters.
	enum str = "Это очень очень очень очень очень очень очень длинная строка";
	static assert(str.toUTF32().length < DEFAULT_WRAP_LENGTH);
	static assert(str.length > DEFAULT_WRAP_LENGTH);
	assert(wrapText(unwrapText(str, false, false)).split("\n").length == 1);

	// Breaks are made after spaces, which stay on the broken line
	assert(wrapText([Paragraph(""d, "aaa bbb ccc"d)], 5) == "aaa \nbbb \nccc");

	// A quote prefix as wide as (or wider than) the margin leaves no
	// room for text; the paragraph must come out unwrapped instead of
	// looping forever.
	assert(wrapText([Paragraph(">"d, "abc def"d)], 1) == ">abc def");
	assert(wrapText([Paragraph("> > "d, "abc def"d)], 2) == "> > abc def");
	assert(wrapText([Paragraph(""d, "abc def"d)], 0) == "abc def");
}