<html lang="en">
<head>
<!-- Charset declaration first, so it is seen before any other head content is parsed. -->
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
<title>technikum29 translation system</title>
<link rel="stylesheet" href="/shared/css/fresh.css" type="text/css" title="technikum29">
</head>
<body>
<h1 class="dev"><a href="http://dev.technikum29.de/"><span>technikum29</span></a></h1>

<div id="page">
<h2>Correct mistakes</h2>

<p>Thank you for improving this page.</p>

<!-- The correction is posted to the mail script on the dev host. -->
<form method="post" action="http://dev.technikum29.de/cgi-bin/mail.php">
---|
<?php

// A site-relative page path must be supplied as ?page=...
if(!isset($_GET['page'])) {
    print "Please specify a page, e.g. ?page=/en/computer/gamma3.shtm";
    exit;
}

$input = $_GET['page'];

// Whitelist check: /de/ or /en/, then a path of lowercase letters, digits,
// '_', '-' and '/', ending in .htm/.html/.shtm/.shtml. Digits are allowed so
// that the example above (gamma3.shtm) is actually accepted. No dot is
// permitted inside the path, so ".." cannot occur. The D modifier stops '$'
// from matching before a trailing newline, and is_string() rejects array
// parameters such as ?page[]=x before they reach preg_match().
if(!is_string($input) || !preg_match('#^/(de|en)/[/a-z0-9_-]+\.s?html?$#D', $input)) {
    print "Bad input filename.";
    exit;
}

// Fetch the published page. Warnings are suppressed; failure is reported
// explicitly below instead of silently presenting an empty edit form.
$raw = @file_get_contents("http://www.technikum29.de".$input);

if($raw === false) {
    print "Could not load the requested page.";
    exit;
}

// Try to narrow the page down to the part between the content and sidebar
// containers; if those markers are not found, fall back to the whole page.
$content = '';
if(preg_match('#^.+?<div id="content">(.+?)<div id="sidebar">.+?$#s', $raw, $matches)) {
    $content = $matches[1]."...";
} else {
    $content = $raw;
}

// Plain-text rendering of the page, used to pre-fill the edit form.
$text = html2text($content);

?>
---|
44 | |
---|
<input type="hidden" name="to" value="dev">
<input type="hidden" name="subject" value="t29 translation system non-scripted submission">
<input type="hidden" name="pre" value="Someone edited <?php print htmlentities($input, ENT_QUOTES); ?>">
<input type="hidden" name="out_heading" value="Thank you for your corrections">
<input type="hidden" name="out_text" value="Your edit was mailed to the translation team and will be published on the website in the next days. <a href=http://www.technikum29.de<?php print htmlentities($input, ENT_QUOTES); ?>>Return to website</a>">

<?php
// A control without a name attribute is left out of the submitted form data,
// so the textarea needs one for the edited text to be sent at all.
// NOTE(review): the field name "text" is an assumption -- confirm which POST
// key mail.php expects for the message body.
//
// The converted text is escaped before output so that characters such as
// '<' or '&' in it cannot terminate the textarea or be parsed as markup.
// The charset is given explicitly to match the page's declared ISO-8859-1.
?>
<p><label for="corrected-text">Page text (edit it to correct mistakes):</label></p>
<textarea id="corrected-text" name="text" cols="100" rows="30">
<?php print htmlspecialchars($text, ENT_QUOTES, 'ISO-8859-1'); ?>
</textarea>

<p><input type="submit" value="Submit" style="font-weight: bold;">
<br><small>(Captcha challenge in next step)</small></p>
---|
57 | |
---|
58 | </form> |
---|
59 | |
---|
60 | <?php |
---|
/**
 * Convert an HTML fragment into wrapped plain text.
 *
 * An ordered list of regex rules is applied one after another, each on the
 * result of the previous one: whitespace normalisation, removal of
 * <script>/<style>, plain-text renderings for headings, paragraphs,
 * emphasis, lists, links and tables, then entity decoding. Afterwards any
 * remaining tags are stripped, runs of blank lines are collapsed and the
 * text is wrapped at 80 columns.
 *
 * Rules that transform the matched text (upper-casing, word capitalisation)
 * use preg_replace_callback(). The /e modifier must not be used here: it
 * evaluates the matched page content as PHP code and is not available in
 * PHP 7 and later.
 *
 * @param string $html HTML markup to convert.
 * @return string Plain-text rendering, wrapped at 80 columns.
 */
function html2text($html) {
    // Each rule is array(pattern, replacement[, true when the replacement
    // is a callback name]). Order matters.
    $rules = array(
        array("/\r/", ''),                                       // Carriage returns
        array("/[\n\t]+/", ' '),                                 // Newlines and tabs
        array('/[ ]{2,}/', ' '),                                 // Runs of spaces, pre-handling
        array('/<script[^>]*>.*?<\/script>/i', ''),              // <script> blocks
        array('/<style[^>]*>.*?<\/style>/i', ''),                // <style> blocks
        array('/<h[123][^>]*>(.*?)<\/h[123]>/i', '_html2text_heading', true),    // H1 - H3
        array('/<h[456][^>]*>(.*?)<\/h[456]>/i', '_html2text_subheading', true), // H4 - H6
        array('/<p[^>]*>/i', "\n\n\t"),                          // <p>
        array('/<br[^>]*>/i', "\n"),                             // <br>
        array('/<b[^>]*>(.*?)<\/b>/i', '_html2text_upper', true),           // <b>
        array('/<strong[^>]*>(.*?)<\/strong>/i', '_html2text_upper', true), // <strong>
        array('/<i[^>]*>(.*?)<\/i>/i', '_\\1_'),                 // <i>
        array('/<em[^>]*>(.*?)<\/em>/i', '_\\1_'),               // <em>
        array('/(<ul[^>]*>|<\/ul>)/i', "\n\n"),                  // <ul> and </ul>
        array('/(<ol[^>]*>|<\/ol>)/i', "\n\n"),                  // <ol> and </ol>
        array('/<li[^>]*>(.*?)<\/li>/i', "\t* \\1\n"),           // <li> ... </li>
        array('/<li[^>]*>/i', "\n\t* "),                         // unclosed <li>
        array('/<a [^>]*href="([^"]+)"[^>]*>(.*?)<\/a>/i', '\\2'), // <a href="">: keep link text only
        array('/<hr[^>]*>/i', "\n-------------------------\n"),  // <hr>
        array('/(<table[^>]*>|<\/table>)/i', "\n\n"),            // <table> and </table>
        array('/(<tr[^>]*>|<\/tr>)/i', "\n"),                    // <tr> and </tr>
        array('/<td[^>]*>(.*?)<\/td>/i', "\t\t\\1\n"),           // <td> ... </td>
        array('/<th[^>]*>(.*?)<\/th>/i', '_html2text_table_header', true),  // <th> ... </th>
        array('/&(nbsp|#160);/i', ' '),                          // Non-breaking space
        array('/&(quot|rdquo|ldquo|#8220|#8221|#147|#148);/i', '"'), // Double quotes
        array('/&(apos|rsquo|lsquo|#8216|#8217);/i', "'"),       // Single quotes
        // These two must match the entities, not the bare characters;
        // otherwise &gt; and &lt; fall through to the unknown-entity rule
        // below and are deleted instead of decoded.
        array('/&gt;/i', '>'),                                   // Greater-than
        array('/&lt;/i', '<'),                                   // Less-than
        array('/&(amp|#38);/i', '&'),                            // Ampersand
        array('/&(copy|#169);/i', '(c)'),                        // Copyright
        array('/&(trade|#8482|#153);/i', '(tm)'),                // Trademark
        array('/&(reg|#174);/i', '(R)'),                         // Registered
        array('/&(mdash|#151|#8212);/i', '--'),                  // mdash
        array('/&(ndash|minus|#8211|#8722);/i', '-'),            // ndash
        array('/&(bull|#149|#8226);/i', '*'),                    // Bullet
        // NOTE(review): this literal is emitted in whatever encoding this
        // source file is saved in -- confirm it matches the page charset.
        array('/&(pound|#163);/i', '£'),                         // Pound sign
        array('/&(euro|#8364);/i', 'EUR'),                       // Euro sign
        array('/<head[^>]*>(.*?)<\/head>/is', ''),               // Whole <head> section
        array('/&[^&;]+;/i', ''),                                // Unknown/unhandled entities
        array('/[ ]{2,}/', ' ')                                  // Runs of spaces, post-handling
    );

    $text = (string) $html;
    foreach ($rules as $rule) {
        if (!empty($rule[2])) {
            $result = preg_replace_callback($rule[0], $rule[1], $text);
        } else {
            $result = preg_replace($rule[0], $rule[1], $text);
        }
        // NULL signals a PCRE error; keep the text from before this rule
        // rather than discarding the whole document.
        if ($result !== null) {
            $text = $result;
        }
    }

    // Strip any other HTML tags
    $text = strip_tags($text);

    // Collapse blank lines so that at most two consecutive newlines remain
    $text = preg_replace("/\n\s+\n/", "\n\n", $text);
    $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

    // Wrap the text to a readable format
    $text = wordwrap($text, 80);

    return $text;
}

/** Callback for H1-H3: upper-cased heading text set off by blank lines. */
function _html2text_heading($match) {
    return strtoupper("\n\n" . $match[1] . "\n\n");
}

/** Callback for H4-H6: heading text with capitalised words, set off by blank lines. */
function _html2text_subheading($match) {
    return ucwords("\n\n" . $match[1] . "\n\n");
}

/** Callback for <b>/<strong>: upper-cased element content. */
function _html2text_upper($match) {
    return strtoupper($match[1]);
}

/** Callback for <th>: upper-cased cell text, indented like a <td> cell. */
function _html2text_table_header($match) {
    return strtoupper("\t\t" . $match[1] . "\n");
}
---|
173 | |
---|