<?php
/**
 * JavaScript Minifier
 *
 * FROM: MediaWiki /includes/libs/ , Sven 28.05.2012
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Choose any of Apache, MIT, GPL, LGPL
 */

/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 *
 * The minifier is a single-pass tokenizer driven by a small push-down state machine. The
 * state machine exists only to answer two questions: whether a slash starts a regexp
 * literal or is a division operator, and whether a newline between two tokens would
 * trigger JavaScript's automatic semicolon insertion (and therefore must be kept).
 */
class JavaScriptMinifier {

    /* Class constants */
    /* Parsing states.
     * The state machine is only necessary to decide whether to parse a slash as division
     * operator or as regexp literal.
     * States are named after the next expected item. We only distinguish states when the
     * distinction is relevant for our purpose.
     */
    const STATEMENT                = 0;
    const CONDITION                = 1;
    const PROPERTY_ASSIGNMENT      = 2;
    const EXPRESSION               = 3;
    const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
    const EXPRESSION_OP            = 5;
    const EXPRESSION_FUNC          = 6;
    const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
    const EXPRESSION_TERNARY_OP    = 8;
    const EXPRESSION_TERNARY_FUNC  = 9;
    const PAREN_EXPRESSION         = 10; // expression which is not on the top level
    const PAREN_EXPRESSION_OP      = 11;
    const PAREN_EXPRESSION_FUNC    = 12;
    const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
    const PROPERTY_EXPRESSION_OP   = 14;
    const PROPERTY_EXPRESSION_FUNC = 15;

    /* Token types */
    const TYPE_UN_OP       = 1; // unary operators
    const TYPE_INCR_OP     = 2; // ++ and --
    const TYPE_BIN_OP      = 3; // binary operators
    const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
    const TYPE_HOOK        = 5; // ?
    const TYPE_COLON       = 6; // :
    const TYPE_COMMA       = 7; // ,
    const TYPE_SEMICOLON   = 8; // ;
    const TYPE_BRACE_OPEN  = 9; // {
    const TYPE_BRACE_CLOSE = 10; // }
    const TYPE_PAREN_OPEN  = 11; // ( and [
    const TYPE_PAREN_CLOSE = 12; // ) and ]
    const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
    const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
    const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
    const TYPE_FUNC        = 16; // keywords: function
    const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

    // Sanity limit to avoid excessive memory usage
    const STACK_LIMIT = 1000;

    /* Static functions */

    /**
     * Returns minified JavaScript code.
     *
     * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
     *       literals (e.g. quoted strings) longer than $maxLineLength are encountered
     *       or when required to guard against semicolon insertion.
     *
     * Unterminated string or regexp literals are copied through to the end of the input
     * without raising PHP warnings.
     *
     * @param $s String JavaScript code to minify
     * @param $statementsOnOwnLine Bool Whether to put each statement on its own line
     * @param $maxLineLength Int Maximum length of a single line, or -1 for no maximum.
     * @return String|false Minified code, or whatever self::parseError() returns
     *         (currently false) when a malformed numeric literal is encountered
     */
    public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
        // First we declare a few tables that contain our parsing rules

        // $opChars : characters, which can be combined without whitespace in between them
        $opChars = array(
            '!' => true,
            '"' => true,
            '%' => true,
            '&' => true,
            "'" => true,
            '(' => true,
            ')' => true,
            '*' => true,
            '+' => true,
            ',' => true,
            '-' => true,
            '.' => true,
            '/' => true,
            ':' => true,
            ';' => true,
            '<' => true,
            '=' => true,
            '>' => true,
            '?' => true,
            '[' => true,
            ']' => true,
            '^' => true,
            '{' => true,
            '|' => true,
            '}' => true,
            '~' => true
        );

        // $tokenTypes : maps keywords and operators to their corresponding token type
        $tokenTypes = array(
            '!'          => self::TYPE_UN_OP,
            '~'          => self::TYPE_UN_OP,
            'delete'     => self::TYPE_UN_OP,
            'new'        => self::TYPE_UN_OP,
            'typeof'     => self::TYPE_UN_OP,
            'void'       => self::TYPE_UN_OP,
            '++'         => self::TYPE_INCR_OP,
            '--'         => self::TYPE_INCR_OP,
            '!='         => self::TYPE_BIN_OP,
            '!=='        => self::TYPE_BIN_OP,
            '%'          => self::TYPE_BIN_OP,
            '%='         => self::TYPE_BIN_OP,
            '&'          => self::TYPE_BIN_OP,
            '&&'         => self::TYPE_BIN_OP,
            '&='         => self::TYPE_BIN_OP,
            '*'          => self::TYPE_BIN_OP,
            '*='         => self::TYPE_BIN_OP,
            '+='         => self::TYPE_BIN_OP,
            '-='         => self::TYPE_BIN_OP,
            '.'          => self::TYPE_BIN_OP,
            '/'          => self::TYPE_BIN_OP,
            '/='         => self::TYPE_BIN_OP,
            '<'          => self::TYPE_BIN_OP,
            '<<'         => self::TYPE_BIN_OP,
            '<<='        => self::TYPE_BIN_OP,
            '<='         => self::TYPE_BIN_OP,
            '='          => self::TYPE_BIN_OP,
            '=='         => self::TYPE_BIN_OP,
            '==='        => self::TYPE_BIN_OP,
            '>'          => self::TYPE_BIN_OP,
            '>='         => self::TYPE_BIN_OP,
            '>>'         => self::TYPE_BIN_OP,
            '>>='        => self::TYPE_BIN_OP,
            '>>>'        => self::TYPE_BIN_OP,
            '>>>='       => self::TYPE_BIN_OP,
            '^'          => self::TYPE_BIN_OP,
            '^='         => self::TYPE_BIN_OP,
            '|'          => self::TYPE_BIN_OP,
            '|='         => self::TYPE_BIN_OP,
            '||'         => self::TYPE_BIN_OP,
            'in'         => self::TYPE_BIN_OP,
            'instanceof' => self::TYPE_BIN_OP,
            '+'          => self::TYPE_ADD_OP,
            '-'          => self::TYPE_ADD_OP,
            '?'          => self::TYPE_HOOK,
            ':'          => self::TYPE_COLON,
            ','          => self::TYPE_COMMA,
            ';'          => self::TYPE_SEMICOLON,
            '{'          => self::TYPE_BRACE_OPEN,
            '}'          => self::TYPE_BRACE_CLOSE,
            '('          => self::TYPE_PAREN_OPEN,
            '['          => self::TYPE_PAREN_OPEN,
            ')'          => self::TYPE_PAREN_CLOSE,
            ']'          => self::TYPE_PAREN_CLOSE,
            'break'      => self::TYPE_RETURN,
            'continue'   => self::TYPE_RETURN,
            'return'     => self::TYPE_RETURN,
            'throw'      => self::TYPE_RETURN,
            'catch'      => self::TYPE_IF,
            'for'        => self::TYPE_IF,
            'if'         => self::TYPE_IF,
            'switch'     => self::TYPE_IF,
            'while'      => self::TYPE_IF,
            'with'       => self::TYPE_IF,
            'case'       => self::TYPE_DO,
            'do'         => self::TYPE_DO,
            'else'       => self::TYPE_DO,
            'finally'    => self::TYPE_DO,
            'try'        => self::TYPE_DO,
            'var'        => self::TYPE_DO,
            'function'   => self::TYPE_FUNC
        );

        // $goto : This is the main table for our state machine. For every state/token pair
        //         the following state is defined. When no rule exists for a given pair,
        //         the state is left unchanged.
        $goto = array(
            self::STATEMENT => array(
                self::TYPE_UN_OP      => self::EXPRESSION,
                self::TYPE_INCR_OP    => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
                self::TYPE_IF         => self::CONDITION,
                self::TYPE_FUNC       => self::CONDITION,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION,
                self::TYPE_ADD_OP     => self::EXPRESSION,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COLON      => self::STATEMENT,
                self::TYPE_COMMA      => self::EXPRESSION,
                self::TYPE_SEMICOLON  => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
                self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
                self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
                self::TYPE_HOOK       => self::PAREN_EXPRESSION,
                self::TYPE_COLON      => self::PAREN_EXPRESSION,
                self::TYPE_COMMA      => self::PAREN_EXPRESSION,
                self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
                self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
                self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
                self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
                self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT
            )
        );

        // $push : This table contains the rules for when to push a state onto the stack.
        //         The pushed state is the state to return to when the corresponding
        //         closing token is found
        $push = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN => self::STATEMENT,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::CONDITION => array(
                self::TYPE_PAREN_OPEN => self::STATEMENT
            ),
            self::PROPERTY_ASSIGNMENT => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
            ),
            self::EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_NO_NL => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
            ),
            self::EXPRESSION_TERNARY => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_OP => array(
                self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
                self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::EXPRESSION_TERNARY_FUNC => array(
                self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
            ),
            self::PAREN_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PAREN_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_OP => array(
                self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
            ),
            self::PROPERTY_EXPRESSION_FUNC => array(
                self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
            )
        );

        // $pop : Rules for when to pop a state from the stack
        $pop = array(
            self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
            self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
            self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
            self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
            self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
            self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
        );

        // $semicolon : Rules for when a semicolon insertion is appropriate
        $semicolon = array(
            self::EXPRESSION_NO_NL => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_ADD_OP     => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_PAREN_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            ),
            self::EXPRESSION_OP => array(
                self::TYPE_UN_OP      => true,
                self::TYPE_INCR_OP    => true,
                self::TYPE_BRACE_OPEN => true,
                self::TYPE_RETURN     => true,
                self::TYPE_IF         => true,
                self::TYPE_DO         => true,
                self::TYPE_FUNC       => true,
                self::TYPE_LITERAL    => true
            )
        );

        // Rules for when newlines should be inserted if
        // $statementsOnOwnLine is enabled.
        // $newlineBefore is checked before switching state,
        // $newlineAfter is checked after
        $newlineBefore = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_CLOSE => true,
            ),
        );
        $newlineAfter = array(
            self::STATEMENT => array(
                self::TYPE_BRACE_OPEN  => true,
                self::TYPE_PAREN_CLOSE => true,
                self::TYPE_SEMICOLON   => true,
            ),
        );

        // $divStates : Contains all states that can be followed by a division operator
        $divStates = array(
            self::EXPRESSION_OP          => true,
            self::EXPRESSION_TERNARY_OP  => true,
            self::PAREN_EXPRESSION_OP    => true,
            self::PROPERTY_EXPRESSION_OP => true
        );

        // Here's where the minifying takes place: Loop through the input, looking for tokens
        // and output them to $out, taking actions to the above defined rules when appropriate.
        $out = '';
        $pos = 0;
        $length = strlen( $s );
        $lineLength = 0;
        $newlineFound = true;
        $state = self::STATEMENT;
        $stack = array();
        $last = ';'; // Pretend that we have already seen a semicolon
        while ( $pos < $length ) {
            // First, skip over any whitespace and multiline comments, recording whether we
            // found any newline character
            $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
            if ( !$skip ) {
                $ch = $s[$pos];
                if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
                    // Multiline comment. Search for the end token or EOT.
                    $end = strpos( $s, '*/', $pos + 2 );
                    $skip = $end === false ? $length - $pos : $end - $pos + 2;
                }
            }
            if ( $skip ) {
                // The semicolon insertion mechanism needs to know whether there was a newline
                // between two tokens, so record it now.
                if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
                    $newlineFound = true;
                }
                $pos += $skip;
                continue;
            }
            // Handle C++-style comments and html comments, which are treated as single line
            // comments by the browser, regardless of whether the end tag is on the same line.
            // Handle --> the same way, but only if it's at the beginning of the line
            if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
                || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
                || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
            ) {
                $pos += strcspn( $s, "\r\n", $pos );
                continue;
            }

            // Find out which kind of token we're handling. $end will point past the end of it.
            $end = $pos + 1;
            // Handle string literals
            if ( $ch === "'" || $ch === '"' ) {
                // Search to the end of the string literal, skipping over backslash escapes.
                // Each round speculatively adds 2: if the search stopped at a backslash, the
                // next round then starts right after the escaped character. The surplus 1 is
                // removed after the loop. The offset is clamped so that an input ending in a
                // backslash never passes an out-of-range offset to strcspn().
                $search = $ch . '\\';
                do {
                    $end += strcspn( $s, $search, min( $end, $length ) ) + 2;
                } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                $end--;
            // We have to distinguish between regexp literals and division operators
            // A division operator is only possible in certain states
            } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
                // Regexp literal, search to the end, skipping over backslash escapes and
                // character classes
                for ( ; ; ) {
                    // Outside a character class: stop at '/', '[' or a backslash
                    do {
                        $end += strcspn( $s, '/[\\', min( $end, $length ) ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                    if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
                        // Closing slash found, or the input ended inside the literal
                        break;
                    }
                    // Inside a character class: a '/' is literal here, stop only at ']'
                    do {
                        $end += strcspn( $s, ']\\', min( $end, $length ) ) + 2;
                    } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
                    $end--;
                }
                // Search past the regexp modifiers (gi)
                while ( $end < $length && ctype_alpha( $s[$end] ) ) {
                    $end++;
                }
            } elseif (
                $ch === '0'
                && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
            ) {
                // Hex numeric literal
                $end++; // x or X
                $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
                if ( !$len ) {
                    return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
                }
                $end += $len;
            } elseif (
                ctype_digit( $ch )
                || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
            ) {
                // Decimal numeric literal: integer part, optional fraction, optional exponent
                $end += strspn( $s, '0123456789', $end );
                $decimal = strspn( $s, '.', $end );
                if ( $decimal ) {
                    if ( $decimal > 2 ) {
                        return self::parseError( $s, $end, 'The number has too many decimal points' );
                    }
                    $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
                }
                $exponent = strspn( $s, 'eE', $end );
                if ( $exponent ) {
                    if ( $exponent > 1 ) {
                        return self::parseError( $s, $end, 'Number with several E' );
                    }
                    $end++;

                    // + sign is optional; - sign is required.
                    $end += strspn( $s, '-+', $end );
                    $len = strspn( $s, '0123456789', $end );
                    if ( !$len ) {
                        return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
                    }
                    $end += $len;
                }
            } elseif ( isset( $opChars[$ch] ) ) {
                // Punctuation character. Search for the longest matching operator.
                while (
                    $end < $length
                    && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
                ) {
                    $end++;
                }
            } else {
                // Identifier or reserved word. Search for the end by excluding whitespace and
                // punctuation.
                $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
            }

            // An unterminated string or regexp literal leaves $end past the end of the input
            // because of the speculative additions above. Clamp it, so that $s[$end - 1] below
            // reads a real character instead of an uninitialized string offset.
            if ( $end > $length ) {
                $end = $length;
            }

            // Now get the token type from our type array
            $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
            $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

            if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
                // This token triggers the semicolon insertion mechanism of javascript. While we
                // could add the ; token here ourselves, keeping the newline has a few advantages.
                $out .= "\n";
                $state = self::STATEMENT;
                $lineLength = 0;
            } elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
                !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP
            ) {
                // This line would get too long if we added $token, so add a newline first.
                // Only do this if it won't trigger semicolon insertion and if it won't
                // put a postfix increment operator on its own line, which is illegal in js.
                $out .= "\n";
                $lineLength = 0;
            // Check, whether we have to separate the token from the last one with whitespace
            } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
                $out .= ' ';
                $lineLength++;
            // Don't accidentally create ++, -- or // tokens
            } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
                $out .= ' ';
                $lineLength++;
            }

            $out .= $token;
            $lineLength += $end - $pos; // += strlen( $token )
            $last = $s[$end - 1];
            $pos = $end;
            $newlineFound = false;

            // Output a newline after the token if required.
            // This is checked before AND after switching state: first against the state the
            // token was read in ($newlineBefore), then against the new state ($newlineAfter).
            $newlineAdded = false;
            if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
                $newlineAdded = true;
            }

            // Now that we have output our token, transition into the new state.
            if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
                $stack[] = $push[$state][$type];
            }
            if ( $stack && isset( $pop[$state][$type] ) ) {
                $state = array_pop( $stack );
            } elseif ( isset( $goto[$state][$type] ) ) {
                $state = $goto[$state][$type];
            }

            // Check for newline insertion again
            if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
                $out .= "\n";
                $lineLength = 0;
            }
        }
        return $out;
    }

    /**
     * Called by minify() when a malformed numeric literal is found.
     *
     * The arguments are currently unused; the method only signals failure by returning
     * false, which minify() passes straight back to its caller.
     *
     * @param $fullJavascript String The complete JavaScript source being minified
     * @param $position Int Byte offset in $fullJavascript passed by minify() for the error
     * @param $errorMsg String Human readable description of the problem
     * @return Bool Always false
     */
    public static function parseError( $fullJavascript, $position, $errorMsg ) {
        // TODO: Handle the error: trigger_error, throw exception, return false...
        return false;
    }
}