BUG-5410: bring the imported source code to Java 7 world
[yangtools.git] / third-party / xsd-regex / src / main / java / org / opendaylight / yangtools / xsd / regex / Token.java
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 package org.opendaylight.yangtools.xsd.regex;
19
20 import java.util.Hashtable;
21 import java.util.Vector;
22
23 /**
24  * This class represents a node in parse tree.
25  *
26  * @xerces.internal
27  *
28  * @version $Id: Token.java 1638344 2014-11-11 20:15:46Z mrglavas $
29  */
30 class Token implements java.io.Serializable {
31
32     private static final long serialVersionUID = 8484976002585487481L;
33
34     static final boolean COUNTTOKENS = true;
35     static int tokens = 0;
36
37     static final int CHAR = 0;                  // Literal char
38     static final int DOT = 11;                  // .
39     static final int CONCAT = 1;                // XY
40     static final int UNION = 2;                 // X|Y|Z
41     static final int CLOSURE = 3;               // X*
42     static final int RANGE = 4;                 // [a-zA-Z] etc.
43     static final int NRANGE = 5;                // [^a-zA-Z] etc.
44     static final int PAREN = 6;                 // (X) or (?:X)
45     static final int EMPTY = 7;                 //
46     static final int ANCHOR = 8;                // ^ $ \b \B \< \> \A \Z \z
47     static final int NONGREEDYCLOSURE = 9;      // *? +?
48     static final int STRING = 10;               // strings
49     static final int BACKREFERENCE = 12;        // back references
50     static final int LOOKAHEAD = 20;            // (?=...)
51     static final int NEGATIVELOOKAHEAD = 21;    // (?!...)
52     static final int LOOKBEHIND = 22;           // (?<=...)
53     static final int NEGATIVELOOKBEHIND = 23;   // (?<!...)
54     static final int INDEPENDENT = 24;          // (?>...)
55     static final int MODIFIERGROUP = 25;        // (?ims-ims:...)
56     static final int CONDITION = 26;            // (?(...)yes|no)
57
58     static final int UTF16_MAX = 0x10ffff;
59
60     final int type;
61
62     static Token token_dot;
63     static Token token_0to9;
64     static Token token_wordchars;
65     static Token token_not_0to9;
66     static Token token_not_wordchars;
67     static Token token_spaces;
68     static Token token_not_spaces;
69     static Token token_empty;
70     static Token token_linebeginning;
71     static Token token_linebeginning2;
72     static Token token_lineend;
73     static Token token_stringbeginning;
74     static Token token_stringend;
75     static Token token_stringend2;
76     static Token token_wordedge;
77     static Token token_not_wordedge;
78     static Token token_wordbeginning;
79     static Token token_wordend;
80     static {
81         Token.token_empty = new Token(Token.EMPTY);
82
83         Token.token_linebeginning = Token.createAnchor('^');
84         Token.token_linebeginning2 = Token.createAnchor('@');
85         Token.token_lineend = Token.createAnchor('$');
86         Token.token_stringbeginning = Token.createAnchor('A');
87         Token.token_stringend = Token.createAnchor('z');
88         Token.token_stringend2 = Token.createAnchor('Z');
89         Token.token_wordedge = Token.createAnchor('b');
90         Token.token_not_wordedge = Token.createAnchor('B');
91         Token.token_wordbeginning = Token.createAnchor('<');
92         Token.token_wordend = Token.createAnchor('>');
93
94         Token.token_dot = new Token(Token.DOT);
95
96         Token.token_0to9 = Token.createRange();
97         Token.token_0to9.addRange('0', '9');
98         Token.token_wordchars = Token.createRange();
99         Token.token_wordchars.addRange('0', '9');
100         Token.token_wordchars.addRange('A', 'Z');
101         Token.token_wordchars.addRange('_', '_');
102         Token.token_wordchars.addRange('a', 'z');
103         Token.token_spaces = Token.createRange();
104         Token.token_spaces.addRange('\t', '\t');
105         Token.token_spaces.addRange('\n', '\n');
106         Token.token_spaces.addRange('\f', '\f');
107         Token.token_spaces.addRange('\r', '\r');
108         Token.token_spaces.addRange(' ', ' ');
109
110         Token.token_not_0to9 = Token.complementRanges(Token.token_0to9);
111         Token.token_not_wordchars = Token.complementRanges(Token.token_wordchars);
112         Token.token_not_spaces = Token.complementRanges(Token.token_spaces);
113     }
114
115     static Token.ParenToken createLook(int type, Token child) {
116         if (COUNTTOKENS) {
117             Token.tokens ++;
118         }
119         return new Token.ParenToken(type, child, 0);
120     }
121     static Token.ParenToken createParen(Token child, int pnumber) {
122         if (COUNTTOKENS) {
123             Token.tokens ++;
124         }
125         return new Token.ParenToken(Token.PAREN, child, pnumber);
126     }
127     static Token.ClosureToken createClosure(Token tok) {
128         if (COUNTTOKENS) {
129             Token.tokens ++;
130         }
131         return new Token.ClosureToken(Token.CLOSURE, tok);
132     }
133     static Token.ClosureToken createNGClosure(Token tok) {
134         if (COUNTTOKENS) {
135             Token.tokens ++;
136         }
137         return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
138     }
139     static Token.ConcatToken createConcat(Token tok1, Token tok2) {
140         if (COUNTTOKENS) {
141             Token.tokens ++;
142         }
143         return new Token.ConcatToken(tok1, tok2);
144     }
145     static Token.UnionToken createConcat() {
146         if (COUNTTOKENS) {
147             Token.tokens ++;
148         }
149         return new Token.UnionToken(Token.CONCAT); // *** It is not a bug.
150     }
151     static Token.UnionToken createUnion() {
152         if (COUNTTOKENS) {
153             Token.tokens ++;
154         }
155         return new Token.UnionToken(Token.UNION);
156     }
157     static Token createEmpty() {
158         return Token.token_empty;
159     }
160     static RangeToken createRange() {
161         if (COUNTTOKENS) {
162             Token.tokens ++;
163         }
164         return new RangeToken(Token.RANGE);
165     }
166     static RangeToken createNRange() {
167         if (COUNTTOKENS) {
168             Token.tokens ++;
169         }
170         return new RangeToken(Token.NRANGE);
171     }
172     static Token.CharToken createChar(int ch) {
173         if (COUNTTOKENS) {
174             Token.tokens ++;
175         }
176         return new Token.CharToken(Token.CHAR, ch);
177     }
178     static private Token.CharToken createAnchor(int ch) {
179         if (COUNTTOKENS) {
180             Token.tokens ++;
181         }
182         return new Token.CharToken(Token.ANCHOR, ch);
183     }
184     static Token.StringToken createBackReference(int refno) {
185         if (COUNTTOKENS) {
186             Token.tokens ++;
187         }
188         return new Token.StringToken(Token.BACKREFERENCE, null, refno);
189     }
190     static Token.StringToken createString(String str) {
191         if (COUNTTOKENS) {
192             Token.tokens ++;
193         }
194         return new Token.StringToken(Token.STRING, str, 0);
195     }
196     static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
197         if (COUNTTOKENS) {
198             Token.tokens ++;
199         }
200         return new Token.ModifierToken(child, add, mask);
201     }
202     static Token.ConditionToken createCondition(int refno, Token condition,
203                                                 Token yespat, Token nopat) {
204         if (COUNTTOKENS) {
205             Token.tokens ++;
206         }
207         return new Token.ConditionToken(refno, condition, yespat, nopat);
208     }
209
210     protected Token(int type) {
211         this.type = type;
212     }
213
214     /**
215      * A number of children.
216      */
217     int size() {
218         return 0;
219     }
220     Token getChild(int index) {
221         return null;
222     }
223     void addChild(Token tok) {
224         throw new RuntimeException("Not supported.");
225     }
226
227                                                 // for RANGE or NRANGE
228     protected void addRange(int start, int end) {
229         throw new RuntimeException("Not supported.");
230     }
231     protected void sortRanges() {
232         throw new RuntimeException("Not supported.");
233     }
234     protected void compactRanges() {
235         throw new RuntimeException("Not supported.");
236     }
237     protected void mergeRanges(Token tok) {
238         throw new RuntimeException("Not supported.");
239     }
240     protected void subtractRanges(Token tok) {
241         throw new RuntimeException("Not supported.");
242     }
243     protected void intersectRanges(Token tok) {
244         throw new RuntimeException("Not supported.");
245     }
246     static Token complementRanges(Token tok) {
247         return RangeToken.complementRanges(tok);
248     }
249
250
251     void setMin(int min) {                      // for CLOSURE
252     }
253     void setMax(int max) {                      // for CLOSURE
254     }
255     int getMin() {                              // for CLOSURE
256         return -1;
257     }
258     int getMax() {                              // for CLOSURE
259         return -1;
260     }
261     int getReferenceNumber() {                  // for STRING
262         return 0;
263     }
264     String getString() {                        // for STRING
265         return null;
266     }
267
268     int getParenNumber() {
269         return 0;
270     }
271     int getChar() {
272         return -1;
273     }
274
275     @Override
276     public String toString() {
277         return this.toString(0);
278     }
279     public String toString(int options) {
280         return this.type == Token.DOT ? "." : "";
281     }
282
283     /**
284      * How many characters are needed?
285      */
286     final int getMinLength() {
287         switch (this.type) {
288           case CONCAT:
289             int sum = 0;
290             for (int i = 0;  i < this.size();  i ++) {
291                 sum += this.getChild(i).getMinLength();
292             }
293             return sum;
294
295           case CONDITION:
296           case UNION:
297             if (this.size() == 0) {
298                 return 0;
299             }
300             int ret = this.getChild(0).getMinLength();
301             for (int i = 1;  i < this.size();  i ++) {
302                 int min = this.getChild(i).getMinLength();
303                 if (min < ret) {
304                     ret = min;
305                 }
306             }
307             return ret;
308
309           case CLOSURE:
310           case NONGREEDYCLOSURE:
311             if (this.getMin() >= 0) {
312                 return this.getMin() * this.getChild(0).getMinLength();
313             }
314             return 0;
315
316           case EMPTY:
317           case ANCHOR:
318             return 0;
319
320           case DOT:
321           case CHAR:
322           case RANGE:
323           case NRANGE:
324             return 1;
325
326           case INDEPENDENT:
327           case PAREN:
328           case MODIFIERGROUP:
329             return this.getChild(0).getMinLength();
330
331           case BACKREFERENCE:
332             return 0;                           // *******
333
334           case STRING:
335             return this.getString().length();
336
337           case LOOKAHEAD:
338           case NEGATIVELOOKAHEAD:
339           case LOOKBEHIND:
340           case NEGATIVELOOKBEHIND:
341             return 0;                           // ***** Really?
342
343           default:
344             throw new RuntimeException("Token#getMinLength(): Invalid Type: "+this.type);
345         }
346     }
347
348     final int getMaxLength() {
349         switch (this.type) {
350           case CONCAT:
351             int sum = 0;
352             for (int i = 0;  i < this.size();  i ++) {
353                 int d = this.getChild(i).getMaxLength();
354                 if (d < 0) {
355                     return -1;
356                 }
357                 sum += d;
358             }
359             return sum;
360
361           case CONDITION:
362           case UNION:
363             if (this.size() == 0) {
364                 return 0;
365             }
366             int ret = this.getChild(0).getMaxLength();
367             for (int i = 1;  ret >= 0 && i < this.size();  i ++) {
368                 int max = this.getChild(i).getMaxLength();
369                 if (max < 0) {                  // infinity
370                     ret = -1;
371                     break;
372                 }
373                 if (max > ret) {
374                     ret = max;
375                 }
376             }
377             return ret;
378
379           case CLOSURE:
380           case NONGREEDYCLOSURE:
381             if (this.getMax() >= 0) {
382                 // When this.child.getMaxLength() < 0,
383                                                 // this returns minus value
384                 return this.getMax() * this.getChild(0).getMaxLength();
385             }
386             return -1;
387
388           case EMPTY:
389           case ANCHOR:
390             return 0;
391
392           case CHAR:
393             return 1;
394           case DOT:
395           case RANGE:
396           case NRANGE:
397             return 2;
398
399           case INDEPENDENT:
400           case PAREN:
401           case MODIFIERGROUP:
402             return this.getChild(0).getMaxLength();
403
404           case BACKREFERENCE:
405             return -1;                          // ******
406
407           case STRING:
408             return this.getString().length();
409
410           case LOOKAHEAD:
411           case NEGATIVELOOKAHEAD:
412           case LOOKBEHIND:
413           case NEGATIVELOOKBEHIND:
414             return 0;                           // ***** Really?
415
416           default:
417             throw new RuntimeException("Token#getMaxLength(): Invalid Type: "+this.type);
418         }
419     }
420
421     static final int FC_CONTINUE = 0;
422     static final int FC_TERMINAL = 1;
423     static final int FC_ANY = 2;
424     private static final boolean isSet(int options, int flag) {
425         return (options & flag) == flag;
426     }
427     final int analyzeFirstCharacter(RangeToken result, int options) {
428         switch (this.type) {
429           case CONCAT:
430             int ret = FC_CONTINUE;
431             for (int i = 0;  i < this.size();  i ++) {
432                 if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE) {
433                     break;
434                 }
435             }
436             return ret;
437
438           case UNION:
439             if (this.size() == 0) {
440                 return FC_CONTINUE;
441             }
442             /*
443              *  a|b|c -> FC_TERMINAL
444              *  a|.|c -> FC_ANY
445              *  a|b|  -> FC_CONTINUE
446              */
447             int ret2 = FC_CONTINUE;
448             boolean hasEmpty = false;
449             for (int i = 0;  i < this.size();  i ++) {
450                 ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
451                 if (ret2 == FC_ANY) {
452                     break;
453                 } else if (ret2 == FC_CONTINUE) {
454                     hasEmpty = true;
455                 }
456             }
457             return hasEmpty ? FC_CONTINUE : ret2;
458
459           case CONDITION:
460             int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
461             if (this.size() == 1) {
462                 return FC_CONTINUE;
463             }
464             if (ret3 == FC_ANY) {
465                 return ret3;
466             }
467             int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
468             if (ret4 == FC_ANY) {
469                 return ret4;
470             }
471             return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;
472
473           case CLOSURE:
474           case NONGREEDYCLOSURE:
475             this.getChild(0).analyzeFirstCharacter(result, options);
476             return FC_CONTINUE;
477
478           case EMPTY:
479           case ANCHOR:
480             return FC_CONTINUE;
481
482           case CHAR:
483             int ch = this.getChar();
484             result.addRange(ch, ch);
485             if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
486                 ch = Character.toUpperCase((char)ch);
487                 result.addRange(ch, ch);
488                 ch = Character.toLowerCase((char)ch);
489                 result.addRange(ch, ch);
490             }
491             return FC_TERMINAL;
492
493           case DOT:
494               return FC_ANY;
495
496           case RANGE:
497             result.mergeRanges(this);
498             return FC_TERMINAL;
499
500           case NRANGE:                          // ****
501             result.mergeRanges(Token.complementRanges(this));
502             return FC_TERMINAL;
503
504           case INDEPENDENT:
505           case PAREN:
506             return this.getChild(0).analyzeFirstCharacter(result, options);
507
508           case MODIFIERGROUP:
509             options |= ((ModifierToken)this).getOptions();
510             options &= ~((ModifierToken)this).getOptionsMask();
511             return this.getChild(0).analyzeFirstCharacter(result, options);
512
513           case BACKREFERENCE:
514             result.addRange(0, UTF16_MAX);  // **** We can not optimize.
515             return FC_ANY;
516
517           case STRING:
518             int cha = this.getString().charAt(0);
519             int ch2;
520             if (REUtil.isHighSurrogate(cha)
521                 && this.getString().length() >= 2
522                 && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1)))) {
523                 cha = REUtil.composeFromSurrogates(cha, ch2);
524             }
525             result.addRange(cha, cha);
526             if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
527                 cha = Character.toUpperCase((char)cha);
528                 result.addRange(cha, cha);
529                 cha = Character.toLowerCase((char)cha);
530                 result.addRange(cha, cha);
531             }
532             return FC_TERMINAL;
533
534           case LOOKAHEAD:
535           case NEGATIVELOOKAHEAD:
536           case LOOKBEHIND:
537           case NEGATIVELOOKBEHIND:
538             return FC_CONTINUE;
539
540           default:
541             throw new RuntimeException("Token#analyzeHeadCharacter(): Invalid Type: "+this.type);
542         }
543     }
544
545     private final boolean isShorterThan(Token tok) {
546         if (tok == null) {
547             return false;
548         }
549         /*
550         int mylength;
551         if (this.type == STRING)  mylength = this.getString().length();
552         else if (this.type == CHAR)  mylength = this.getChar() >= 0x10000 ? 2 : 1;
553         else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
554         int otherlength;
555         if (tok.type == STRING)  otherlength = tok.getString().length();
556         else if (tok.type == CHAR)  otherlength = tok.getChar() >= 0x10000 ? 2 : 1;
557         else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
558         */
559         int mylength;
560         if (this.type == STRING) {
561             mylength = this.getString().length();
562         } else {
563             throw new RuntimeException("Internal Error: Illegal type: "+this.type);
564         }
565         int otherlength;
566         if (tok.type == STRING) {
567             otherlength = tok.getString().length();
568         } else {
569             throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
570         }
571         return mylength < otherlength;
572     }
573
574     static class FixedStringContainer {
575         Token token = null;
576         int options = 0;
577         FixedStringContainer() {
578         }
579     }
580
581     final void findFixedString(FixedStringContainer container, int options) {
582         switch (this.type) {
583           case CONCAT:
584             Token prevToken = null;
585             int prevOptions = 0;
586             for (int i = 0;  i < this.size();  i ++) {
587                 this.getChild(i).findFixedString(container, options);
588                 if (prevToken == null || prevToken.isShorterThan(container.token)) {
589                     prevToken = container.token;
590                     prevOptions = container.options;
591                 }
592             }
593             container.token = prevToken;
594             container.options = prevOptions;
595             return;
596
597           case UNION:
598           case CLOSURE:
599           case NONGREEDYCLOSURE:
600           case EMPTY:
601           case ANCHOR:
602           case RANGE:
603           case DOT:
604           case NRANGE:
605           case BACKREFERENCE:
606           case LOOKAHEAD:
607           case NEGATIVELOOKAHEAD:
608           case LOOKBEHIND:
609           case NEGATIVELOOKBEHIND:
610           case CONDITION:
611             container.token = null;
612             return;
613
614           case CHAR:                            // Ignore CHAR tokens.
615             container.token = null;             // **
616             return;                             // **
617
618           case STRING:
619             container.token = this;
620             container.options = options;
621             return;
622
623           case INDEPENDENT:
624           case PAREN:
625             this.getChild(0).findFixedString(container, options);
626             return;
627
628           case MODIFIERGROUP:
629             options |= ((ModifierToken)this).getOptions();
630             options &= ~((ModifierToken)this).getOptionsMask();
631             this.getChild(0).findFixedString(container, options);
632             return;
633
634           default:
635             throw new RuntimeException("Token#findFixedString(): Invalid Type: "+this.type);
636         }
637     }
638
639     boolean match(int ch) {
640         throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
641     }
642
643     // ------------------------------------------------------
644     private final static Hashtable<String, Token> categories = new Hashtable<>();
645     private final static Hashtable<String, Token> categories2 = new Hashtable<>();
646     private static final String[] categoryNames = {
647         "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
648         "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
649         "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
650         "Pi", "Pf",  // 29, 30
651         "L", "M", "N", "Z", "C", "P", "S",      // 31-37
652     };
653
654     // Schema Rec. {Datatypes} - Punctuation
655     static final int CHAR_INIT_QUOTE  = 29;     // Pi - initial quote
656     static final int CHAR_FINAL_QUOTE = 30;     // Pf - final quote
657     static final int CHAR_LETTER = 31;
658     static final int CHAR_MARK = 32;
659     static final int CHAR_NUMBER = 33;
660     static final int CHAR_SEPARATOR = 34;
661     static final int CHAR_OTHER = 35;
662     static final int CHAR_PUNCTUATION = 36;
663     static final int CHAR_SYMBOL = 37;
664
665     //blockNames in UNICODE 3.1 that supported by XML Schema REC
666     private static final String[] blockNames = {
667         /*0000..007F;*/ "Basic Latin",
668         /*0080..00FF;*/ "Latin-1 Supplement",
669         /*0100..017F;*/ "Latin Extended-A",
670         /*0180..024F;*/ "Latin Extended-B",
671         /*0250..02AF;*/ "IPA Extensions",
672         /*02B0..02FF;*/ "Spacing Modifier Letters",
673         /*0300..036F;*/ "Combining Diacritical Marks",
674         /*0370..03FF;*/ "Greek",
675         /*0400..04FF;*/ "Cyrillic",
676         /*0530..058F;*/ "Armenian",
677         /*0590..05FF;*/ "Hebrew",
678         /*0600..06FF;*/ "Arabic",
679         /*0700..074F;*/ "Syriac",
680         /*0780..07BF;*/ "Thaana",
681         /*0900..097F;*/ "Devanagari",
682         /*0980..09FF;*/ "Bengali",
683         /*0A00..0A7F;*/ "Gurmukhi",
684         /*0A80..0AFF;*/ "Gujarati",
685         /*0B00..0B7F;*/ "Oriya",
686         /*0B80..0BFF;*/ "Tamil",
687         /*0C00..0C7F;*/ "Telugu",
688         /*0C80..0CFF;*/ "Kannada",
689         /*0D00..0D7F;*/ "Malayalam",
690         /*0D80..0DFF;*/ "Sinhala",
691         /*0E00..0E7F;*/ "Thai",
692         /*0E80..0EFF;*/ "Lao",
693         /*0F00..0FFF;*/ "Tibetan",
694         /*1000..109F;*/ "Myanmar",
695         /*10A0..10FF;*/ "Georgian",
696         /*1100..11FF;*/ "Hangul Jamo",
697         /*1200..137F;*/ "Ethiopic",
698         /*13A0..13FF;*/ "Cherokee",
699         /*1400..167F;*/ "Unified Canadian Aboriginal Syllabics",
700         /*1680..169F;*/ "Ogham",
701         /*16A0..16FF;*/ "Runic",
702         /*1780..17FF;*/ "Khmer",
703         /*1800..18AF;*/ "Mongolian",
704         /*1E00..1EFF;*/ "Latin Extended Additional",
705         /*1F00..1FFF;*/ "Greek Extended",
706         /*2000..206F;*/ "General Punctuation",
707         /*2070..209F;*/ "Superscripts and Subscripts",
708         /*20A0..20CF;*/ "Currency Symbols",
709         /*20D0..20FF;*/ "Combining Marks for Symbols",
710         /*2100..214F;*/ "Letterlike Symbols",
711         /*2150..218F;*/ "Number Forms",
712         /*2190..21FF;*/ "Arrows",
713         /*2200..22FF;*/ "Mathematical Operators",
714         /*2300..23FF;*/ "Miscellaneous Technical",
715         /*2400..243F;*/ "Control Pictures",
716         /*2440..245F;*/ "Optical Character Recognition",
717         /*2460..24FF;*/ "Enclosed Alphanumerics",
718         /*2500..257F;*/ "Box Drawing",
719         /*2580..259F;*/ "Block Elements",
720         /*25A0..25FF;*/ "Geometric Shapes",
721         /*2600..26FF;*/ "Miscellaneous Symbols",
722         /*2700..27BF;*/ "Dingbats",
723         /*2800..28FF;*/ "Braille Patterns",
724         /*2E80..2EFF;*/ "CJK Radicals Supplement",
725         /*2F00..2FDF;*/ "Kangxi Radicals",
726         /*2FF0..2FFF;*/ "Ideographic Description Characters",
727         /*3000..303F;*/ "CJK Symbols and Punctuation",
728         /*3040..309F;*/ "Hiragana",
729         /*30A0..30FF;*/ "Katakana",
730         /*3100..312F;*/ "Bopomofo",
731         /*3130..318F;*/ "Hangul Compatibility Jamo",
732         /*3190..319F;*/ "Kanbun",
733         /*31A0..31BF;*/ "Bopomofo Extended",
734         /*3200..32FF;*/ "Enclosed CJK Letters and Months",
735         /*3300..33FF;*/ "CJK Compatibility",
736         /*3400..4DB5;*/ "CJK Unified Ideographs Extension A",
737         /*4E00..9FFF;*/ "CJK Unified Ideographs",
738         /*A000..A48F;*/ "Yi Syllables",
739         /*A490..A4CF;*/ "Yi Radicals",
740         /*AC00..D7A3;*/ "Hangul Syllables",
741         /*E000..F8FF;*/ "Private Use",
742         /*F900..FAFF;*/ "CJK Compatibility Ideographs",
743         /*FB00..FB4F;*/ "Alphabetic Presentation Forms",
744         /*FB50..FDFF;*/ "Arabic Presentation Forms-A",
745         /*FE20..FE2F;*/ "Combining Half Marks",
746         /*FE30..FE4F;*/ "CJK Compatibility Forms",
747         /*FE50..FE6F;*/ "Small Form Variants",
748         /*FE70..FEFE;*/ "Arabic Presentation Forms-B",
749         /*FEFF..FEFF;*/ "Specials",
750         /*FF00..FFEF;*/ "Halfwidth and Fullwidth Forms",
751          //missing Specials add manually
752         /*10300..1032F;*/ "Old Italic",         // 84
753         /*10330..1034F;*/ "Gothic",
754         /*10400..1044F;*/ "Deseret",
755         /*1D000..1D0FF;*/ "Byzantine Musical Symbols",
756         /*1D100..1D1FF;*/ "Musical Symbols",
757         /*1D400..1D7FF;*/ "Mathematical Alphanumeric Symbols",
758         /*20000..2A6D6;*/ "CJK Unified Ideographs Extension B",
759         /*2F800..2FA1F;*/ "CJK Compatibility Ideographs Supplement",
760         /*E0000..E007F;*/ "Tags",
761         //missing 2 private use add manually
762
763     };
764     //ADD THOSE MANUALLY
765     //F0000..FFFFD; "Private Use",
766     //100000..10FFFD; "Private Use"
767     //FFF0..FFFD; "Specials",
768     static final String blockRanges =
769        "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
770         +"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
771         +"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF"
772         +"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF"
773         +"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF"
774         +"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF"
775         +"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF"
776         +"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F"
777         +"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF"
778         +"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF"
779         +"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF";
780     static final int[] nonBMPBlockRanges = {
781         0x10300, 0x1032F,       // 84
782         0x10330, 0x1034F,
783         0x10400, 0x1044F,
784         0x1D000, 0x1D0FF,
785         0x1D100, 0x1D1FF,
786         0x1D400, 0x1D7FF,
787         0x20000, 0x2A6D6,
788         0x2F800, 0x2FA1F,
789         0xE0000, 0xE007F
790     };
791     private static final int NONBMP_BLOCK_START = 84;
792
793     static protected RangeToken getRange(String name, boolean positive) {
794         if (Token.categories.size() == 0) {
795             synchronized (Token.categories) {
796                 Token[] ranges = new Token[Token.categoryNames.length];
797                 for (int i = 0;  i < ranges.length;  i ++) {
798                     ranges[i] = Token.createRange();
799                 }
800                 int type;
801                 for (int i = 0;  i < 0x10000;  i ++) {
802                     type = Character.getType((char)i);
803                     if (type == Character.START_PUNCTUATION ||
804                         type == Character.END_PUNCTUATION) {
805                         //build table of Pi values
806                         if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
807                             i == 0x201F || i == 0x2039) {
808                             type = CHAR_INIT_QUOTE;
809                         }
810                         //build table of Pf values
811                         if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
812                             type = CHAR_FINAL_QUOTE;
813                         }
814                     }
815                     ranges[type].addRange(i, i);
816                     switch (type) {
817                       case Character.UPPERCASE_LETTER:
818                       case Character.LOWERCASE_LETTER:
819                       case Character.TITLECASE_LETTER:
820                       case Character.MODIFIER_LETTER:
821                       case Character.OTHER_LETTER:
822                         type = CHAR_LETTER;
823                         break;
824                       case Character.NON_SPACING_MARK:
825                       case Character.COMBINING_SPACING_MARK:
826                       case Character.ENCLOSING_MARK:
827                         type = CHAR_MARK;
828                         break;
829                       case Character.DECIMAL_DIGIT_NUMBER:
830                       case Character.LETTER_NUMBER:
831                       case Character.OTHER_NUMBER:
832                         type = CHAR_NUMBER;
833                         break;
834                       case Character.SPACE_SEPARATOR:
835                       case Character.LINE_SEPARATOR:
836                       case Character.PARAGRAPH_SEPARATOR:
837                         type = CHAR_SEPARATOR;
838                         break;
839                       case Character.CONTROL:
840                       case Character.FORMAT:
841                       case Character.SURROGATE:
842                       case Character.PRIVATE_USE:
843                       case Character.UNASSIGNED:
844                         type = CHAR_OTHER;
845                         break;
846                       case Character.CONNECTOR_PUNCTUATION:
847                       case Character.DASH_PUNCTUATION:
848                       case Character.START_PUNCTUATION:
849                       case Character.END_PUNCTUATION:
850                       case CHAR_INIT_QUOTE:
851                       case CHAR_FINAL_QUOTE:
852                       case Character.OTHER_PUNCTUATION:
853                         type = CHAR_PUNCTUATION;
854                         break;
855                       case Character.MATH_SYMBOL:
856                       case Character.CURRENCY_SYMBOL:
857                       case Character.MODIFIER_SYMBOL:
858                       case Character.OTHER_SYMBOL:
859                         type = CHAR_SYMBOL;
860                         break;
861                       default:
862                         throw new RuntimeException("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type);
863                     }
864                     ranges[type].addRange(i, i);
865                 } // for all characters
866                 ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
867                 ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
868
869                 for (int i = 0;  i < ranges.length;  i ++) {
870                     if (Token.categoryNames[i] != null) {
871                         if (i == Character.UNASSIGNED) { // Unassigned
872                             ranges[i].addRange(0x10000, Token.UTF16_MAX);
873                         }
874                         Token.categories.put(Token.categoryNames[i], ranges[i]);
875                         Token.categories2.put(Token.categoryNames[i],
876                                               Token.complementRanges(ranges[i]));
877                     }
878                 }
879                 //REVISIT: do we really need to support block names as in Unicode 3.1
880                 //         or we can just create all the names in IsBLOCKNAME format (XML Schema REC)?
881                 //
882                 StringBuffer buffer = new StringBuffer(50);
883                 for (int i = 0;  i < Token.blockNames.length;  i ++) {
884                     Token r1 = Token.createRange();
885                     int location;
886                     if (i < NONBMP_BLOCK_START) {
887                         location = i*2;
888                         int rstart = Token.blockRanges.charAt(location);
889                         int rend = Token.blockRanges.charAt(location+1);
890                         //DEBUGING
891                         //System.out.println(n+" " +Integer.toHexString(rstart)
892                         //                     +"-"+ Integer.toHexString(rend));
893                         r1.addRange(rstart, rend);
894                     } else {
895                         location = (i - NONBMP_BLOCK_START) * 2;
896                         r1.addRange(Token.nonBMPBlockRanges[location],
897                                     Token.nonBMPBlockRanges[location + 1]);
898                     }
899                     String n = Token.blockNames[i];
900                     if (n.equals("Specials")) {
901                         r1.addRange(0xfff0, 0xfffd);
902                     }
903                     if (n.equals("Private Use")) {
904                         r1.addRange(0xF0000,0xFFFFD);
905                         r1.addRange(0x100000,0x10FFFD);
906                     }
907                     Token.categories.put(n, r1);
908                     Token.categories2.put(n, Token.complementRanges(r1));
909                     buffer.setLength(0);
910                     buffer.append("Is");
911                     if (n.indexOf(' ') >= 0) {
912                         for (int ci = 0;  ci < n.length();  ci ++) {
913                             if (n.charAt(ci) != ' ') {
914                                 buffer.append(n.charAt(ci));
915                             }
916                         }
917                     }
918                     else {
919                         buffer.append(n);
920                     }
921                     Token.setAlias(buffer.toString(), n, true);
922                 }
923
924                 // TR#18 1.2
925                 Token.setAlias("ASSIGNED", "Cn", false);
926                 Token.setAlias("UNASSIGNED", "Cn", true);
927                 Token all = Token.createRange();
928                 all.addRange(0, Token.UTF16_MAX);
929                 Token.categories.put("ALL", all);
930                 Token.categories2.put("ALL", Token.complementRanges(all));
931                 Token.registerNonXS("ASSIGNED");
932                 Token.registerNonXS("UNASSIGNED");
933                 Token.registerNonXS("ALL");
934
935                 Token isalpha = Token.createRange();
936                 isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); // Lu
937                 isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); // Ll
938                 isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); // Lo
939                 Token.categories.put("IsAlpha", isalpha);
940                 Token.categories2.put("IsAlpha", Token.complementRanges(isalpha));
941                 Token.registerNonXS("IsAlpha");
942
943                 Token isalnum = Token.createRange();
944                 isalnum.mergeRanges(isalpha);   // Lu Ll Lo
945                 isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); // Nd
946                 Token.categories.put("IsAlnum", isalnum);
947                 Token.categories2.put("IsAlnum", Token.complementRanges(isalnum));
948                 Token.registerNonXS("IsAlnum");
949
950                 Token isspace = Token.createRange();
951                 isspace.mergeRanges(Token.token_spaces);
952                 isspace.mergeRanges(ranges[CHAR_SEPARATOR]); // Z
953                 Token.categories.put("IsSpace", isspace);
954                 Token.categories2.put("IsSpace", Token.complementRanges(isspace));
955                 Token.registerNonXS("IsSpace");
956
957                 Token isword = Token.createRange();
958                 isword.mergeRanges(isalnum);     // Lu Ll Lo Nd
959                 isword.addRange('_', '_');
960                 Token.categories.put("IsWord", isword);
961                 Token.categories2.put("IsWord", Token.complementRanges(isword));
962                 Token.registerNonXS("IsWord");
963
964                 Token isascii = Token.createRange();
965                 isascii.addRange(0, 127);
966                 Token.categories.put("IsASCII", isascii);
967                 Token.categories2.put("IsASCII", Token.complementRanges(isascii));
968                 Token.registerNonXS("IsASCII");
969
970                 Token isnotgraph = Token.createRange();
971                 isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
972                 isnotgraph.addRange(' ', ' ');
973                 Token.categories.put("IsGraph", Token.complementRanges(isnotgraph));
974                 Token.categories2.put("IsGraph", isnotgraph);
975                 Token.registerNonXS("IsGraph");
976
977                 Token isxdigit = Token.createRange();
978                 isxdigit.addRange('0', '9');
979                 isxdigit.addRange('A', 'F');
980                 isxdigit.addRange('a', 'f');
981                 Token.categories.put("IsXDigit", Token.complementRanges(isxdigit));
982                 Token.categories2.put("IsXDigit", isxdigit);
983                 Token.registerNonXS("IsXDigit");
984
985                 Token.setAlias("IsDigit", "Nd", true);
986                 Token.setAlias("IsUpper", "Lu", true);
987                 Token.setAlias("IsLower", "Ll", true);
988                 Token.setAlias("IsCntrl", "C", true);
989                 Token.setAlias("IsPrint", "C", false);
990                 Token.setAlias("IsPunct", "P", true);
991                 Token.registerNonXS("IsDigit");
992                 Token.registerNonXS("IsUpper");
993                 Token.registerNonXS("IsLower");
994                 Token.registerNonXS("IsCntrl");
995                 Token.registerNonXS("IsPrint");
996                 Token.registerNonXS("IsPunct");
997
998                 Token.setAlias("alpha", "IsAlpha", true);
999                 Token.setAlias("alnum", "IsAlnum", true);
1000                 Token.setAlias("ascii", "IsASCII", true);
1001                 Token.setAlias("cntrl", "IsCntrl", true);
1002                 Token.setAlias("digit", "IsDigit", true);
1003                 Token.setAlias("graph", "IsGraph", true);
1004                 Token.setAlias("lower", "IsLower", true);
1005                 Token.setAlias("print", "IsPrint", true);
1006                 Token.setAlias("punct", "IsPunct", true);
1007                 Token.setAlias("space", "IsSpace", true);
1008                 Token.setAlias("upper", "IsUpper", true);
1009                 Token.setAlias("word", "IsWord", true); // Perl extension
1010                 Token.setAlias("xdigit", "IsXDigit", true);
1011                 Token.registerNonXS("alpha");
1012                 Token.registerNonXS("alnum");
1013                 Token.registerNonXS("ascii");
1014                 Token.registerNonXS("cntrl");
1015                 Token.registerNonXS("digit");
1016                 Token.registerNonXS("graph");
1017                 Token.registerNonXS("lower");
1018                 Token.registerNonXS("print");
1019                 Token.registerNonXS("punct");
1020                 Token.registerNonXS("space");
1021                 Token.registerNonXS("upper");
1022                 Token.registerNonXS("word");
1023                 Token.registerNonXS("xdigit");
1024             } // synchronized
1025         } // if null
1026         RangeToken tok = positive ? (RangeToken)Token.categories.get(name)
1027             : (RangeToken)Token.categories2.get(name);
1028         //if (tok == null) System.out.println(name);
1029         return tok;
1030     }
1031     static protected RangeToken getRange(String name, boolean positive, boolean xs) {
1032         RangeToken range = Token.getRange(name, positive);
1033         if (xs && range != null && Token.isRegisterNonXS(name)) {
1034             range = null;
1035         }
1036         return range;
1037     }
1038
1039     static Hashtable<String, String> nonxs = null;
1040     /**
1041      * This method is called by only getRange().
1042      * So this method need not MT-safe.
1043      */
1044     static protected void registerNonXS(String name) {
1045         if (Token.nonxs == null) {
1046             Token.nonxs = new Hashtable<>();
1047         }
1048         Token.nonxs.put(name, name);
1049     }
1050     static protected boolean isRegisterNonXS(String name) {
1051         if (Token.nonxs == null) {
1052             return false;
1053         }
1054         //DEBUG
1055         //System.err.println("isRegisterNonXS: "+name);
1056         return Token.nonxs.containsKey(name);
1057     }
1058
1059     private static void setAlias(String newName, String name, boolean positive) {
1060         Token t1 = Token.categories.get(name);
1061         Token t2 = Token.categories2.get(name);
1062         if (positive) {
1063             Token.categories.put(newName, t1);
1064             Token.categories2.put(newName, t2);
1065         } else {
1066             Token.categories2.put(newName, t1);
1067             Token.categories.put(newName, t2);
1068         }
1069     }
1070
1071     // ------------------------------------------------------
1072
1073     static final String viramaString =
1074     "\u094D"// ;DEVANAGARI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1075     +"\u09CD"//;BENGALI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1076     +"\u0A4D"//;GURMUKHI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1077     +"\u0ACD"//;GUJARATI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1078     +"\u0B4D"//;ORIYA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1079     +"\u0BCD"//;TAMIL SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1080     +"\u0C4D"//;TELUGU SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1081     +"\u0CCD"//;KANNADA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1082     +"\u0D4D"//;MALAYALAM SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
1083     +"\u0E3A"//;THAI CHARACTER PHINTHU;Mn;9;ON;;;;;N;THAI VOWEL SIGN PHINTHU;;;;
1084     +"\u0F84";//;TIBETAN MARK HALANTA;Mn;9;ON;;;;;N;TIBETAN VIRAMA;;;;
1085
1086     static private Token token_grapheme = null;
1087     static synchronized Token getGraphemePattern() {
1088         if (Token.token_grapheme != null) {
1089             return Token.token_grapheme;
1090         }
1091
1092         Token base_char = Token.createRange();  // [{ASSIGNED}]-[{M},{C}]
1093         base_char.mergeRanges(Token.getRange("ASSIGNED", true));
1094         base_char.subtractRanges(Token.getRange("M", true));
1095         base_char.subtractRanges(Token.getRange("C", true));
1096
1097         Token virama = Token.createRange();
1098         for (int i = 0;  i < Token.viramaString.length(); i++) {
1099             virama.addRange(i, i);
1100         }
1101
1102         Token combiner_wo_virama = Token.createRange();
1103         combiner_wo_virama.mergeRanges(Token.getRange("M", true));
1104         combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final
1105         combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras
1106
1107         Token left = Token.createUnion();       // base_char?
1108         left.addChild(base_char);
1109         left.addChild(Token.token_empty);
1110
1111         Token foo = Token.createUnion();
1112         foo.addChild(Token.createConcat(virama, Token.getRange("L", true)));
1113         foo.addChild(combiner_wo_virama);
1114
1115         foo = Token.createClosure(foo);
1116
1117         foo = Token.createConcat(left, foo);
1118
1119         Token.token_grapheme = foo;
1120         return Token.token_grapheme;
1121     }
1122
1123     /**
1124      * Combing Character Sequence in Perl 5.6.
1125      */
1126     static private Token token_ccs = null;
1127     static synchronized Token getCombiningCharacterSequence() {
1128         if (Token.token_ccs != null) {
1129             return Token.token_ccs;
1130         }
1131
1132         Token foo = Token.createClosure(Token.getRange("M", true)); // \pM*
1133         foo = Token.createConcat(Token.getRange("M", false), foo); // \PM + \pM*
1134         Token.token_ccs = foo;
1135         return Token.token_ccs;
1136     }
1137
1138     // ------------------------------------------------------
1139
1140     // ------------------------------------------------------
1141     /**
1142      * This class represents a node in parse tree.
1143      */
1144     static class StringToken extends Token implements java.io.Serializable {
1145
1146         private static final long serialVersionUID = -4614366944218504172L;
1147
1148         String string;
1149         final int refNumber;
1150
1151         StringToken(int type, String str, int n) {
1152             super(type);
1153             this.string = str;
1154             this.refNumber = n;
1155         }
1156
1157         @Override
1158         int getReferenceNumber() {              // for STRING
1159             return this.refNumber;
1160         }
1161         @Override
1162         String getString() {                    // for STRING
1163             return this.string;
1164         }
1165
1166         @Override
1167         public String toString(int options) {
1168             if (this.type == BACKREFERENCE) {
1169                 return "\\"+this.refNumber;
1170             } else {
1171                 return REUtil.quoteMeta(this.string);
1172             }
1173         }
1174     }
1175
1176     /**
1177      * This class represents a node in parse tree.
1178      */
1179     static class ConcatToken extends Token implements java.io.Serializable {
1180
1181         private static final long serialVersionUID = 8717321425541346381L;
1182
1183         final Token child;
1184         final Token child2;
1185
1186         ConcatToken(Token t1, Token t2) {
1187             super(Token.CONCAT);
1188             this.child = t1;
1189             this.child2 = t2;
1190         }
1191
1192         @Override
1193         int size() {
1194             return 2;
1195         }
1196         @Override
1197         Token getChild(int index) {
1198             return index == 0 ? this.child : this.child2;
1199         }
1200
1201         @Override
1202         public String toString(int options) {
1203             String ret;
1204             if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) {
1205                 ret = this.child.toString(options)+"+";
1206             } else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) {
1207                 ret = this.child.toString(options)+"+?";
1208             } else {
1209                 ret = this.child.toString(options)+this.child2.toString(options);
1210             }
1211             return ret;
1212         }
1213     }
1214
1215     /**
1216      * This class represents a node in parse tree.
1217      */
1218     static class CharToken extends Token implements java.io.Serializable {
1219
1220         private static final long serialVersionUID = -4394272816279496989L;
1221
1222         final int chardata;
1223
1224         CharToken(int type, int ch) {
1225             super(type);
1226             this.chardata = ch;
1227         }
1228
1229         @Override
1230         int getChar() {
1231             return this.chardata;
1232         }
1233
1234         @Override
1235         public String toString(int options) {
1236             String ret;
1237             switch (this.type) {
1238               case CHAR:
1239                 switch (this.chardata) {
1240                   case '|':  case '*':  case '+':  case '?':
1241                   case '(':  case ')':  case '.':  case '[':
1242                   case '{':  case '\\':
1243                     ret = "\\"+(char)this.chardata;
1244                     break;
1245                   case '\f':  ret = "\\f";  break;
1246                   case '\n':  ret = "\\n";  break;
1247                   case '\r':  ret = "\\r";  break;
1248                   case '\t':  ret = "\\t";  break;
1249                   case 0x1b:  ret = "\\e";  break;
1250                     //case 0x0b:  ret = "\\v";  break;
1251                   default:
1252                     if (this.chardata >= 0x10000) {
1253                         String pre = "0"+Integer.toHexString(this.chardata);
1254                         ret = "\\v"+pre.substring(pre.length()-6, pre.length());
1255                     } else {
1256                         ret = ""+(char)this.chardata;
1257                     }
1258                 }
1259                 break;
1260
1261               case ANCHOR:
1262                 if (this == Token.token_linebeginning || this == Token.token_lineend) {
1263                     ret = ""+(char)this.chardata;
1264                 } else {
1265                     ret = "\\"+(char)this.chardata;
1266                 }
1267                 break;
1268
1269               default:
1270                 ret = null;
1271             }
1272             return ret;
1273         }
1274
1275         @Override
1276         boolean match(int ch) {
1277             if (this.type == CHAR) {
1278                 return ch == this.chardata;
1279             } else {
1280                 throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
1281             }
1282         }
1283     }
1284
1285     /**
1286      * This class represents a node in parse tree.
1287      */
1288     static class ClosureToken extends Token implements java.io.Serializable {
1289
1290         private static final long serialVersionUID = 1308971930673997452L;
1291
1292         int min;
1293         int max;
1294         final Token child;
1295
1296         ClosureToken(int type, Token tok) {
1297             super(type);
1298             this.child = tok;
1299             this.setMin(-1);
1300             this.setMax(-1);
1301         }
1302
1303         @Override
1304         int size() {
1305             return 1;
1306         }
1307         @Override
1308         Token getChild(int index) {
1309             return this.child;
1310         }
1311
1312         @Override
1313         final void setMin(int min) {
1314             this.min = min;
1315         }
1316         @Override
1317         final void setMax(int max) {
1318             this.max = max;
1319         }
1320         @Override
1321         final int getMin() {
1322             return this.min;
1323         }
1324         @Override
1325         final int getMax() {
1326             return this.max;
1327         }
1328
1329         @Override
1330         public String toString(int options) {
1331             String ret;
1332             if (this.type == CLOSURE) {
1333                 if (this.getMin() < 0 && this.getMax() < 0) {
1334                     ret = this.child.toString(options)+"*";
1335                 } else if (this.getMin() == this.getMax()) {
1336                     ret = this.child.toString(options)+"{"+this.getMin()+"}";
1337                 } else if (this.getMin() >= 0 && this.getMax() >= 0) {
1338                     ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}";
1339                 } else if (this.getMin() >= 0 && this.getMax() < 0) {
1340                     ret = this.child.toString(options)+"{"+this.getMin()+",}";
1341                 } else {
1342                     throw new RuntimeException("Token#toString(): CLOSURE "
1343                                                +this.getMin()+", "+this.getMax());
1344                 }
1345             } else {
1346                 if (this.getMin() < 0 && this.getMax() < 0) {
1347                     ret = this.child.toString(options)+"*?";
1348                 } else if (this.getMin() == this.getMax()) {
1349                     ret = this.child.toString(options)+"{"+this.getMin()+"}?";
1350                 } else if (this.getMin() >= 0 && this.getMax() >= 0) {
1351                     ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?";
1352                 } else if (this.getMin() >= 0 && this.getMax() < 0) {
1353                     ret = this.child.toString(options)+"{"+this.getMin()+",}?";
1354                 } else {
1355                     throw new RuntimeException("Token#toString(): NONGREEDYCLOSURE "
1356                                                +this.getMin()+", "+this.getMax());
1357                 }
1358             }
1359             return ret;
1360         }
1361     }
1362
1363     /**
1364      * This class represents a node in parse tree.
1365      */
1366     static class ParenToken extends Token implements java.io.Serializable {
1367
1368         private static final long serialVersionUID = -5938014719827987704L;
1369
1370         final Token child;
1371         final int parennumber;
1372
1373         ParenToken(int type, Token tok, int paren) {
1374             super(type);
1375             this.child = tok;
1376             this.parennumber = paren;
1377         }
1378
1379         @Override
1380         int size() {
1381             return 1;
1382         }
1383         @Override
1384         Token getChild(int index) {
1385             return this.child;
1386         }
1387
1388         @Override
1389         int getParenNumber() {
1390             return this.parennumber;
1391         }
1392
1393         @Override
1394         public String toString(int options) {
1395             String ret = null;
1396             switch (this.type) {
1397               case PAREN:
1398                 if (this.parennumber == 0) {
1399                     ret = "(?:"+this.child.toString(options)+")";
1400                 } else {
1401                     ret = "("+this.child.toString(options)+")";
1402                 }
1403                 break;
1404
1405               case LOOKAHEAD:
1406                 ret = "(?="+this.child.toString(options)+")";
1407                 break;
1408               case NEGATIVELOOKAHEAD:
1409                 ret = "(?!"+this.child.toString(options)+")";
1410                 break;
1411               case LOOKBEHIND:
1412                 ret = "(?<="+this.child.toString(options)+")";
1413                 break;
1414               case NEGATIVELOOKBEHIND:
1415                 ret = "(?<!"+this.child.toString(options)+")";
1416                 break;
1417               case INDEPENDENT:
1418                 ret = "(?>"+this.child.toString(options)+")";
1419                 break;
1420             }
1421             return ret;
1422         }
1423     }
1424
1425     /**
1426      * (?(condition)yes-pattern|no-pattern)
1427      */
1428     static class ConditionToken extends Token implements java.io.Serializable {
1429
1430         private static final long serialVersionUID = 4353765277910594411L;
1431
1432         final int refNumber;
1433         final Token condition;
1434         final Token yes;
1435         final Token no;
1436         ConditionToken(int refno, Token cond, Token yespat, Token nopat) {
1437             super(Token.CONDITION);
1438             this.refNumber = refno;
1439             this.condition = cond;
1440             this.yes = yespat;
1441             this.no = nopat;
1442         }
1443         @Override
1444         int size() {
1445             return this.no == null ? 1 : 2;
1446         }
1447         @Override
1448         Token getChild(int index) {
1449             if (index == 0) {
1450                 return this.yes;
1451             }
1452             if (index == 1) {
1453                 return this.no;
1454             }
1455             throw new RuntimeException("Internal Error: "+index);
1456         }
1457
1458         @Override
1459         public String toString(int options) {
1460             String ret;
1461             if (refNumber > 0) {
1462                 ret = "(?("+refNumber+")";
1463             } else if (this.condition.type == Token.ANCHOR) {
1464                 ret = "(?("+this.condition+")";
1465             } else {
1466                 ret = "(?"+this.condition;
1467             }
1468
1469             if (this.no == null) {
1470                 ret += this.yes+")";
1471             } else {
1472                 ret += this.yes+"|"+this.no+")";
1473             }
1474             return ret;
1475         }
1476     }
1477
1478     /**
1479      * (ims-ims: .... )
1480      */
1481     static class ModifierToken extends Token implements java.io.Serializable {
1482
1483         private static final long serialVersionUID = -9114536559696480356L;
1484
1485         final Token child;
1486         final int add;
1487         final int mask;
1488
1489         ModifierToken(Token tok, int add, int mask) {
1490             super(Token.MODIFIERGROUP);
1491             this.child = tok;
1492             this.add = add;
1493             this.mask = mask;
1494         }
1495
1496         @Override
1497         int size() {
1498             return 1;
1499         }
1500         @Override
1501         Token getChild(int index) {
1502             return this.child;
1503         }
1504
1505         int getOptions() {
1506             return this.add;
1507         }
1508         int getOptionsMask() {
1509             return this.mask;
1510         }
1511
1512         @Override
1513         public String toString(int options) {
1514             return "(?"
1515                 +(this.add == 0 ? "" : REUtil.createOptionString(this.add))
1516                 +(this.mask == 0 ? "" : REUtil.createOptionString(this.mask))
1517                 +":"
1518                 +this.child.toString(options)
1519                 +")";
1520         }
1521     }
1522
1523     /**
1524      * This class represents a node in parse tree.
1525      * for UNION or CONCAT.
1526      */
1527     static class UnionToken extends Token implements java.io.Serializable {
1528
1529         private static final long serialVersionUID = -2568843945989489861L;
1530
1531         Vector<Token> children;
1532
1533         UnionToken(int type) {
1534             super(type);
1535         }
1536
1537         @Override
1538         void addChild(Token tok) {
1539             if (tok == null) {
1540                 return;
1541             }
1542             if (this.children == null) {
1543                 this.children = new Vector<>();
1544             }
1545             if (this.type == UNION) {
1546                 this.children.addElement(tok);
1547                 return;
1548             }
1549                                                 // This is CONCAT, and new child is CONCAT.
1550             if (tok.type == CONCAT) {
1551                 for (int i = 0;  i < tok.size();  i ++)
1552                  {
1553                     this.addChild(tok.getChild(i)); // Recursion
1554                 }
1555                 return;
1556             }
1557             int size = this.children.size();
1558             if (size == 0) {
1559                 this.children.addElement(tok);
1560                 return;
1561             }
1562             Token previous = this.children.elementAt(size-1);
1563             if (!((previous.type == CHAR || previous.type == STRING)
1564                   && (tok.type == CHAR || tok.type == STRING))) {
1565                 this.children.addElement(tok);
1566                 return;
1567             }
1568
1569             //System.err.println("Merge '"+previous+"' and '"+tok+"'.");
1570
1571             StringBuffer buffer;
1572             int nextMaxLength = (tok.type == CHAR ? 2 : tok.getString().length());
1573             if (previous.type == CHAR) {        // Replace previous token by STRING
1574                 buffer = new StringBuffer(2 + nextMaxLength);
1575                 int ch = previous.getChar();
1576                 if (ch >= 0x10000) {
1577                     buffer.append(REUtil.decomposeToSurrogates(ch));
1578                 } else {
1579                     buffer.append((char)ch);
1580                 }
1581                 previous = Token.createString(null);
1582                 this.children.setElementAt(previous, size-1);
1583             } else {                            // STRING
1584                 buffer = new StringBuffer(previous.getString().length() + nextMaxLength);
1585                 buffer.append(previous.getString());
1586             }
1587
1588             if (tok.type == CHAR) {
1589                 int ch = tok.getChar();
1590                 if (ch >= 0x10000) {
1591                     buffer.append(REUtil.decomposeToSurrogates(ch));
1592                 } else {
1593                     buffer.append((char)ch);
1594                 }
1595             } else {
1596                 buffer.append(tok.getString());
1597             }
1598
1599             ((StringToken)previous).string = new String(buffer);
1600         }
1601
1602         @Override
1603         int size() {
1604             return this.children == null ? 0 : this.children.size();
1605         }
1606         @Override
1607         Token getChild(int index) {
1608             return this.children.elementAt(index);
1609         }
1610
1611         @Override
1612         public String toString(int options) {
1613             String ret;
1614             if (this.type == CONCAT) {
1615                 if (this.children.size() == 2) {
1616                     Token ch = this.getChild(0);
1617                     Token ch2 = this.getChild(1);
1618                     if (ch2.type == CLOSURE && ch2.getChild(0) == ch) {
1619                         ret = ch.toString(options)+"+";
1620                     } else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) {
1621                         ret = ch.toString(options)+"+?";
1622                     } else {
1623                         ret = ch.toString(options)+ch2.toString(options);
1624                     }
1625                 } else {
1626                     StringBuffer sb = new StringBuffer();
1627                     for (int i = 0;  i < this.children.size();  i ++) {
1628                         sb.append(this.children.elementAt(i).toString(options));
1629                     }
1630                     ret = new String(sb);
1631                 }
1632                 return ret;
1633             }
1634             if (this.children.size() == 2 && this.getChild(1).type == EMPTY) {
1635                 ret = this.getChild(0).toString(options)+"?";
1636             } else if (this.children.size() == 2
1637                        && this.getChild(0).type == EMPTY) {
1638                 ret = this.getChild(1).toString(options)+"??";
1639             } else {
1640                 StringBuffer sb = new StringBuffer();
1641                 sb.append(this.children.elementAt(0).toString(options));
1642                 for (int i = 1;  i < this.children.size();  i ++) {
1643                     sb.append('|');
1644                     sb.append(this.children.elementAt(i).toString(options));
1645                 }
1646                 ret = new String(sb);
1647             }
1648             return ret;
1649         }
1650     }
1651 }