BUG-5410: introduce RegularExpression.toPatternString()
[yangtools.git] / third-party / xsd-regex / src / main / java / org / opendaylight / yangtools / xsd / regex / ParserForXMLSchema.java
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 package org.opendaylight.yangtools.xsd.regex;
19
20 import java.util.Hashtable;
21 import java.util.Locale;
22
23 /**
24  * A regular expression parser for the XML Schema.
25  *
26  * @xerces.internal
27  *
28  * @author TAMURA Kent <kent@trl.ibm.co.jp>
29  * @version $Id: ParserForXMLSchema.java 1638344 2014-11-11 20:15:46Z mrglavas $
30  */
31 class ParserForXMLSchema extends RegexParser {
32
33     public ParserForXMLSchema() {
34         //this.setLocale(Locale.getDefault());
35     }
36     public ParserForXMLSchema(Locale locale) {
37         super(locale);
38     }
39
40     @Override
41     Token processCaret() throws ParseException {
42         this.next();
43         return Token.createChar('^');
44     }
45     @Override
46     Token processDollar() throws ParseException {
47         this.next();
48         return Token.createChar('$');
49      }
50     @Override
51     Token processLookahead() throws ParseException {
52         throw ex("parser.process.1", this.offset);
53     }
54     @Override
55     Token processNegativelookahead() throws ParseException {
56         throw ex("parser.process.1", this.offset);
57     }
58     @Override
59     Token processLookbehind() throws ParseException {
60         throw ex("parser.process.1", this.offset);
61     }
62     @Override
63     Token processNegativelookbehind() throws ParseException {
64         throw ex("parser.process.1", this.offset);
65     }
66     @Override
67     Token processBacksolidus_A() throws ParseException {
68         throw ex("parser.process.1", this.offset);
69     }
70     @Override
71     Token processBacksolidus_Z() throws ParseException {
72         throw ex("parser.process.1", this.offset);
73     }
74     @Override
75     Token processBacksolidus_z() throws ParseException {
76         throw ex("parser.process.1", this.offset);
77     }
78     @Override
79     Token processBacksolidus_b() throws ParseException {
80         throw ex("parser.process.1", this.offset);
81     }
82     @Override
83     Token processBacksolidus_B() throws ParseException {
84         throw ex("parser.process.1", this.offset);
85     }
86     @Override
87     Token processBacksolidus_lt() throws ParseException {
88         throw ex("parser.process.1", this.offset);
89     }
90     @Override
91     Token processBacksolidus_gt() throws ParseException {
92         throw ex("parser.process.1", this.offset);
93     }
94     @Override
95     Token processStar(Token tok) throws ParseException {
96         this.next();
97         return Token.createClosure(tok);
98     }
99     @Override
100     Token processPlus(Token tok) throws ParseException {
101         // X+ -> XX*
102         this.next();
103         return Token.createConcat(tok, Token.createClosure(tok));
104     }
105     @Override
106     Token processQuestion(Token tok) throws ParseException {
107         // X? -> X|
108         this.next();
109         Token par = Token.createUnion();
110         par.addChild(tok);
111         par.addChild(Token.createEmpty());
112         return par;
113     }
114     @Override
115     boolean checkQuestion(int off) {
116         return false;
117     }
118     @Override
119     Token processParen() throws ParseException {
120         this.next();
121         Token tok = Token.createParen(this.parseRegex(), 0);
122         if (this.read() != T_RPAREN) {
123             throw ex("parser.factor.1", this.offset-1);
124         }
125         this.next();                            // Skips ')'
126         return tok;
127     }
128     @Override
129     Token processParen2() throws ParseException {
130         throw ex("parser.process.1", this.offset);
131     }
132     @Override
133     Token processCondition() throws ParseException {
134         throw ex("parser.process.1", this.offset);
135     }
136     @Override
137     Token processModifiers() throws ParseException {
138         throw ex("parser.process.1", this.offset);
139     }
140     @Override
141     Token processIndependent() throws ParseException {
142         throw ex("parser.process.1", this.offset);
143     }
144     @Override
145     Token processBacksolidus_c() throws ParseException {
146         this.next();
147         return this.getTokenForShorthand('c');
148     }
149     @Override
150     Token processBacksolidus_C() throws ParseException {
151         this.next();
152         return this.getTokenForShorthand('C');
153     }
154     @Override
155     Token processBacksolidus_i() throws ParseException {
156         this.next();
157         return this.getTokenForShorthand('i');
158     }
159     @Override
160     Token processBacksolidus_I() throws ParseException {
161         this.next();
162         return this.getTokenForShorthand('I');
163     }
164     @Override
165     Token processBacksolidus_g() throws ParseException {
166         throw this.ex("parser.process.1", this.offset-2);
167     }
168     @Override
169     Token processBacksolidus_X() throws ParseException {
170         throw ex("parser.process.1", this.offset-2);
171     }
172     @Override
173     Token processBackreference() throws ParseException {
174         throw ex("parser.process.1", this.offset-4);
175     }
176
177     @Override
178     int processCIinCharacterClass(RangeToken tok, int c) {
179         tok.mergeRanges(this.getTokenForShorthand(c));
180         return -1;
181     }
182
183
    /**
     * Parses a character-class-expression, not a character-class-escape.
     *
     * c-c-expression   ::= '[' c-group ']'
     * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
     * positive-c-group ::= (c-range | c-c-escape)+
     * negative-c-group ::= '^' positive-c-group
     * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
     * subtraction      ::= '-' c-c-expression
     * c-range          ::= single-range | from-to-range
     * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
     * cc-normal-c      ::= <any character except [, ], \>
     * from-to-range    ::= cc-normal-c '-' cc-normal-c
     *
     * The parser context is switched to S_INBRACKETS on entry and back to
     * S_NORMAL just before the closing ']' is consumed.
     *
     * @param useNrange Ignored.
     * @return The accumulated range, sorted and compacted. A leading '^' is
     *         resolved here by subtracting the listed items from the range
     *         0..Token.UTF16_MAX, so the result is always a plain range.
     * @throws ParseException on a misplaced '[', ']' or '-', on a reversed
     *         from-to range, on an unknown category/block escape, or when the
     *         input ends before the closing ']'.
     */
    @Override
    protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
        this.setContext(S_INBRACKETS);
        this.next();                            // '['
        boolean nrange = false;
        boolean wasDecoded = false;                     // used to detect if the last - was escaped.
        RangeToken base = null;
        RangeToken tok;
        if (this.read() == T_CHAR && this.chardata == '^') {
            // Negated class: collect the listed items in tok and subtract them
            // from the full range held in base once the class is closed.
            nrange = true;
            this.next();                        // '^'
            base = Token.createRange();
            base.addRange(0, Token.UTF16_MAX);
            tok = Token.createRange();
        } else {
            tok = Token.createRange();
        }
        int type;
        boolean firstloop = true;
        // Don't use 'continue' in this loop: the 'firstloop = false' at the
        // bottom of the body has to run after every item.
        while ((type = this.read()) != T_EOF) {

                wasDecoded = false;
            // single-range | from-to-range | subtraction
            // ']' closes the class only after at least one item; a leading ']'
            // falls through and is rejected below with "parser.cc.7".
            if (type == T_CHAR && this.chardata == ']' && !firstloop) {
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                break;
            }
            int c = this.chardata;
            boolean end = false;                // true once the item has been merged as a whole range
            if (type == T_BACKSOLIDUS) {
                switch (c) {
                  case 'd':  case 'D':
                  case 'w':  case 'W':
                  case 's':  case 'S':
                    tok.mergeRanges(this.getTokenForShorthand(c));
                    end = true;
                    break;

                  case 'i':  case 'I':
                  case 'c':  case 'C':
                    c = this.processCIinCharacterClass(tok, c);
                    if (c < 0) {                // negative result: the hook already merged the range
                        end = true;
                    }
                    break;

                  case 'p':
                  case 'P':
                    int pstart = this.offset;   // remembered for the error position
                    RangeToken tok2 = this.processBacksolidus_pP(c);
                    if (tok2 == null) {
                        throw this.ex("parser.atom.5", pstart);
                    }
                    tok.mergeRanges(tok2);
                    end = true;
                    break;

                 case '-':
                        // An escaped '-' is remembered so that it is not
                        // mistaken for an unescaped leading '-' further down.
                        c = this.decodeEscaped();
                        wasDecoded = true;
                        break;

                  default:
                    c = this.decodeEscaped();
                } // \ + c
            } // backsolidus
            else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
                                                // Subtraction
                if (nrange) {
                    base.subtractRanges(tok);
                    tok = base;
                }
                // Parse the nested class and remove its members from what has
                // been collected so far.
                RangeToken range2 = this.parseCharacterClass(false);
                tok.subtractRanges(range2);
                // After the nested class only the outer ']' is acceptable.
                if (this.read() != T_CHAR || this.chardata != ']') {
                    throw this.ex("parser.cc.5", this.offset);
                }
                break;                          // Exit this loop
            }
            this.next();                        // from here on read()/chardata describe the following token
            if (!end) {                         // if not shorthands...
                if (type == T_CHAR) {
                    if (c == '[') {
                        throw this.ex("parser.cc.6", this.offset-2);
                    }
                    if (c == ']') {
                        throw this.ex("parser.cc.7", this.offset-2);
                    }
                    // An unescaped '-' that is not the first item and is not
                    // directly followed by ']' is rejected.
                    if (c == '-' && this.chardata != ']' && !firstloop)
                     {
                        throw this.ex("parser.cc.8", this.offset-2);
                    }
                }
                // Single character: either no '-' follows, or c is itself an
                // unescaped '-' in first position (which never starts a range).
                if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
                    if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
                        tok.addRange(c, c);
                    }
                    else {
                        addCaseInsensitiveChar(tok, c);
                    }
                } else {                        // Found '-'
                                                // Is this '-' a from-to separator?
                    this.next(); // Skips '-'
                    if ((type = this.read()) == T_EOF) {
                        throw this.ex("parser.cc.2", this.offset);
                    }
                                                // c '-' ']' -> '-' is a single-range.
                    if(type == T_CHAR && this.chardata == ']') {                                // if - is at the last position of the group
                        if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
                            tok.addRange(c, c);
                        }
                        else {
                            addCaseInsensitiveChar(tok, c);
                        }
                        tok.addRange('-', '-');
                    }
                    else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
                        throw this.ex("parser.cc.8", this.offset-1);
                    } else {

                        // from-to range: validate and decode the upper bound.
                        int rangeend = this.chardata;
                        if (type == T_CHAR) {
                            if (rangeend == '[') {
                                throw this.ex("parser.cc.6", this.offset-1);
                            }
                            if (rangeend == ']') {
                                throw this.ex("parser.cc.7", this.offset-1);
                            }
                            if (rangeend == '-') {
                                throw this.ex("parser.cc.8", this.offset-2);
                            }
                        }
                        else if (type == T_BACKSOLIDUS) {
                            rangeend = this.decodeEscaped();
                        }
                        this.next();

                        if (c > rangeend) {     // reversed bounds are an error
                            throw this.ex("parser.ope.3", this.offset-1);
                        }
                        if (!this.isSet(RegularExpression.IGNORE_CASE) ||
                                (c > 0xffff && rangeend > 0xffff)) {
                            tok.addRange(c, rangeend);
                        }
                        else {
                            addCaseInsensitiveCharRange(tok, c, rangeend);
                        }
                    }
                }
            }
            firstloop = false;
        }
        // Leaving the loop at end of input means the class was never closed.
        if (this.read() == T_EOF) {
            throw this.ex("parser.cc.2", this.offset);
        }
        tok.sortRanges();
        tok.compactRanges();
        //tok.dumpRanges();
        this.setContext(S_NORMAL);
        this.next();                    // Skips ']'

        return tok;
    }
367
368     @Override
369     protected RangeToken parseSetOperations() throws ParseException {
370         throw this.ex("parser.process.1", this.offset);
371     }
372
373     @Override
374     Token getTokenForShorthand(int ch) {
375         switch (ch) {
376           case 'd':
377             return ParserForXMLSchema.getRange("xml:isDigit", true);
378           case 'D':
379             return ParserForXMLSchema.getRange("xml:isDigit", false);
380           case 'w':
381             return ParserForXMLSchema.getRange("xml:isWord", true);
382           case 'W':
383             return ParserForXMLSchema.getRange("xml:isWord", false);
384           case 's':
385             return ParserForXMLSchema.getRange("xml:isSpace", true);
386           case 'S':
387             return ParserForXMLSchema.getRange("xml:isSpace", false);
388           case 'c':
389             return ParserForXMLSchema.getRange("xml:isNameChar", true);
390           case 'C':
391             return ParserForXMLSchema.getRange("xml:isNameChar", false);
392           case 'i':
393             return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
394           case 'I':
395             return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
396           default:
397             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
398         }
399     }
400     @Override
401     int decodeEscaped() throws ParseException {
402         if (this.read() != T_BACKSOLIDUS) {
403             throw ex("parser.next.1", this.offset-1);
404         }
405         int c = this.chardata;
406         switch (c) {
407           case 'n':  c = '\n';  break; // LINE FEED U+000A
408           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
409           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
410           case '\\':
411           case '|':
412           case '.':
413           case '^':
414           case '-':
415           case '?':
416           case '*':
417           case '+':
418           case '{':
419           case '}':
420           case '(':
421           case ')':
422           case '[':
423           case ']':
424             break; // return actucal char
425           default:
426             throw ex("parser.process.1", this.offset-2);
427         }
428         return c;
429     }
430
431     static private Hashtable<String, Token> ranges = null;
432     static private Hashtable<String, Token> ranges2 = null;
433     static synchronized protected RangeToken getRange(String name, boolean positive) {
434         if (ranges == null) {
435             ranges = new Hashtable<>();
436             ranges2 = new Hashtable<>();
437
438             Token tok = Token.createRange();
439             setupRange(tok, SPACES);
440             ranges.put("xml:isSpace", tok);
441             ranges2.put("xml:isSpace", Token.complementRanges(tok));
442
443             tok = Token.createRange();
444 //            setupRange(tok, DIGITS);
445             setupRange(tok, DIGITS_INTS);
446             ranges.put("xml:isDigit", tok);
447             ranges2.put("xml:isDigit", Token.complementRanges(tok));
448
449             /*
450              * \w is defined by the XML Schema specification to be:
451              * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
452              */
453             tok = Token.createRange();
454             tok.mergeRanges(Token.getRange("P", true));
455             tok.mergeRanges(Token.getRange("Z", true));
456             tok.mergeRanges(Token.getRange("C", true));
457             ranges2.put("xml:isWord", tok);
458             ranges.put("xml:isWord", Token.complementRanges(tok));
459
460             tok = Token.createRange();
461             setupRange(tok, NAMECHARS);
462             ranges.put("xml:isNameChar", tok);
463             ranges2.put("xml:isNameChar", Token.complementRanges(tok));
464
465             tok = Token.createRange();
466             setupRange(tok, LETTERS);
467             tok.addRange('_', '_');
468             tok.addRange(':', ':');
469             ranges.put("xml:isInitialNameChar", tok);
470             ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
471         }
472         RangeToken tok = positive ? (RangeToken)ranges.get(name)
473             : (RangeToken)ranges2.get(name);
474         return tok;
475     }
476
477     static void setupRange(Token range, String src) {
478         int len = src.length();
479         for (int i = 0;  i < len;  i += 2) {
480             range.addRange(src.charAt(i), src.charAt(i+1));
481         }
482     }
483
484     static void setupRange(Token range, int[] src) {
485         int len = src.length;
486         for (int i = 0;  i < len;  i += 2) {
487             range.addRange(src[i], src[i+1]);
488         }
489     }
490
    /**
     * Range table for "xml:isSpace", read by setupRange as (first, last) pairs:
     * U+0009..U+000A, U+000D..U+000D and U+0020..U+0020.
     */
    private static final String SPACES = "\t\n\r\r  ";
492     private static final String NAMECHARS =
493         "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
494         +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
495         +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
496         +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
497         +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
498         +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
499         +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
500         +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
501         +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
502         +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
503         +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
504         +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
505         +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
506         +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
507         +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
508         +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
509         +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
510         +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
511         +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
512         +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
513         +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
514         +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
515         +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
516         +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
517         +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
518         +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
519         +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
520         +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
521         +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
522         +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
523         +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
524         +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
525         +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
526         +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
527         +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
528         +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
529         +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
530         +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
531         +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
532         +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
533         +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
534         +"";
535     private static final String LETTERS =
536         "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
537         +"\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
538         +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
539         +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
540         +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
541         +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
542         +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
543         +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
544         +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
545         +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
546         +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
547         +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
548         +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
549         +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
550         +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
551         +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
552         +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
553         +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
554         +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
555         +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
556         +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
557         +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
558         +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
559         +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
560         +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
561         +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
562         +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
563         +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
564         +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
565         +"\uac00\ud7a3";
    /**
     * Digit ranges as (first, last) character pairs.
     * NOTE(review): within this file the only reference is a commented-out
     * setupRange call in getRange, which now uses DIGITS_INTS; this table
     * looks unused. Confirm before removing.
     */
    private static final String DIGITS =
        "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
        +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
        +"\u0F20\u0F29";
    /**
     * Range table for "xml:isDigit", read by setupRange as (first, last) code
     * point pairs. It begins with the same pairs as DIGITS and adds further
     * ones; the last pair (0x1D7CE..0x1D7FF) lies above 0xFFFF, which is
     * presumably why this table is an int array rather than a String.
     */
    private static final int[] DIGITS_INTS = {
        0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
        0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
        0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
        0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
        0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
        0x1D7CE, 0x1D7FF
    };
578 }