BUG-5410: initial import of Xerces RegularExpression
[yangtools.git] / third-party / xsd-regex / src / main / java / org / opendaylight / yangtools / xsd / regex / ParserForXMLSchema.java
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  * 
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  * 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 package org.opendaylight.yangtools.xsd.regex;
19
20 import java.util.Hashtable;
21 import java.util.Locale;
22
23 /**
24  * A regular expression parser for the XML Schema.
25  * 
26  * @xerces.internal
27  *
28  * @author TAMURA Kent <kent@trl.ibm.co.jp>
29  * @version $Id: ParserForXMLSchema.java 1638344 2014-11-11 20:15:46Z mrglavas $
30  */
31 class ParserForXMLSchema extends RegexParser {
32
/**
 * Creates a parser using the superclass no-argument constructor; no locale is set here.
 */
public ParserForXMLSchema() {
    super();
}
/**
 * Creates a parser and hands {@code locale} unchanged to the superclass constructor.
 *
 * @param locale the locale forwarded to the superclass
 */
public ParserForXMLSchema(Locale locale) {
    super(locale);
}
39
40     Token processCaret() throws ParseException {
41         this.next();
42         return Token.createChar('^');
43     }
44     Token processDollar() throws ParseException {
45         this.next();
46         return Token.createChar('$');
47      }
48     Token processLookahead() throws ParseException {
49         throw ex("parser.process.1", this.offset);
50     }
51     Token processNegativelookahead() throws ParseException {
52         throw ex("parser.process.1", this.offset);
53     }
54     Token processLookbehind() throws ParseException {
55         throw ex("parser.process.1", this.offset);
56     }
57     Token processNegativelookbehind() throws ParseException {
58         throw ex("parser.process.1", this.offset);
59     }
60     Token processBacksolidus_A() throws ParseException {
61         throw ex("parser.process.1", this.offset);
62     }
63     Token processBacksolidus_Z() throws ParseException {
64         throw ex("parser.process.1", this.offset);
65     }
66     Token processBacksolidus_z() throws ParseException {
67         throw ex("parser.process.1", this.offset);
68     }
69     Token processBacksolidus_b() throws ParseException {
70         throw ex("parser.process.1", this.offset);
71     }
72     Token processBacksolidus_B() throws ParseException {
73         throw ex("parser.process.1", this.offset);
74     }
75     Token processBacksolidus_lt() throws ParseException {
76         throw ex("parser.process.1", this.offset);
77     }
78     Token processBacksolidus_gt() throws ParseException {
79         throw ex("parser.process.1", this.offset);
80     }
81     Token processStar(Token tok) throws ParseException {
82         this.next();
83         return Token.createClosure(tok);
84     }
85     Token processPlus(Token tok) throws ParseException {
86         // X+ -> XX*
87         this.next();
88         return Token.createConcat(tok, Token.createClosure(tok));
89     }
90     Token processQuestion(Token tok) throws ParseException {
91         // X? -> X|
92         this.next();
93         Token par = Token.createUnion();
94         par.addChild(tok);
95         par.addChild(Token.createEmpty());
96         return par;
97     }
98     boolean checkQuestion(int off) {
99         return false;
100     }
101     Token processParen() throws ParseException {
102         this.next();
103         Token tok = Token.createParen(this.parseRegex(), 0);
104         if (this.read() != T_RPAREN)  throw ex("parser.factor.1", this.offset-1);
105         this.next();                            // Skips ')'
106         return tok;
107     }
108     Token processParen2() throws ParseException {
109         throw ex("parser.process.1", this.offset);
110     }
111     Token processCondition() throws ParseException {
112         throw ex("parser.process.1", this.offset);
113     }
114     Token processModifiers() throws ParseException {
115         throw ex("parser.process.1", this.offset);
116     }
117     Token processIndependent() throws ParseException {
118         throw ex("parser.process.1", this.offset);
119     }
120     Token processBacksolidus_c() throws ParseException {
121         this.next();
122         return this.getTokenForShorthand('c');
123     }
124     Token processBacksolidus_C() throws ParseException {
125         this.next();
126         return this.getTokenForShorthand('C');
127     }
128     Token processBacksolidus_i() throws ParseException {
129         this.next();
130         return this.getTokenForShorthand('i');
131     }
132     Token processBacksolidus_I() throws ParseException {
133         this.next();
134         return this.getTokenForShorthand('I');
135     }
136     Token processBacksolidus_g() throws ParseException {
137         throw this.ex("parser.process.1", this.offset-2);
138     }
139     Token processBacksolidus_X() throws ParseException {
140         throw ex("parser.process.1", this.offset-2);
141     }
142     Token processBackreference() throws ParseException {
143         throw ex("parser.process.1", this.offset-4);
144     }
145
146     int processCIinCharacterClass(RangeToken tok, int c) {
147         tok.mergeRanges(this.getTokenForShorthand(c));
148         return -1;
149     }
150
151
152     /**
153      * Parses a character-class-expression, not a character-class-escape.
154      *
155      * c-c-expression   ::= '[' c-group ']'
156      * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
157      * positive-c-group ::= (c-range | c-c-escape)+
158      * negative-c-group ::= '^' positive-c-group
159      * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
160      * subtraction      ::= '-' c-c-expression
161      * c-range          ::= single-range | from-to-range
162      * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
163      * cc-normal-c      ::= <any character except [, ], \>
164      * from-to-range    ::= cc-normal-c '-' cc-normal-c
165      *
166      * @param useNrage Ignored.
167      * @return This returns no NrageToken.
168      */
169     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
170         this.setContext(S_INBRACKETS);
171         this.next();                            // '['
172         boolean nrange = false;
173         boolean wasDecoded = false;                     // used to detect if the last - was escaped.
174         RangeToken base = null;
175         RangeToken tok;
176         if (this.read() == T_CHAR && this.chardata == '^') {
177             nrange = true;
178             this.next();                        // '^'
179             base = Token.createRange();
180             base.addRange(0, Token.UTF16_MAX);
181             tok = Token.createRange();
182         } else {
183             tok = Token.createRange();
184         }
185         int type;
186         boolean firstloop = true;
187         while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
188                 
189                 wasDecoded = false;
190             // single-range | from-to-range | subtraction
191             if (type == T_CHAR && this.chardata == ']' && !firstloop) {
192                 if (nrange) {
193                     base.subtractRanges(tok);
194                     tok = base;
195                 }
196                 break;
197             }
198             int c = this.chardata;
199             boolean end = false;
200             if (type == T_BACKSOLIDUS) {
201                 switch (c) {
202                   case 'd':  case 'D':
203                   case 'w':  case 'W':
204                   case 's':  case 'S':
205                     tok.mergeRanges(this.getTokenForShorthand(c));
206                     end = true;
207                     break;
208
209                   case 'i':  case 'I':
210                   case 'c':  case 'C':
211                     c = this.processCIinCharacterClass(tok, c);
212                     if (c < 0)  end = true;
213                     break;
214                     
215                   case 'p':
216                   case 'P':
217                     int pstart = this.offset;
218                     RangeToken tok2 = this.processBacksolidus_pP(c);
219                     if (tok2 == null)  throw this.ex("parser.atom.5", pstart);
220                     tok.mergeRanges(tok2);
221                     end = true;
222                     break;
223                    
224                  case '-':
225                         c = this.decodeEscaped();
226                         wasDecoded = true;
227                         break;
228
229                   default:
230                     c = this.decodeEscaped();
231                 } // \ + c
232             } // backsolidus
233             else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
234                                                 // Subraction
235                 if (nrange) {
236                     base.subtractRanges(tok);
237                     tok = base;
238                 }
239                 RangeToken range2 = this.parseCharacterClass(false);
240                 tok.subtractRanges(range2);
241                 if (this.read() != T_CHAR || this.chardata != ']')
242                     throw this.ex("parser.cc.5", this.offset);
243                 break;                          // Exit this loop
244             }
245             this.next();
246             if (!end) {                         // if not shorthands...
247                 if (type == T_CHAR) {
248                     if (c == '[')  throw this.ex("parser.cc.6", this.offset-2);
249                     if (c == ']')  throw this.ex("parser.cc.7", this.offset-2);
250                     if (c == '-' && this.chardata != ']' && !firstloop)  throw this.ex("parser.cc.8", this.offset-2);   // if regex = '[-]' then invalid
251                 }
252                 if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
253                     if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
254                         tok.addRange(c, c);
255                     }
256                     else {
257                         addCaseInsensitiveChar(tok, c);
258                     }
259                 } else {                        // Found '-'
260                                                 // Is this '-' is a from-to token??
261                     this.next(); // Skips '-'
262                     if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
263                                                 // c '-' ']' -> '-' is a single-range.
264                     if(type == T_CHAR && this.chardata == ']') {                                // if - is at the last position of the group
265                         if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
266                             tok.addRange(c, c);
267                         }
268                         else {
269                             addCaseInsensitiveChar(tok, c);
270                         }
271                         tok.addRange('-', '-');
272                     }
273                     else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
274                         throw this.ex("parser.cc.8", this.offset-1);
275                     } else {
276                         
277                         int rangeend = this.chardata;
278                         if (type == T_CHAR) {
279                             if (rangeend == '[')  throw this.ex("parser.cc.6", this.offset-1);
280                             if (rangeend == ']')  throw this.ex("parser.cc.7", this.offset-1);
281                             if (rangeend == '-')  throw this.ex("parser.cc.8", this.offset-2);
282                         }
283                         else if (type == T_BACKSOLIDUS)
284                             rangeend = this.decodeEscaped();
285                         this.next();
286
287                         if (c > rangeend)  throw this.ex("parser.ope.3", this.offset-1);
288                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
289                                 (c > 0xffff && rangeend > 0xffff)) {
290                             tok.addRange(c, rangeend);
291                         }
292                         else {
293                             addCaseInsensitiveCharRange(tok, c, rangeend);
294                         }
295                     }
296                 }
297             }
298             firstloop = false;
299         }
300         if (this.read() == T_EOF)
301             throw this.ex("parser.cc.2", this.offset);
302         tok.sortRanges();
303         tok.compactRanges();
304         //tok.dumpRanges();
305         this.setContext(S_NORMAL);
306         this.next();                    // Skips ']'
307
308         return tok;
309     }
310
311     protected RangeToken parseSetOperations() throws ParseException {
312         throw this.ex("parser.process.1", this.offset);
313     }
314  
315     Token getTokenForShorthand(int ch) {
316         switch (ch) {
317           case 'd':
318             return ParserForXMLSchema.getRange("xml:isDigit", true);
319           case 'D':
320             return ParserForXMLSchema.getRange("xml:isDigit", false);
321           case 'w':
322             return ParserForXMLSchema.getRange("xml:isWord", true);
323           case 'W':
324             return ParserForXMLSchema.getRange("xml:isWord", false);
325           case 's':
326             return ParserForXMLSchema.getRange("xml:isSpace", true);
327           case 'S':
328             return ParserForXMLSchema.getRange("xml:isSpace", false);
329           case 'c':
330             return ParserForXMLSchema.getRange("xml:isNameChar", true);
331           case 'C':
332             return ParserForXMLSchema.getRange("xml:isNameChar", false);
333           case 'i':
334             return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
335           case 'I':
336             return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
337           default:
338             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
339         }
340     }
341     int decodeEscaped() throws ParseException {
342         if (this.read() != T_BACKSOLIDUS)  throw ex("parser.next.1", this.offset-1);
343         int c = this.chardata;
344         switch (c) {
345           case 'n':  c = '\n';  break; // LINE FEED U+000A
346           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
347           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
348           case '\\':
349           case '|':
350           case '.':
351           case '^':
352           case '-':
353           case '?':
354           case '*':
355           case '+':
356           case '{':
357           case '}':
358           case '(':
359           case ')':
360           case '[':
361           case ']':
362             break; // return actucal char
363           default:
364             throw ex("parser.process.1", this.offset-2);
365         }
366         return c;
367     }
368
369     static private Hashtable ranges = null;
370     static private Hashtable ranges2 = null;
371     static synchronized protected RangeToken getRange(String name, boolean positive) {
372         if (ranges == null) {
373             ranges = new Hashtable();
374             ranges2 = new Hashtable();
375
376             Token tok = Token.createRange();
377             setupRange(tok, SPACES);
378             ranges.put("xml:isSpace", tok);
379             ranges2.put("xml:isSpace", Token.complementRanges(tok));
380
381             tok = Token.createRange();
382 //            setupRange(tok, DIGITS);
383             setupRange(tok, DIGITS_INTS);
384             ranges.put("xml:isDigit", tok);
385             ranges2.put("xml:isDigit", Token.complementRanges(tok));
386
387             /*
388              * \w is defined by the XML Schema specification to be:
389              * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 
390              */
391             tok = Token.createRange();
392             tok.mergeRanges(Token.getRange("P", true));
393             tok.mergeRanges(Token.getRange("Z", true));
394             tok.mergeRanges(Token.getRange("C", true));
395             ranges2.put("xml:isWord", tok);
396             ranges.put("xml:isWord", Token.complementRanges(tok));
397
398             tok = Token.createRange();
399             setupRange(tok, NAMECHARS);
400             ranges.put("xml:isNameChar", tok);
401             ranges2.put("xml:isNameChar", Token.complementRanges(tok));
402
403             tok = Token.createRange();
404             setupRange(tok, LETTERS);
405             tok.addRange('_', '_');
406             tok.addRange(':', ':');
407             ranges.put("xml:isInitialNameChar", tok);
408             ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
409         }
410         RangeToken tok = positive ? (RangeToken)ranges.get(name)
411             : (RangeToken)ranges2.get(name);
412         return tok;
413     }
414
415     static void setupRange(Token range, String src) {
416         int len = src.length();
417         for (int i = 0;  i < len;  i += 2)
418             range.addRange(src.charAt(i), src.charAt(i+1));
419     }
420
421     static void setupRange(Token range, int[] src) {
422         int len = src.length;
423         for (int i = 0;  i < len;  i += 2)
424             range.addRange(src[i], src[i+1]);
425     }
426
427     private static final String SPACES = "\t\n\r\r  ";
// Pair table for the "xml:isNameChar" range: getRange() passes this string to
// setupRange(Token, String), which reads it two characters at a time and adds each
// (first, last) pair as one range. The closing +"" adds nothing to the value.
428     private static final String NAMECHARS =
429         "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
430         +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
431         +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
432         +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
433         +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
434         +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
435         +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
436         +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
437         +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
438         +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
439         +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
440         +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
441         +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
442         +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
443         +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
444         +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
445         +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
446         +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
447         +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
448         +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
449         +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
450         +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
451         +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
452         +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
453         +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
454         +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
455         +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
456         +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
457         +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
458         +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
459         +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
460         +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
461         +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
462         +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
463         +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
464         +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
465         +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
466         +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
467         +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
468         +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
469         +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
470         +"";
// Pair table for the "xml:isInitialNameChar" range: getRange() passes this string to
// setupRange(Token, String), which reads it two characters at a time and adds each
// (first, last) pair as one range; getRange() then adds '_' and ':' on top.
471     private static final String LETTERS =
472         "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
473         +"\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
474         +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
475         +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
476         +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
477         +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
478         +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
479         +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
480         +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
481         +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
482         +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
483         +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
484         +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
485         +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
486         +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
487         +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
488         +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
489         +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
490         +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
491         +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
492         +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
493         +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
494         +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
495         +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
496         +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
497         +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
498         +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
499         +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
500         +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
501         +"\uac00\ud7a3";
502     private static final String DIGITS =
503         "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
504         +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
505         +"\u0F20\u0F29";
506     private static final int[] DIGITS_INTS = {
507         0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
508         0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
509         0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
510         0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
511         0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
512         0x1D7CE, 0x1D7FF
513     };
514 }