third-party/xsd-regex/src/main/java/org/opendaylight/yangtools/xsd/regex/ParserForXMLSchema.java

   1 /*
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 package org.opendaylight.yangtools.xsd.regex;
  19
  20 import java.util.Hashtable;
  21 import java.util.Locale;
  22
  23 /**
  24  * A regular expression parser for the XML Schema.
  25  *
  26  * @xerces.internal
  27  *
  28  * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
  29  * @version $Id: ParserForXMLSchema.java 1638344 2014-11-11 20:15:46Z mrglavas $
  30  */
  31 class ParserForXMLSchema extends RegexParser {
  32
  33     public ParserForXMLSchema() {
  34         //this.setLocale(Locale.getDefault());
  35     }
  36     public ParserForXMLSchema(Locale locale) {
  37         super(locale);
  38     }
  39
  40     @Override
  41     Token processCaret() throws ParseException {
  42         this.next();
  43         return Token.createChar('^');
  44     }
  45     @Override
  46     Token processDollar() throws ParseException {
  47         this.next();
  48         return Token.createChar('$');
  49      }
  50     @Override
  51     Token processLookahead() throws ParseException {
  52         throw ex("parser.process.1", this.offset);
  53     }
  54     @Override
  55     Token processNegativelookahead() throws ParseException {
  56         throw ex("parser.process.1", this.offset);
  57     }
  58     @Override
  59     Token processLookbehind() throws ParseException {
  60         throw ex("parser.process.1", this.offset);
  61     }
  62     @Override
  63     Token processNegativelookbehind() throws ParseException {
  64         throw ex("parser.process.1", this.offset);
  65     }
  66     @Override
  67     Token processBacksolidus_A() throws ParseException {
  68         throw ex("parser.process.1", this.offset);
  69     }
  70     @Override
  71     Token processBacksolidus_Z() throws ParseException {
  72         throw ex("parser.process.1", this.offset);
  73     }
  74     @Override
  75     Token processBacksolidus_z() throws ParseException {
  76         throw ex("parser.process.1", this.offset);
  77     }
  78     @Override
  79     Token processBacksolidus_b() throws ParseException {
  80         throw ex("parser.process.1", this.offset);
  81     }
  82     @Override
  83     Token processBacksolidus_B() throws ParseException {
  84         throw ex("parser.process.1", this.offset);
  85     }
  86     @Override
  87     Token processBacksolidus_lt() throws ParseException {
  88         throw ex("parser.process.1", this.offset);
  89     }
  90     @Override
  91     Token processBacksolidus_gt() throws ParseException {
  92         throw ex("parser.process.1", this.offset);
  93     }
  94     @Override
  95     Token processStar(Token tok) throws ParseException {
  96         this.next();
  97         return Token.createClosure(tok);
  98     }
  99     @Override
 100     Token processPlus(Token tok) throws ParseException {
 101         // X+ -> XX*
 102         this.next();
 103         return Token.createConcat(tok, Token.createClosure(tok));
 104     }
 105     @Override
 106     Token processQuestion(Token tok) throws ParseException {
 107         // X? -> X|
 108         this.next();
 109         Token par = Token.createUnion();
 110         par.addChild(tok);
 111         par.addChild(Token.createEmpty());
 112         return par;
 113     }
 114     @Override
 115     boolean checkQuestion(int off) {
 116         return false;
 117     }
 118     @Override
 119     Token processParen() throws ParseException {
 120         this.next();
 121         Token tok = Token.createParen(this.parseRegex(), 0);
 122         if (this.read() != T_RPAREN) {
 123             throw ex("parser.factor.1", this.offset-1);
 124         }
 125         this.next();                            // Skips ')'
 126         return tok;
 127     }
 128     @Override
 129     Token processParen2() throws ParseException {
 130         throw ex("parser.process.1", this.offset);
 131     }
 132     @Override
 133     Token processCondition() throws ParseException {
 134         throw ex("parser.process.1", this.offset);
 135     }
 136     @Override
 137     Token processModifiers() throws ParseException {
 138         throw ex("parser.process.1", this.offset);
 139     }
 140     @Override
 141     Token processIndependent() throws ParseException {
 142         throw ex("parser.process.1", this.offset);
 143     }
 144     @Override
 145     Token processBacksolidus_c() throws ParseException {
 146         this.next();
 147         return this.getTokenForShorthand('c');
 148     }
 149     @Override
 150     Token processBacksolidus_C() throws ParseException {
 151         this.next();
 152         return this.getTokenForShorthand('C');
 153     }
 154     @Override
 155     Token processBacksolidus_i() throws ParseException {
 156         this.next();
 157         return this.getTokenForShorthand('i');
 158     }
 159     @Override
 160     Token processBacksolidus_I() throws ParseException {
 161         this.next();
 162         return this.getTokenForShorthand('I');
 163     }
 164     @Override
 165     Token processBacksolidus_g() throws ParseException {
 166         throw this.ex("parser.process.1", this.offset-2);
 167     }
 168     @Override
 169     Token processBacksolidus_X() throws ParseException {
 170         throw ex("parser.process.1", this.offset-2);
 171     }
 172     @Override
 173     Token processBackreference() throws ParseException {
 174         throw ex("parser.process.1", this.offset-4);
 175     }
 176
 177     @Override
 178     int processCIinCharacterClass(RangeToken tok, int c) {
 179         tok.mergeRanges(this.getTokenForShorthand(c));
 180         return -1;
 181     }
 182
 183
 184     /**
 185      * Parses a character-class-expression, not a character-class-escape.
 186      *
 187      * c-c-expression   ::= '[' c-group ']'
 188      * c-group          ::= positive-c-group | negative-c-group | c-c-subtraction
 189      * positive-c-group ::= (c-range | c-c-escape)+
 190      * negative-c-group ::= '^' positive-c-group
 191      * c-c-subtraction  ::= (positive-c-group | negative-c-group) subtraction
 192      * subtraction      ::= '-' c-c-expression
 193      * c-range          ::= single-range | from-to-range
 194      * single-range     ::= multi-c-escape | category-c-escape | block-c-escape | &lt;any XML char&gt;
 195      * cc-normal-c      ::= &lt;any character except [, ], \&gt;
 196      * from-to-range    ::= cc-normal-c '-' cc-normal-c
 197      *
 198      * @param useNrage Ignored.
 199      * @return This returns no NrageToken.
 200      */
 201     @Override
 202     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
 203         this.setContext(S_INBRACKETS);
 204         this.next();                            // '['
 205         boolean nrange = false;
 206         boolean wasDecoded = false;                     // used to detect if the last - was escaped.
 207         RangeToken base = null;
 208         RangeToken tok;
 209         if (this.read() == T_CHAR && this.chardata == '^') {
 210             nrange = true;
 211             this.next();                        // '^'
 212             base = Token.createRange();
 213             base.addRange(0, Token.UTF16_MAX);
 214             tok = Token.createRange();
 215         } else {
 216             tok = Token.createRange();
 217         }
 218         int type;
 219         boolean firstloop = true;
 220         while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
 221
 222                 wasDecoded = false;
 223             // single-range | from-to-range | subtraction
 224             if (type == T_CHAR && this.chardata == ']' && !firstloop) {
 225                 if (nrange) {
 226                     base.subtractRanges(tok);
 227                     tok = base;
 228                 }
 229                 break;
 230             }
 231             int c = this.chardata;
 232             boolean end = false;
 233             if (type == T_BACKSOLIDUS) {
 234                 switch (c) {
 235                   case 'd':  case 'D':
 236                   case 'w':  case 'W':
 237                   case 's':  case 'S':
 238                     tok.mergeRanges(this.getTokenForShorthand(c));
 239                     end = true;
 240                     break;
 241
 242                   case 'i':  case 'I':
 243                   case 'c':  case 'C':
 244                     c = this.processCIinCharacterClass(tok, c);
 245                     if (c < 0) {
 246                         end = true;
 247                     }
 248                     break;
 249
 250                   case 'p':
 251                   case 'P':
 252                     int pstart = this.offset;
 253                     RangeToken tok2 = this.processBacksolidus_pP(c);
 254                     if (tok2 == null) {
 255                         throw this.ex("parser.atom.5", pstart);
 256                     }
 257                     tok.mergeRanges(tok2);
 258                     end = true;
 259                     break;
 260
 261                  case '-':
 262                         c = this.decodeEscaped();
 263                         wasDecoded = true;
 264                         break;
 265
 266                   default:
 267                     c = this.decodeEscaped();
 268                 } // \ + c
 269             } // backsolidus
 270             else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
 271                                                 // Subraction
 272                 if (nrange) {
 273                     base.subtractRanges(tok);
 274                     tok = base;
 275                 }
 276                 RangeToken range2 = this.parseCharacterClass(false);
 277                 tok.subtractRanges(range2);
 278                 if (this.read() != T_CHAR || this.chardata != ']') {
 279                     throw this.ex("parser.cc.5", this.offset);
 280                 }
 281                 break;                          // Exit this loop
 282             }
 283             this.next();
 284             if (!end) {                         // if not shorthands...
 285                 if (type == T_CHAR) {
 286                     if (c == '[') {
 287                         throw this.ex("parser.cc.6", this.offset-2);
 288                     }
 289                     if (c == ']') {
 290                         throw this.ex("parser.cc.7", this.offset-2);
 291                     }
 292                     if (c == '-' && this.chardata != ']' && !firstloop)
 293                      {
 294                         throw this.ex("parser.cc.8", this.offset-2);    // if regex = '[-]' then invalid
 295                     }
 296                 }
 297                 if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
 298                     if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
 299                         tok.addRange(c, c);
 300                     }
 301                     else {
 302                         addCaseInsensitiveChar(tok, c);
 303                     }
 304                 } else {                        // Found '-'
 305                                                 // Is this '-' is a from-to token??
 306                     this.next(); // Skips '-'
 307                     if ((type = this.read()) == T_EOF) {
 308                         throw this.ex("parser.cc.2", this.offset);
 309                     }
 310                                                 // c '-' ']' -> '-' is a single-range.
 311                     if(type == T_CHAR && this.chardata == ']') {                                // if - is at the last position of the group
 312                         if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
 313                             tok.addRange(c, c);
 314                         }
 315                         else {
 316                             addCaseInsensitiveChar(tok, c);
 317                         }
 318                         tok.addRange('-', '-');
 319                     }
 320                     else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
 321                         throw this.ex("parser.cc.8", this.offset-1);
 322                     } else {
 323
 324                         int rangeend = this.chardata;
 325                         if (type == T_CHAR) {
 326                             if (rangeend == '[') {
 327                                 throw this.ex("parser.cc.6", this.offset-1);
 328                             }
 329                             if (rangeend == ']') {
 330                                 throw this.ex("parser.cc.7", this.offset-1);
 331                             }
 332                             if (rangeend == '-') {
 333                                 throw this.ex("parser.cc.8", this.offset-2);
 334                             }
 335                         }
 336                         else if (type == T_BACKSOLIDUS) {
 337                             rangeend = this.decodeEscaped();
 338                         }
 339                         this.next();
 340
 341                         if (c > rangeend) {
 342                             throw this.ex("parser.ope.3", this.offset-1);
 343                         }
 344                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
 345                                 (c > 0xffff && rangeend > 0xffff)) {
 346                             tok.addRange(c, rangeend);
 347                         }
 348                         else {
 349                             addCaseInsensitiveCharRange(tok, c, rangeend);
 350                         }
 351                     }
 352                 }
 353             }
 354             firstloop = false;
 355         }
 356         if (this.read() == T_EOF) {
 357             throw this.ex("parser.cc.2", this.offset);
 358         }
 359         tok.sortRanges();
 360         tok.compactRanges();
 361         //tok.dumpRanges();
 362         this.setContext(S_NORMAL);
 363         this.next();                    // Skips ']'
 364
 365         return tok;
 366     }
 367
 368     @Override
 369     protected RangeToken parseSetOperations() throws ParseException {
 370         throw this.ex("parser.process.1", this.offset);
 371     }
 372
 373     @Override
 374     Token getTokenForShorthand(int ch) {
 375         switch (ch) {
 376           case 'd':
 377             return ParserForXMLSchema.getRange("xml:isDigit", true);
 378           case 'D':
 379             return ParserForXMLSchema.getRange("xml:isDigit", false);
 380           case 'w':
 381             return ParserForXMLSchema.getRange("xml:isWord", true);
 382           case 'W':
 383             return ParserForXMLSchema.getRange("xml:isWord", false);
 384           case 's':
 385             return ParserForXMLSchema.getRange("xml:isSpace", true);
 386           case 'S':
 387             return ParserForXMLSchema.getRange("xml:isSpace", false);
 388           case 'c':
 389             return ParserForXMLSchema.getRange("xml:isNameChar", true);
 390           case 'C':
 391             return ParserForXMLSchema.getRange("xml:isNameChar", false);
 392           case 'i':
 393             return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
 394           case 'I':
 395             return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
 396           default:
 397             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
 398         }
 399     }
 400     @Override
 401     int decodeEscaped() throws ParseException {
 402         if (this.read() != T_BACKSOLIDUS) {
 403             throw ex("parser.next.1", this.offset-1);
 404         }
 405         int c = this.chardata;
 406         switch (c) {
 407           case 'n':  c = '\n';  break; // LINE FEED U+000A
 408           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
 409           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
 410           case '\\':
 411           case '|':
 412           case '.':
 413           case '^':
 414           case '-':
 415           case '?':
 416           case '*':
 417           case '+':
 418           case '{':
 419           case '}':
 420           case '(':
 421           case ')':
 422           case '[':
 423           case ']':
 424             break; // return actucal char
 425           default:
 426             throw ex("parser.process.1", this.offset-2);
 427         }
 428         return c;
 429     }
 430
 431     static private Hashtable<String, Token> ranges = null;
 432     static private Hashtable<String, Token> ranges2 = null;
 433     static synchronized protected RangeToken getRange(String name, boolean positive) {
 434         if (ranges == null) {
 435             ranges = new Hashtable<>();
 436             ranges2 = new Hashtable<>();
 437
 438             Token tok = Token.createRange();
 439             setupRange(tok, SPACES);
 440             ranges.put("xml:isSpace", tok);
 441             ranges2.put("xml:isSpace", Token.complementRanges(tok));
 442
 443             tok = Token.createRange();
 444 //            setupRange(tok, DIGITS);
 445             setupRange(tok, DIGITS_INTS);
 446             ranges.put("xml:isDigit", tok);
 447             ranges2.put("xml:isDigit", Token.complementRanges(tok));
 448
 449             /*
 450              * \w is defined by the XML Schema specification to be:
 451              * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
 452              */
 453             tok = Token.createRange();
 454             tok.mergeRanges(Token.getRange("P", true));
 455             tok.mergeRanges(Token.getRange("Z", true));
 456             tok.mergeRanges(Token.getRange("C", true));
 457             ranges2.put("xml:isWord", tok);
 458             ranges.put("xml:isWord", Token.complementRanges(tok));
 459
 460             tok = Token.createRange();
 461             setupRange(tok, NAMECHARS);
 462             ranges.put("xml:isNameChar", tok);
 463             ranges2.put("xml:isNameChar", Token.complementRanges(tok));
 464
 465             tok = Token.createRange();
 466             setupRange(tok, LETTERS);
 467             tok.addRange('_', '_');
 468             tok.addRange(':', ':');
 469             ranges.put("xml:isInitialNameChar", tok);
 470             ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
 471         }
 472         RangeToken tok = positive ? (RangeToken)ranges.get(name)
 473             : (RangeToken)ranges2.get(name);
 474         return tok;
 475     }
 476
 477     static void setupRange(Token range, String src) {
 478         int len = src.length();
 479         for (int i = 0;  i < len;  i += 2) {
 480             range.addRange(src.charAt(i), src.charAt(i+1));
 481         }
 482     }
 483
 484     static void setupRange(Token range, int[] src) {
 485         int len = src.length;
 486         for (int i = 0;  i < len;  i += 2) {
 487             range.addRange(src[i], src[i+1]);
 488         }
 489     }
 490
 491     private static final String SPACES = "\t\n\r\r  ";
 492     private static final String NAMECHARS =
 493         "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
 494         +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
 495         +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
 496         +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
 497         +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
 498         +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
 499         +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
 500         +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
 501         +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
 502         +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
 503         +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
 504         +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
 505         +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
 506         +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
 507         +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
 508         +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
 509         +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
 510         +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
 511         +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
 512         +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
 513         +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
 514         +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
 515         +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
 516         +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
 517         +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
 518         +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
 519         +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
 520         +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
 521         +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
 522         +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
 523         +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
 524         +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
 525         +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
 526         +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
 527         +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
 528         +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
 529         +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
 530         +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
 531         +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
 532         +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
 533         +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
 534         +"";
 535     private static final String LETTERS =
 536         "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
 537         +"\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
 538         +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
 539         +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
 540         +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
 541         +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
 542         +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
 543         +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
 544         +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
 545         +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
 546         +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
 547         +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
 548         +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
 549         +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
 550         +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
 551         +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
 552         +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
 553         +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
 554         +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
 555         +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
 556         +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
 557         +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
 558         +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
 559         +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
 560         +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
 561         +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
 562         +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
 563         +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
 564         +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
 565         +"\uac00\ud7a3";
 566     private static final String DIGITS =
 567         "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
 568         +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
 569         +"\u0F20\u0F29";
 570     private static final int[] DIGITS_INTS = {
 571         0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
 572         0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
 573         0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
 574         0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29, 0x1040, 0x1049,
 575         0x1369, 0x1371, 0x17E0, 0x17E9, 0x1810, 0x1819, 0xFF10, 0xFF19,
 576         0x1D7CE, 0x1D7FF
 577     };
 578 }