third-party/xsd-regex/src/main/java/org/opendaylight/yangtools/xsd/regex/RegexParser.java

   1 /*
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 package org.opendaylight.yangtools.xsd.regex;
  19
  20 import java.util.Locale;
  21 import java.util.MissingResourceException;
  22 import java.util.ResourceBundle;
  23 import java.util.Vector;
  24
  25 /**
  26  * A Regular Expression Parser.
  27  *
  28  * @xerces.internal
  29  *
  30  * @version $Id: RegexParser.java 1129306 2011-05-30 19:18:04Z sandygao $
  31  */
  32 class RegexParser {
  33     static final int T_CHAR = 0;
  34     static final int T_EOF = 1;
  35     static final int T_OR = 2;                  // '|'
  36     static final int T_STAR = 3;                // '*'
  37     static final int T_PLUS = 4;                // '+'
  38     static final int T_QUESTION = 5;            // '?'
  39     static final int T_LPAREN = 6;              // '('
  40     static final int T_RPAREN = 7;              // ')'
  41     static final int T_DOT = 8;                 // '.'
  42     static final int T_LBRACKET = 9;            // '['
  43     static final int T_BACKSOLIDUS = 10;        // '\'
  44     static final int T_CARET = 11;              // '^'
  45     static final int T_DOLLAR = 12;             // '$'
  46     static final int T_LPAREN2 = 13;            // '(?:'
  47     static final int T_LOOKAHEAD = 14;          // '(?='
  48     static final int T_NEGATIVELOOKAHEAD = 15;  // '(?!'
  49     static final int T_LOOKBEHIND = 16;         // '(?<='
  50     static final int T_NEGATIVELOOKBEHIND = 17; // '(?<!'
  51     static final int T_INDEPENDENT = 18;        // '(?>'
  52     static final int T_SET_OPERATIONS = 19;     // '(?['
  53     static final int T_POSIX_CHARCLASS_START = 20; // '[:' in a character class
  54     static final int T_COMMENT = 21;            // '(?#'
  55     static final int T_MODIFIERS = 22;          // '(?' [\-,a-z,A-Z]
  56     static final int T_CONDITION = 23;          // '(?('
  57     static final int T_XMLSCHEMA_CC_SUBTRACTION = 24; // '-[' in a character class
  58
  59     static class ReferencePosition {
  60         int refNumber;
  61         int position;
  62         ReferencePosition(int n, int pos) {
  63             this.refNumber = n;
  64             this.position = pos;
  65         }
  66     }
  67
  68     int offset;
  69     String regex;
  70     int regexlen;
  71     int options;
  72     ResourceBundle resources;
  73     int chardata;
  74     int nexttoken;
  75     static protected final int S_NORMAL = 0;
  76     static protected final int S_INBRACKETS = 1;
  77     static protected final int S_INXBRACKETS = 2;
  78     int context = S_NORMAL;
  79     int parenOpened = 1;
  80     int parennumber = 1;
  81     boolean hasBackReferences;
  82     Vector<ReferencePosition> references = null;
  83
  84     public RegexParser() {
  85         this.setLocale(Locale.getDefault());
  86     }
  87     public RegexParser(Locale locale) {
  88         this.setLocale(locale);
  89     }
  90
  91     public void setLocale(Locale locale) {
  92         try {
  93             if (locale != null) {
  94                 this.resources = ResourceBundle.getBundle("org.apache.xerces.impl.xpath.regex.message", locale);
  95             }
  96             else {
  97                 this.resources = ResourceBundle.getBundle("org.apache.xerces.impl.xpath.regex.message");
  98             }
  99         }
 100         catch (MissingResourceException mre) {
 101             throw new RuntimeException("Installation Problem???  Couldn't load messages: "
 102                                        + mre.getMessage());
 103         }
 104     }
 105
 106     final ParseException ex(String key, int loc) {
 107         return new ParseException(this.resources.getString(key), loc);
 108     }
 109
 110     protected final boolean isSet(int flag) {
 111         return (this.options & flag) == flag;
 112     }
 113
 114     synchronized Token parse(String regex, int options) throws ParseException {
 115         this.options = options;
 116         this.offset = 0;
 117         this.setContext(S_NORMAL);
 118         this.parennumber = 1;
 119         this.parenOpened = 1;
 120         this.hasBackReferences = false;
 121         this.regex = regex;
 122         if (this.isSet(RegularExpression.EXTENDED_COMMENT)) {
 123             this.regex = REUtil.stripExtendedComment(this.regex);
 124         }
 125         this.regexlen = this.regex.length();
 126
 127
 128         this.next();
 129         Token ret = this.parseRegex();
 130         if (this.offset != this.regexlen) {
 131             throw ex("parser.parse.1", this.offset);
 132         }
 133         if (this.read() != T_EOF) {
 134             throw ex("parser.parse.1", this.offset-1);
 135         }
 136         if (this.references != null) {
 137             for (int i = 0;  i < this.references.size();  i ++) {
 138                 ReferencePosition position = this.references.elementAt(i);
 139                 if (this.parennumber <= position.refNumber) {
 140                     throw ex("parser.parse.2", position.position);
 141                 }
 142             }
 143             this.references.removeAllElements();
 144         }
 145         return ret;
 146     }
 147
 148     /*
 149     public RegularExpression createRegex(String regex, int options) throws ParseException {
 150         Token tok = this.parse(regex, options);
 151         return new RegularExpression(regex, tok, this.parennumber, this.hasBackReferences, options);
 152     }
 153     */
 154
 155     protected final void setContext(int con) {
 156         this.context = con;
 157     }
 158
 159     final int read() {
 160         return this.nexttoken;
 161     }
 162
 163     final void next() {
 164         if (this.offset >= this.regexlen) {
 165             this.chardata = -1;
 166             this.nexttoken = T_EOF;
 167             return;
 168         }
 169
 170         int ret;
 171         int ch = this.regex.charAt(this.offset++);
 172         this.chardata = ch;
 173
 174         if (this.context == S_INBRACKETS) {
 175             // In a character class, this.chardata has one character, that is to say,
 176             // a pair of surrogates is composed and stored to this.chardata.
 177             switch (ch) {
 178               case '\\':
 179                 ret = T_BACKSOLIDUS;
 180                 if (this.offset >= this.regexlen) {
 181                     throw ex("parser.next.1", this.offset-1);
 182                 }
 183                 this.chardata = this.regex.charAt(this.offset++);
 184                 break;
 185
 186               case '-':
 187                 // Allow character class subtraction (regardless of whether we are in
 188                 // XML Schema mode or not)
 189                 if (this.offset < this.regexlen && this.regex.charAt(this.offset) == '[') {
 190                     this.offset++;
 191                     ret = T_XMLSCHEMA_CC_SUBTRACTION;
 192                 } else {
 193                     ret = T_CHAR;
 194                 }
 195                 break;
 196
 197               case '[':
 198                 if (!this.isSet(RegularExpression.XMLSCHEMA_MODE)
 199                     && this.offset < this.regexlen && this.regex.charAt(this.offset) == ':') {
 200                     this.offset++;
 201                     ret = T_POSIX_CHARCLASS_START;
 202                     break;
 203                 } // Through down
 204               default:
 205                 if (REUtil.isHighSurrogate(ch) && this.offset < this.regexlen) {
 206                     int low = this.regex.charAt(this.offset);
 207                     if (REUtil.isLowSurrogate(low)) {
 208                         this.chardata = REUtil.composeFromSurrogates(ch, low);
 209                         this.offset ++;
 210                     }
 211                 }
 212                 ret = T_CHAR;
 213             }
 214             this.nexttoken = ret;
 215             return;
 216         }
 217
 218         switch (ch) {
 219           case '|': ret = T_OR;             break;
 220           case '*': ret = T_STAR;           break;
 221           case '+': ret = T_PLUS;           break;
 222           case '?': ret = T_QUESTION;       break;
 223           case ')': ret = T_RPAREN;         break;
 224           case '.': ret = T_DOT;            break;
 225           case '[': ret = T_LBRACKET;       break;
 226           case '^':
 227               if (this.isSet(RegularExpression.XMLSCHEMA_MODE)) {
 228                   ret = T_CHAR;
 229               }
 230               else {
 231                   ret = T_CARET;
 232               }
 233               break;
 234           case '$':
 235               if (this.isSet(RegularExpression.XMLSCHEMA_MODE)) {
 236                   ret = T_CHAR;
 237               }
 238               else {
 239                   ret = T_DOLLAR;
 240               }
 241               break;
 242           case '(':
 243             ret = T_LPAREN;
 244             if (this.offset >= this.regexlen) {
 245                 break;
 246             }
 247             if (this.regex.charAt(this.offset) != '?') {
 248                 break;
 249             }
 250             if (++this.offset >= this.regexlen) {
 251                 throw ex("parser.next.2", this.offset-1);
 252             }
 253             ch = this.regex.charAt(this.offset++);
 254             switch (ch) {
 255               case ':':  ret = T_LPAREN2;            break;
 256               case '=':  ret = T_LOOKAHEAD;          break;
 257               case '!':  ret = T_NEGATIVELOOKAHEAD;  break;
 258               case '[':  ret = T_SET_OPERATIONS;     break;
 259               case '>':  ret = T_INDEPENDENT;        break;
 260               case '<':
 261                 if (this.offset >= this.regexlen) {
 262                     throw ex("parser.next.2", this.offset-3);
 263                 }
 264                 ch = this.regex.charAt(this.offset++);
 265                 if (ch == '=') {
 266                     ret = T_LOOKBEHIND;
 267                 } else if (ch == '!') {
 268                     ret = T_NEGATIVELOOKBEHIND;
 269                 } else {
 270                     throw ex("parser.next.3", this.offset-3);
 271                 }
 272                 break;
 273               case '#':
 274                 while (this.offset < this.regexlen) {
 275                     ch = this.regex.charAt(this.offset++);
 276                     if (ch == ')') {
 277                         break;
 278                     }
 279                 }
 280                 if (ch != ')') {
 281                     throw ex("parser.next.4", this.offset-1);
 282                 }
 283                 ret = T_COMMENT;
 284                 break;
 285               default:
 286                 if (ch == '-' || 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z') {// Options
 287                     this.offset --;
 288                     ret = T_MODIFIERS;
 289                     break;
 290                 } else if (ch == '(') {         // conditional
 291                     ret = T_CONDITION;          // this.offsets points the next of '('.
 292                     break;
 293                 }
 294                 throw ex("parser.next.2", this.offset-2);
 295             }
 296             break;
 297
 298           case '\\':
 299             ret = T_BACKSOLIDUS;
 300             if (this.offset >= this.regexlen) {
 301                 throw ex("parser.next.1", this.offset-1);
 302             }
 303             this.chardata = this.regex.charAt(this.offset++);
 304             break;
 305
 306           default:
 307             ret = T_CHAR;
 308         }
 309         this.nexttoken = ret;
 310     }
 311
 312     /**
 313      * regex ::= term (`|` term)*
 314      * term ::= factor+
 315      * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>'
 316      *            | atom (('*' | '+' | '?' | minmax ) '?'? )?)
 317      *            | '(?=' regex ')'  | '(?!' regex ')'  | '(?&lt;=' regex ')'  | '(?&lt;!' regex ')'
 318      * atom ::= char | '.' | range | '(' regex ')' | '(?:' regex ')' | '\' [0-9]
 319      *          | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block
 320      */
 321     Token parseRegex() throws ParseException {
 322         Token tok = this.parseTerm();
 323         Token parent = null;
 324         while (this.read() == T_OR) {
 325             this.next();                    // '|'
 326             if (parent == null) {
 327                 parent = Token.createUnion();
 328                 parent.addChild(tok);
 329                 tok = parent;
 330             }
 331             tok.addChild(this.parseTerm());
 332         }
 333         return tok;
 334     }
 335
 336     /**
 337      * term ::= factor+
 338      */
 339     Token parseTerm() throws ParseException {
 340         int ch = this.read();
 341         if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) {
 342             return Token.createEmpty();
 343         } else {
 344             Token tok = this.parseFactor();
 345             Token concat = null;
 346             while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) {
 347                 if (concat == null) {
 348                     concat = Token.createConcat();
 349                     concat.addChild(tok);
 350                     tok = concat;
 351                 }
 352                 concat.addChild(this.parseFactor());
 353                 //tok = Token.createConcat(tok, this.parseFactor());
 354             }
 355             return tok;
 356         }
 357     }
 358
 359     // ----------------------------------------------------------------
 360
 361     Token processCaret() throws ParseException {
 362         this.next();
 363         return Token.token_linebeginning;
 364     }
 365     Token processDollar() throws ParseException {
 366         this.next();
 367         return Token.token_lineend;
 368     }
 369     Token processLookahead() throws ParseException {
 370         this.next();
 371         Token tok = Token.createLook(Token.LOOKAHEAD, this.parseRegex());
 372         if (this.read() != T_RPAREN) {
 373             throw ex("parser.factor.1", this.offset-1);
 374         }
 375         this.next();                            // ')'
 376         return tok;
 377     }
 378     Token processNegativelookahead() throws ParseException {
 379         this.next();
 380         Token tok = Token.createLook(Token.NEGATIVELOOKAHEAD, this.parseRegex());
 381         if (this.read() != T_RPAREN) {
 382             throw ex("parser.factor.1", this.offset-1);
 383         }
 384         this.next();                            // ')'
 385         return tok;
 386     }
 387     Token processLookbehind() throws ParseException {
 388         this.next();
 389         Token tok = Token.createLook(Token.LOOKBEHIND, this.parseRegex());
 390         if (this.read() != T_RPAREN) {
 391             throw ex("parser.factor.1", this.offset-1);
 392         }
 393         this.next();                            // ')'
 394         return tok;
 395     }
 396     Token processNegativelookbehind() throws ParseException {
 397         this.next();
 398         Token tok = Token.createLook(Token.NEGATIVELOOKBEHIND, this.parseRegex());
 399         if (this.read() != T_RPAREN) {
 400             throw ex("parser.factor.1", this.offset-1);
 401         }
 402         this.next();                    // ')'
 403         return tok;
 404     }
 405     Token processBacksolidus_A() throws ParseException {
 406         this.next();
 407         return Token.token_stringbeginning;
 408     }
 409     Token processBacksolidus_Z() throws ParseException {
 410         this.next();
 411         return Token.token_stringend2;
 412     }
 413     Token processBacksolidus_z() throws ParseException {
 414         this.next();
 415         return Token.token_stringend;
 416     }
 417     Token processBacksolidus_b() throws ParseException {
 418         this.next();
 419         return Token.token_wordedge;
 420     }
 421     Token processBacksolidus_B() throws ParseException {
 422         this.next();
 423         return Token.token_not_wordedge;
 424     }
 425     Token processBacksolidus_lt() throws ParseException {
 426         this.next();
 427         return Token.token_wordbeginning;
 428     }
 429     Token processBacksolidus_gt() throws ParseException {
 430         this.next();
 431         return Token.token_wordend;
 432     }
 433     Token processStar(Token tok) throws ParseException {
 434         this.next();
 435         if (this.read() == T_QUESTION) {
 436             this.next();
 437             return Token.createNGClosure(tok);
 438         } else {
 439             return Token.createClosure(tok);
 440         }
 441     }
 442     Token processPlus(Token tok) throws ParseException {
 443         // X+ -> XX*
 444         this.next();
 445         if (this.read() == T_QUESTION) {
 446             this.next();
 447             return Token.createConcat(tok, Token.createNGClosure(tok));
 448         } else {
 449             return Token.createConcat(tok, Token.createClosure(tok));
 450         }
 451     }
 452     Token processQuestion(Token tok) throws ParseException {
 453         // X? -> X|
 454         this.next();
 455         Token par = Token.createUnion();
 456         if (this.read() == T_QUESTION) {
 457             this.next();
 458             par.addChild(Token.createEmpty());
 459             par.addChild(tok);
 460         } else {
 461             par.addChild(tok);
 462             par.addChild(Token.createEmpty());
 463         }
 464         return par;
 465     }
 466     boolean checkQuestion(int off) {
 467         return off < this.regexlen && this.regex.charAt(off) == '?';
 468     }
 469     Token processParen() throws ParseException {
 470         this.next();
 471         int p = this.parenOpened++;
 472         Token tok = Token.createParen(this.parseRegex(), p);
 473         if (this.read() != T_RPAREN) {
 474             throw ex("parser.factor.1", this.offset-1);
 475         }
 476         this.parennumber++;
 477         this.next();                            // Skips ')'
 478         return tok;
 479     }
 480     Token processParen2() throws ParseException {
 481         this.next();
 482         Token tok = Token.createParen(this.parseRegex(), 0);
 483         if (this.read() != T_RPAREN) {
 484             throw ex("parser.factor.1", this.offset-1);
 485         }
 486         this.next();                            // Skips ')'
 487         return tok;
 488     }
 489     Token processCondition() throws ParseException {
 490                                                 // this.offset points the next of '('
 491         if (this.offset+1 >= this.regexlen) {
 492             throw ex("parser.factor.4", this.offset);
 493         }
 494                                                 // Parses a condition.
 495         int refno = -1;
 496         Token condition = null;
 497         int ch = this.regex.charAt(this.offset);
 498         if ('1' <= ch && ch <= '9') {
 499             refno = ch-'0';
 500             int finalRefno = refno;
 501
 502             if (this.parennumber <= refno) {
 503                 throw ex("parser.parse.2", this.offset);
 504             }
 505
 506             while (this.offset + 1 < this.regexlen) {
 507                 ch = this.regex.charAt(this.offset + 1);
 508                 if ('0' <= ch && ch <= '9') {
 509                     refno = (refno * 10) + (ch - '0');
 510                     if (refno < this.parennumber) {
 511                         finalRefno= refno;
 512                         ++this.offset;
 513                     }
 514                     else {
 515                         break;
 516                     }
 517                 }
 518                 else {
 519                     break;
 520                 }
 521             }
 522
 523             this.hasBackReferences = true;
 524             if (this.references == null) {
 525                 this.references = new Vector<>();
 526             }
 527             this.references.addElement(new ReferencePosition(finalRefno, this.offset));
 528             this.offset ++;
 529             if (this.regex.charAt(this.offset) != ')') {
 530                 throw ex("parser.factor.1", this.offset);
 531             }
 532             this.offset ++;
 533         } else {
 534             if (ch == '?')
 535              {
 536                 this.offset --; // Points '('.
 537             }
 538             this.next();
 539             condition = this.parseFactor();
 540             switch (condition.type) {
 541               case Token.LOOKAHEAD:
 542               case Token.NEGATIVELOOKAHEAD:
 543               case Token.LOOKBEHIND:
 544               case Token.NEGATIVELOOKBEHIND:
 545                 break;
 546               case Token.ANCHOR:
 547                 if (this.read() != T_RPAREN) {
 548                     throw ex("parser.factor.1", this.offset-1);
 549                 }
 550                 break;
 551               default:
 552                 throw ex("parser.factor.5", this.offset);
 553             }
 554         }
 555                                                 // Parses yes/no-patterns.
 556         this.next();
 557         Token yesPattern = this.parseRegex();
 558         Token noPattern = null;
 559         if (yesPattern.type == Token.UNION) {
 560             if (yesPattern.size() != 2) {
 561                 throw ex("parser.factor.6", this.offset);
 562             }
 563             noPattern = yesPattern.getChild(1);
 564             yesPattern = yesPattern.getChild(0);
 565         }
 566         if (this.read() != T_RPAREN) {
 567             throw ex("parser.factor.1", this.offset-1);
 568         }
 569         this.next();
 570         return Token.createCondition(refno, condition, yesPattern, noPattern);
 571     }
 572     Token processModifiers() throws ParseException {
 573                                                 // this.offset points the next of '?'.
 574                                                 // modifiers ::= [imsw]* ('-' [imsw]*)? ':'
 575         int add = 0, mask = 0, ch = -1;
 576         while (this.offset < this.regexlen) {
 577             ch = this.regex.charAt(this.offset);
 578             int v = REUtil.getOptionValue(ch);
 579             if (v == 0)
 580              {
 581                 break;                 // '-' or ':'?
 582             }
 583             add |= v;
 584             this.offset ++;
 585         }
 586         if (this.offset >= this.regexlen) {
 587             throw ex("parser.factor.2", this.offset-1);
 588         }
 589         if (ch == '-') {
 590             this.offset ++;
 591             while (this.offset < this.regexlen) {
 592                 ch = this.regex.charAt(this.offset);
 593                 int v = REUtil.getOptionValue(ch);
 594                 if (v == 0)
 595                  {
 596                     break;             // ':'?
 597                 }
 598                 mask |= v;
 599                 this.offset ++;
 600             }
 601             if (this.offset >= this.regexlen) {
 602                 throw ex("parser.factor.2", this.offset-1);
 603             }
 604         }
 605         Token tok;
 606         if (ch == ':') {
 607             this.offset ++;
 608             this.next();
 609             tok = Token.createModifierGroup(this.parseRegex(), add, mask);
 610             if (this.read() != T_RPAREN) {
 611                 throw ex("parser.factor.1", this.offset-1);
 612             }
 613             this.next();
 614         } else if (ch == ')') {                 // such as (?-i)
 615             this.offset ++;
 616             this.next();
 617             tok = Token.createModifierGroup(this.parseRegex(), add, mask);
 618         } else {
 619             throw ex("parser.factor.3", this.offset);
 620         }
 621
 622         return tok;
 623     }
 624     Token processIndependent() throws ParseException {
 625         this.next();
 626         Token tok = Token.createLook(Token.INDEPENDENT, this.parseRegex());
 627         if (this.read() != T_RPAREN) {
 628             throw ex("parser.factor.1", this.offset-1);
 629         }
 630         this.next();                            // Skips ')'
 631         return tok;
 632     }
 633     Token processBacksolidus_c() throws ParseException {
 634         int ch2;                                // Must be in 0x0040-0x005f
 635         if (this.offset >= this.regexlen
 636             || ((ch2 = this.regex.charAt(this.offset++)) & 0xffe0) != 0x0040) {
 637             throw ex("parser.atom.1", this.offset-1);
 638         }
 639         this.next();
 640         return Token.createChar(ch2-0x40);
 641     }
 642     Token processBacksolidus_C() throws ParseException {
 643         throw ex("parser.process.1", this.offset);
 644     }
 645     Token processBacksolidus_i() throws ParseException {
 646         Token tok = Token.createChar('i');
 647         this.next();
 648         return tok;
 649     }
 650     Token processBacksolidus_I() throws ParseException {
 651         throw ex("parser.process.1", this.offset);
 652     }
 653     Token processBacksolidus_g() throws ParseException {
 654         this.next();
 655         return Token.getGraphemePattern();
 656     }
 657     Token processBacksolidus_X() throws ParseException {
 658         this.next();
 659         return Token.getCombiningCharacterSequence();
 660     }
 661     Token processBackreference() throws ParseException {
 662         int refnum = this.chardata-'0';
 663         int finalRefnum = refnum;
 664
 665         if (this.parennumber <= refnum) {
 666             throw ex("parser.parse.2", this.offset-2);
 667         }
 668
 669         while  (this.offset < this.regexlen) {
 670             final int ch = this.regex.charAt(this.offset);
 671             if ('0' <= ch && ch <= '9') {
 672                 refnum = (refnum * 10) + (ch - '0');
 673                 if (refnum < this.parennumber) {
 674                     ++this.offset;
 675                     finalRefnum = refnum;
 676                     this.chardata = ch;
 677                 }
 678                 else {
 679                     break;
 680                 }
 681             }
 682             else {
 683                 break;
 684             }
 685         }
 686
 687         Token tok = Token.createBackReference(finalRefnum);
 688         this.hasBackReferences = true;
 689         if (this.references == null) {
 690             this.references = new Vector<>();
 691         }
 692         this.references.addElement(new ReferencePosition(finalRefnum, this.offset-2));
 693         this.next();
 694         return tok;
 695     }
 696
 697     // ----------------------------------------------------------------
 698
 699     /**
 700      * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>'
 701      *            | atom (('*' | '+' | '?' | minmax ) '?'? )?)
 702      *            | '(?=' regex ')'  | '(?!' regex ')'  | '(?&lt;=' regex ')'  | '(?&lt;!' regex ')'
 703      *            | '(?#' [^)]* ')'
 704      * minmax ::= '{' min (',' max?)? '}'
 705      * min ::= [0-9]+
 706      * max ::= [0-9]+
 707      */
 708     Token parseFactor() throws ParseException {
 709         int ch = this.read();
 710         Token tok;
 711         switch (ch) {
 712           case T_CARET:         return this.processCaret();
 713           case T_DOLLAR:        return this.processDollar();
 714           case T_LOOKAHEAD:     return this.processLookahead();
 715           case T_NEGATIVELOOKAHEAD: return this.processNegativelookahead();
 716           case T_LOOKBEHIND:    return this.processLookbehind();
 717           case T_NEGATIVELOOKBEHIND: return this.processNegativelookbehind();
 718
 719           case T_COMMENT:
 720             this.next();
 721             return Token.createEmpty();
 722
 723           case T_BACKSOLIDUS:
 724             switch (this.chardata) {
 725               case 'A': return this.processBacksolidus_A();
 726               case 'Z': return this.processBacksolidus_Z();
 727               case 'z': return this.processBacksolidus_z();
 728               case 'b': return this.processBacksolidus_b();
 729               case 'B': return this.processBacksolidus_B();
 730               case '<': return this.processBacksolidus_lt();
 731               case '>': return this.processBacksolidus_gt();
 732             }
 733                                                 // through down
 734         }
 735         tok = this.parseAtom();
 736         ch = this.read();
 737         switch (ch) {
 738           case T_STAR:  return this.processStar(tok);
 739           case T_PLUS:  return this.processPlus(tok);
 740           case T_QUESTION: return this.processQuestion(tok);
 741           case T_CHAR:
 742             if (this.chardata == '{' && this.offset < this.regexlen) {
 743
 744                 int off = this.offset;          // this.offset -> next of '{'
 745                 int min = 0, max = -1;
 746
 747                 if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
 748
 749                     min = ch -'0';
 750                     while (off < this.regexlen
 751                            && (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
 752                         min = min*10 +ch-'0';
 753                         if (min < 0) {
 754                             throw ex("parser.quantifier.5", this.offset);
 755                         }
 756                     }
 757                 }
 758                 else {
 759                     throw ex("parser.quantifier.1", this.offset);
 760                 }
 761
 762                 max = min;
 763                 if (ch == ',') {
 764
 765                    if (off >= this.regexlen) {
 766                        throw ex("parser.quantifier.3", this.offset);
 767                    }
 768                    else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
 769
 770                         max = ch -'0';       // {min,max}
 771                         while (off < this.regexlen
 772                                && (ch = this.regex.charAt(off++)) >= '0'
 773                                && ch <= '9') {
 774                             max = max*10 +ch-'0';
 775                             if (max < 0) {
 776                                 throw ex("parser.quantifier.5", this.offset);
 777                             }
 778                         }
 779
 780                         if (min > max) {
 781                             throw ex("parser.quantifier.4", this.offset);
 782                         }
 783                    }
 784                    else { // assume {min,}
 785                         max = -1;
 786                     }
 787                 }
 788
 789                if (ch != '}') {
 790                 throw ex("parser.quantifier.2", this.offset);
 791             }
 792
 793                if (this.checkQuestion(off)) {  // off -> next of '}'
 794                     tok = Token.createNGClosure(tok);
 795                     this.offset = off+1;
 796                 } else {
 797                     tok = Token.createClosure(tok);
 798                     this.offset = off;
 799                 }
 800
 801                 tok.setMin(min);
 802                 tok.setMax(max);
 803                 //System.err.println("CLOSURE: "+min+", "+max);
 804                 this.next();
 805             }
 806         }
 807         return tok;
 808     }
 809
 810     /**
 811      * atom ::= char | '.' | char-class | '(' regex ')' | '(?:' regex ')' | '\' [0-9]
 812      *          | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block
 813      *          | '(?>' regex ')'
 814      * char ::= '\\' | '\' [efnrt] | bmp-code | character-1
 815      */
 816     Token parseAtom() throws ParseException {
 817         int ch = this.read();
 818         Token tok = null;
 819         switch (ch) {
 820           case T_LPAREN:        return this.processParen();
 821           case T_LPAREN2:       return this.processParen2(); // '(?:'
 822           case T_CONDITION:     return this.processCondition(); // '(?('
 823           case T_MODIFIERS:     return this.processModifiers(); // (?modifiers ... )
 824           case T_INDEPENDENT:   return this.processIndependent();
 825           case T_DOT:
 826             this.next();                    // Skips '.'
 827             tok = Token.token_dot;
 828             break;
 829
 830             /**
 831              * char-class ::= '[' ( '^'? range ','?)+ ']'
 832              * range ::= '\d' | '\w' | '\s' | category-block | range-char
 833              *           | range-char '-' range-char
 834              * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2
 835              * bmp-char ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
 836              */
 837           case T_LBRACKET:      return this.parseCharacterClass(true);
 838           case T_SET_OPERATIONS: return this.parseSetOperations();
 839
 840           case T_BACKSOLIDUS:
 841             switch (this.chardata) {
 842               case 'd':  case 'D':
 843               case 'w':  case 'W':
 844               case 's':  case 'S':
 845                 tok = this.getTokenForShorthand(this.chardata);
 846                 this.next();
 847                 return tok;
 848
 849               case 'e':  case 'f':  case 'n':  case 'r':
 850               case 't':  case 'u':  case 'v':  case 'x':
 851                 {
 852                     int ch2 = this.decodeEscaped();
 853                     if (ch2 < 0x10000) {
 854                         tok = Token.createChar(ch2);
 855                     } else {
 856                         tok = Token.createString(REUtil.decomposeToSurrogates(ch2));
 857                     }
 858                 }
 859                 break;
 860
 861               case 'c': return this.processBacksolidus_c();
 862               case 'C': return this.processBacksolidus_C();
 863               case 'i': return this.processBacksolidus_i();
 864               case 'I': return this.processBacksolidus_I();
 865               case 'g': return this.processBacksolidus_g();
 866               case 'X': return this.processBacksolidus_X();
 867               case '1':  case '2':  case '3':  case '4':
 868               case '5':  case '6':  case '7':  case '8':  case '9':
 869                 return this.processBackreference();
 870
 871               case 'P':
 872               case 'p':
 873                 int pstart = this.offset;
 874                 tok = processBacksolidus_pP(this.chardata);
 875                 if (tok == null) {
 876                     throw this.ex("parser.atom.5", pstart);
 877                 }
 878                 break;
 879
 880               default:
 881                 tok = Token.createChar(this.chardata);
 882             }
 883             this.next();
 884             break;
 885
 886           case T_CHAR:
 887             if (this.chardata == ']' || this.chardata == '{' || this.chardata == '}') {
 888                 throw this.ex("parser.atom.4", this.offset-1);
 889             }
 890             tok = Token.createChar(this.chardata);
 891             int high = this.chardata;
 892             this.next();
 893             if (REUtil.isHighSurrogate(high)
 894                 && this.read() == T_CHAR && REUtil.isLowSurrogate(this.chardata)) {
 895                 char[] sur = new char[2];
 896                 sur[0] = (char)high;
 897                 sur[1] = (char)this.chardata;
 898                 tok = Token.createParen(Token.createString(new String(sur)), 0);
 899                 this.next();
 900             }
 901             break;
 902
 903           default:
 904             throw this.ex("parser.atom.4", this.offset-1);
 905         }
 906         return tok;
 907     }
 908
 909     protected RangeToken processBacksolidus_pP(int c) throws ParseException {
 910
 911         this.next();
 912         if (this.read() != T_CHAR || this.chardata != '{') {
 913             throw this.ex("parser.atom.2", this.offset-1);
 914         }
 915
 916         // handle category escape
 917         boolean positive = c == 'p';
 918         int namestart = this.offset;
 919         int nameend = this.regex.indexOf('}', namestart);
 920
 921         if (nameend < 0) {
 922             throw this.ex("parser.atom.3", this.offset);
 923         }
 924
 925         String pname = this.regex.substring(namestart, nameend);
 926         this.offset = nameend+1;
 927
 928         return Token.getRange(pname, positive, this.isSet(RegularExpression.XMLSCHEMA_MODE));
 929     }
 930
 931     int processCIinCharacterClass(RangeToken tok, int c) {
 932         return this.decodeEscaped();
 933     }
 934
 935     /**
 936      * char-class ::= '[' ( '^'? range ','?)+ ']'
 937      * range ::= '\d' | '\w' | '\s' | category-block | range-char
 938      *           | range-char '-' range-char
 939      * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2
 940      * bmp-code ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
 941      */
 942     protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
 943         this.setContext(S_INBRACKETS);
 944         this.next();                            // '['
 945         boolean nrange = false;
 946         RangeToken base = null;
 947         RangeToken tok;
 948         if (this.read() == T_CHAR && this.chardata == '^') {
 949             nrange = true;
 950             this.next();                        // '^'
 951             if (useNrange) {
 952                 tok = Token.createNRange();
 953             } else {
 954                 base = Token.createRange();
 955                 base.addRange(0, Token.UTF16_MAX);
 956                 tok = Token.createRange();
 957             }
 958         } else {
 959             tok = Token.createRange();
 960         }
 961         int type;
 962         boolean firstloop = true;
 963         while ((type = this.read()) != T_EOF) {
 964             if (type == T_CHAR && this.chardata == ']' && !firstloop) {
 965                 break;
 966             }
 967             int c = this.chardata;
 968             boolean end = false;
 969             if (type == T_BACKSOLIDUS) {
 970                 switch (c) {
 971                   case 'd':  case 'D':
 972                   case 'w':  case 'W':
 973                   case 's':  case 'S':
 974                     tok.mergeRanges(this.getTokenForShorthand(c));
 975                     end = true;
 976                     break;
 977
 978                   case 'i':  case 'I':
 979                   case 'c':  case 'C':
 980                     c = this.processCIinCharacterClass(tok, c);
 981                     if (c < 0) {
 982                         end = true;
 983                     }
 984                     break;
 985
 986                   case 'p':
 987                   case 'P':
 988                     int pstart = this.offset;
 989                     RangeToken tok2 = this.processBacksolidus_pP(c);
 990                     if (tok2 == null) {
 991                         throw this.ex("parser.atom.5", pstart);
 992                     }
 993                     tok.mergeRanges(tok2);
 994                     end = true;
 995                     break;
 996
 997                   default:
 998                     c = this.decodeEscaped();
 999                 } // \ + c
1000             } // backsolidus
1001                                                 // POSIX Character class such as [:alnum:]
1002             else if (type == T_POSIX_CHARCLASS_START) {
1003                 int nameend = this.regex.indexOf(':', this.offset);
1004                 if (nameend < 0) {
1005                     throw this.ex("parser.cc.1", this.offset);
1006                 }
1007                 boolean positive = true;
1008                 if (this.regex.charAt(this.offset) == '^') {
1009                     this.offset ++;
1010                     positive = false;
1011                 }
1012                 String name = this.regex.substring(this.offset, nameend);
1013                 RangeToken range = Token.getRange(name, positive,
1014                                                   this.isSet(RegularExpression.XMLSCHEMA_MODE));
1015                 if (range == null) {
1016                     throw this.ex("parser.cc.3", this.offset);
1017                 }
1018                 tok.mergeRanges(range);
1019                 end = true;
1020                 if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']') {
1021                     throw this.ex("parser.cc.1", nameend);
1022                 }
1023                 this.offset = nameend+2;
1024             }
1025             else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
1026                 if (nrange) {
1027                     nrange = false;
1028                     if (useNrange) {
1029                         tok = (RangeToken) Token.complementRanges(tok);
1030                     }
1031                     else {
1032                         base.subtractRanges(tok);
1033                         tok = base;
1034                     }
1035                 }
1036                 RangeToken range2 = this.parseCharacterClass(false);
1037                 tok.subtractRanges(range2);
1038                 if (this.read() != T_CHAR || this.chardata != ']') {
1039                     throw this.ex("parser.cc.5", this.offset);
1040                 }
1041                 break;                          // Exit this loop
1042             }
1043             this.next();
1044             if (!end) {                         // if not shorthands...
1045                 if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
1046                     if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
1047                         tok.addRange(c, c);
1048                     }
1049                     else {
1050                         addCaseInsensitiveChar(tok, c);
1051                     }
1052                 }
1053                 else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
1054                     throw this.ex("parser.cc.8", this.offset-1);
1055                 }
1056                 else {
1057                     this.next(); // Skips '-'
1058                     if ((type = this.read()) == T_EOF) {
1059                         throw this.ex("parser.cc.2", this.offset);
1060                     }
1061                     if (type == T_CHAR && this.chardata == ']') {
1062                         if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
1063                             tok.addRange(c, c);
1064                         }
1065                         else {
1066                             addCaseInsensitiveChar(tok, c);
1067                         }
1068                         tok.addRange('-', '-');
1069                     } else {
1070                         int rangeend = this.chardata;
1071                         if (type == T_BACKSOLIDUS) {
1072                             rangeend = this.decodeEscaped();
1073                         }
1074                         this.next();
1075                         if (c > rangeend) {
1076                             throw this.ex("parser.ope.3", this.offset-1);
1077                         }
1078                         if (!this.isSet(RegularExpression.IGNORE_CASE) ||
1079                                 (c > 0xffff && rangeend > 0xffff)) {
1080                             tok.addRange(c, rangeend);
1081                         }
1082                         else {
1083                             addCaseInsensitiveCharRange(tok, c, rangeend);
1084                         }
1085                     }
1086                 }
1087             }
1088             if (this.isSet(RegularExpression.SPECIAL_COMMA)
1089                 && this.read() == T_CHAR && this.chardata == ',') {
1090                 this.next();
1091             }
1092             firstloop = false;
1093         }
1094         if (this.read() == T_EOF) {
1095             throw this.ex("parser.cc.2", this.offset);
1096         }
1097
1098         if (!useNrange && nrange) {
1099             base.subtractRanges(tok);
1100             tok = base;
1101         }
1102         tok.sortRanges();
1103         tok.compactRanges();
1104         this.setContext(S_NORMAL);
1105         this.next();                    // Skips ']'
1106
1107         return tok;
1108     }
1109
1110     /**
1111      * '(?[' ... ']' (('-' | '+' | '&') '[' ... ']')? ')'
1112      */
1113     protected RangeToken parseSetOperations() throws ParseException {
1114         RangeToken tok = this.parseCharacterClass(false);
1115         int type;
1116         while ((type = this.read()) != T_RPAREN) {
1117             int ch = this.chardata;
1118             if (type == T_CHAR && (ch == '-' || ch == '&')
1119                 || type == T_PLUS) {
1120                 this.next();
1121                 if (this.read() != T_LBRACKET) {
1122                     throw ex("parser.ope.1", this.offset-1);
1123                 }
1124                 RangeToken t2 = this.parseCharacterClass(false);
1125                 if (type == T_PLUS) {
1126                     tok.mergeRanges(t2);
1127                 } else if (ch == '-') {
1128                     tok.subtractRanges(t2);
1129                 } else if (ch == '&') {
1130                     tok.intersectRanges(t2);
1131                 } else {
1132                     throw new RuntimeException("ASSERT");
1133                 }
1134             } else {
1135                 throw ex("parser.ope.2", this.offset-1);
1136             }
1137         }
1138         this.next();
1139         return tok;
1140     }
1141
1142     Token getTokenForShorthand(int ch) {
1143         Token tok;
1144         switch (ch) {
1145           case 'd':
1146             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1147                 ? Token.getRange("Nd", true) : Token.token_0to9;
1148             break;
1149           case 'D':
1150             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1151                 ? Token.getRange("Nd", false) : Token.token_not_0to9;
1152             break;
1153           case 'w':
1154             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1155                 ? Token.getRange("IsWord", true) : Token.token_wordchars;
1156             break;
1157           case 'W':
1158             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1159                 ? Token.getRange("IsWord", false) : Token.token_not_wordchars;
1160             break;
1161           case 's':
1162             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1163                 ? Token.getRange("IsSpace", true) : Token.token_spaces;
1164             break;
1165           case 'S':
1166             tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
1167                 ? Token.getRange("IsSpace", false) : Token.token_not_spaces;
1168             break;
1169
1170           default:
1171             throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
1172         }
1173         return tok;
1174     }
1175
1176     /**
1177      */
1178     int decodeEscaped() throws ParseException {
1179         if (this.read() != T_BACKSOLIDUS) {
1180             throw ex("parser.next.1", this.offset-1);
1181         }
1182         int c = this.chardata;
1183         switch (c) {
1184           case 'e':  c = 0x1b;  break; // ESCAPE U+001B
1185           case 'f':  c = '\f';  break; // FORM FEED U+000C
1186           case 'n':  c = '\n';  break; // LINE FEED U+000A
1187           case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D
1188           case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
1189           //case 'v':  c = 0x0b;  break; // VERTICAL TABULATION U+000B
1190           case 'x':
1191             this.next();
1192             if (this.read() != T_CHAR) {
1193                 throw ex("parser.descape.1", this.offset-1);
1194             }
1195             if (this.chardata == '{') {
1196                 int v1 = 0;
1197                 int uv = 0;
1198                 do {
1199                     this.next();
1200                     if (this.read() != T_CHAR) {
1201                         throw ex("parser.descape.1", this.offset-1);
1202                     }
1203                     if ((v1 = hexChar(this.chardata)) < 0) {
1204                         break;
1205                     }
1206                     if (uv > uv*16) {
1207                         throw ex("parser.descape.2", this.offset-1);
1208                     }
1209                     uv = uv*16+v1;
1210                 } while (true);
1211                 if (this.chardata != '}') {
1212                     throw ex("parser.descape.3", this.offset-1);
1213                 }
1214                 if (uv > Token.UTF16_MAX) {
1215                     throw ex("parser.descape.4", this.offset-1);
1216                 }
1217                 c = uv;
1218             } else {
1219                 int v1 = 0;
1220                 if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1221                     throw ex("parser.descape.1", this.offset-1);
1222                 }
1223                 int uv = v1;
1224                 this.next();
1225                 if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1226                     throw ex("parser.descape.1", this.offset-1);
1227                 }
1228                 uv = uv*16+v1;
1229                 c = uv;
1230             }
1231             break;
1232
1233           case 'u':
1234             int v1 = 0;
1235             this.next();
1236             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1237                 throw ex("parser.descape.1", this.offset-1);
1238             }
1239             int uv = v1;
1240             this.next();
1241             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1242                 throw ex("parser.descape.1", this.offset-1);
1243             }
1244             uv = uv*16+v1;
1245             this.next();
1246             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1247                 throw ex("parser.descape.1", this.offset-1);
1248             }
1249             uv = uv*16+v1;
1250             this.next();
1251             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1252                 throw ex("parser.descape.1", this.offset-1);
1253             }
1254             uv = uv*16+v1;
1255             c = uv;
1256             break;
1257
1258           case 'v':
1259             this.next();
1260             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1261                 throw ex("parser.descape.1", this.offset-1);
1262             }
1263             uv = v1;
1264             this.next();
1265             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1266                 throw ex("parser.descape.1", this.offset-1);
1267             }
1268             uv = uv*16+v1;
1269             this.next();
1270             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1271                 throw ex("parser.descape.1", this.offset-1);
1272             }
1273             uv = uv*16+v1;
1274             this.next();
1275             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1276                 throw ex("parser.descape.1", this.offset-1);
1277             }
1278             uv = uv*16+v1;
1279             this.next();
1280             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1281                 throw ex("parser.descape.1", this.offset-1);
1282             }
1283             uv = uv*16+v1;
1284             this.next();
1285             if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
1286                 throw ex("parser.descape.1", this.offset-1);
1287             }
1288             uv = uv*16+v1;
1289             if (uv > Token.UTF16_MAX) {
1290                 throw ex("parser.descappe.4", this.offset-1);
1291             }
1292             c = uv;
1293             break;
1294           case 'A':
1295           case 'Z':
1296           case 'z':
1297             throw ex("parser.descape.5", this.offset-2);
1298           default:
1299         }
1300         return c;
1301     }
1302
1303     static private final int hexChar(int ch) {
1304         if (ch < '0') {
1305             return -1;
1306         }
1307         if (ch > 'f') {
1308             return -1;
1309         }
1310         if (ch <= '9') {
1311             return ch-'0';
1312         }
1313         if (ch < 'A') {
1314             return -1;
1315         }
1316         if (ch <= 'F') {
1317             return ch-'A'+10;
1318         }
1319         if (ch < 'a') {
1320             return -1;
1321         }
1322         return ch-'a'+10;
1323     }
1324
1325     static protected final void addCaseInsensitiveChar(RangeToken tok, int c) {
1326         final int[] caseMap = CaseInsensitiveMap.get(c);
1327         tok.addRange(c, c);
1328
1329         if (caseMap != null) {
1330             for (int i=0; i<caseMap.length; i+=2) {
1331                 tok.addRange(caseMap[i], caseMap[i]);
1332             }
1333         }
1334
1335     }
1336
1337     static protected final void addCaseInsensitiveCharRange(RangeToken tok, int start, int end) {
1338         int[] caseMap;
1339         int r1, r2;
1340         if (start <= end) {
1341             r1 = start;
1342             r2 = end;
1343         } else {
1344             r1 = end;
1345             r2 = start;
1346         }
1347
1348         tok.addRange(r1, r2);
1349         for (int ch = r1;  ch <= r2;  ch++) {
1350             caseMap = CaseInsensitiveMap.get(ch);
1351             if (caseMap != null) {
1352                 for (int i=0; i<caseMap.length; i+=2) {
1353                     tok.addRange(caseMap[i], caseMap[i]);
1354                 }
1355             }
1356         }
1357     }
1358 }