2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.opendaylight.yangtools.xsd.regex;
21 * This class represents a character class such as [a-z] or a period.
25 * @version $Id: RangeToken.java 965250 2010-07-18 16:04:58Z mrglavas $
27 final class RangeToken extends Token implements java.io.Serializable {
29 private static final long serialVersionUID = -553983121197679934L;
34 RangeToken icaseCache = null;
38 RangeToken(int type) {
40 this.setSorted(false);
43 // for RANGE or NRANGE
45 protected void addRange(int start, int end) {
46 this.icaseCache = null;
47 //System.err.println("Token#addRange(): "+start+" "+end);
58 if (this.ranges == null) {
59 this.ranges = new int[2];
64 pos = this.ranges.length;
65 if (this.ranges[pos-1]+1 == r1) {
66 this.ranges[pos-1] = r2;
69 int[] temp = new int[pos+2];
70 System.arraycopy(this.ranges, 0, temp, 0, pos);
72 if (this.ranges[pos-1] >= r1) {
73 this.setSorted(false);
75 this.ranges[pos++] = r1;
76 this.ranges[pos] = r2;
83 private final boolean isSorted() {
86 private final void setSorted(boolean sort) {
89 this.compacted = false;
92 private final boolean isCompacted() {
93 return this.compacted;
95 private final void setCompacted() {
96 this.compacted = true;
100 protected void sortRanges() {
101 if (this.isSorted()) {
104 if (this.ranges == null)
107 //System.err.println("Do sorting: "+this.ranges.length);
111 // Why? -- In many cases,
112 // this.ranges has few elements.
113 for (int i = this.ranges.length-4; i >= 0; i -= 2) {
114 for (int j = 0; j <= i; j += 2) {
115 if (this.ranges[j] > this.ranges[j+2]
116 || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
118 tmp = this.ranges[j+2];
119 this.ranges[j+2] = this.ranges[j];
120 this.ranges[j] = tmp;
121 tmp = this.ranges[j+3];
122 this.ranges[j+3] = this.ranges[j+1];
123 this.ranges[j+1] = tmp;
127 this.setSorted(true);
131 * this.ranges is sorted.
134 protected void compactRanges() {
135 boolean DEBUG = false;
136 if (this.ranges == null || this.ranges.length <= 2) {
139 if (this.isCompacted()) {
142 int base = 0; // Index of writing point
143 int target = 0; // Index of processing point
145 while (target < this.ranges.length) {
146 if (base != target) {
147 this.ranges[base] = this.ranges[target++];
148 this.ranges[base+1] = this.ranges[target++];
152 int baseend = this.ranges[base+1];
153 while (target < this.ranges.length) {
154 if (baseend+1 < this.ranges[target]) {
157 if (baseend+1 == this.ranges[target]) {
159 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
160 +", "+this.ranges[base+1]
161 +"], ["+this.ranges[target]
162 +", "+this.ranges[target+1]
163 +"] -> ["+this.ranges[base]
164 +", "+this.ranges[target+1]
167 this.ranges[base+1] = this.ranges[target+1];
168 baseend = this.ranges[base+1];
170 } else if (baseend >= this.ranges[target+1]) {
172 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
173 +", "+this.ranges[base+1]
174 +"], ["+this.ranges[target]
175 +", "+this.ranges[target+1]
176 +"] -> ["+this.ranges[base]
177 +", "+this.ranges[base+1]
181 } else if (baseend < this.ranges[target+1]) {
183 System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
184 +", "+this.ranges[base+1]
185 +"], ["+this.ranges[target]
186 +", "+this.ranges[target+1]
187 +"] -> ["+this.ranges[base]
188 +", "+this.ranges[target+1]
191 this.ranges[base+1] = this.ranges[target+1];
192 baseend = this.ranges[base+1];
195 throw new RuntimeException("Token#compactRanges(): Internel Error: ["
197 +","+this.ranges[base+1]
198 +"] ["+this.ranges[target]
199 +","+this.ranges[target+1]+"]");
205 if (base != this.ranges.length) {
206 int[] result = new int[base];
207 System.arraycopy(this.ranges, 0, result, 0, base);
208 this.ranges = result;
214 protected void mergeRanges(Token token) {
215 RangeToken tok = (RangeToken)token;
218 if (tok.ranges == null) {
221 this.icaseCache = null;
222 this.setSorted(true);
223 if (this.ranges == null) {
224 this.ranges = new int[tok.ranges.length];
225 System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
228 int[] result = new int[this.ranges.length+tok.ranges.length];
229 for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
230 if (i >= this.ranges.length) {
231 result[k++] = tok.ranges[j++];
232 result[k++] = tok.ranges[j++];
233 } else if (j >= tok.ranges.length) {
234 result[k++] = this.ranges[i++];
235 result[k++] = this.ranges[i++];
236 } else if (tok.ranges[j] < this.ranges[i]
237 || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
238 result[k++] = tok.ranges[j++];
239 result[k++] = tok.ranges[j++];
241 result[k++] = this.ranges[i++];
242 result[k++] = this.ranges[i++];
245 this.ranges = result;
249 protected void subtractRanges(Token token) {
250 if (token.type == NRANGE) {
251 this.intersectRanges(token);
254 RangeToken tok = (RangeToken)token;
255 if (tok.ranges == null || this.ranges == null) {
258 this.icaseCache = null;
260 this.compactRanges();
264 //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
266 int[] result = new int[this.ranges.length+tok.ranges.length];
267 int wp = 0, src = 0, sub = 0;
268 while (src < this.ranges.length && sub < tok.ranges.length) {
269 int srcbegin = this.ranges[src];
270 int srcend = this.ranges[src+1];
271 int subbegin = tok.ranges[sub];
272 int subend = tok.ranges[sub+1];
273 if (srcend < subbegin) { // Not overlapped
278 result[wp++] = this.ranges[src++];
279 result[wp++] = this.ranges[src++];
280 } else if (srcend >= subbegin
281 && srcbegin <= subend) { // Overlapped
286 // sub: o------------o
287 if (subbegin <= srcbegin && srcend <= subend) {
289 // sub: o------------o
293 } else if (subbegin <= srcbegin) {
298 this.ranges[src] = subend+1;
300 } else if (srcend <= subend) {
305 result[wp++] = srcbegin;
306 result[wp++] = subbegin-1;
312 // Reuse src(=right res)
313 result[wp++] = srcbegin;
314 result[wp++] = subbegin-1;
315 this.ranges[src] = subend+1;
318 } else if (subend < srcbegin) {
324 throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
325 +","+this.ranges[src+1]
326 +"] - ["+tok.ranges[sub]
327 +","+tok.ranges[sub+1]
331 while (src < this.ranges.length) {
332 result[wp++] = this.ranges[src++];
333 result[wp++] = this.ranges[src++];
335 this.ranges = new int[wp];
336 System.arraycopy(result, 0, this.ranges, 0, wp);
337 // this.ranges is sorted and compacted.
341 * @param tok Ignore whether it is NRANGE or not.
344 protected void intersectRanges(Token token) {
345 RangeToken tok = (RangeToken)token;
346 if (tok.ranges == null || this.ranges == null) {
349 this.icaseCache = null;
351 this.compactRanges();
355 int[] result = new int[this.ranges.length+tok.ranges.length];
356 int wp = 0, src1 = 0, src2 = 0;
357 while (src1 < this.ranges.length && src2 < tok.ranges.length) {
358 int src1begin = this.ranges[src1];
359 int src1end = this.ranges[src1+1];
360 int src2begin = tok.ranges[src2];
361 int src2end = tok.ranges[src2+1];
362 if (src1end < src2begin) { // Not overlapped
368 } else if (src1end >= src2begin
369 && src1begin <= src2end) { // Overlapped
374 // src2: o------------o
375 if (src2begin <= src1begin && src1end <= src2end) {
377 // src2: o------------o
380 result[wp++] = src1begin;
381 result[wp++] = src1end;
383 } else if (src2begin <= src1begin) {
387 // Reuse the rest of src1
388 result[wp++] = src1begin;
389 result[wp++] = src2end;
390 this.ranges[src1] = src2end+1;
392 } else if (src1end <= src2end) {
397 result[wp++] = src2begin;
398 result[wp++] = src1end;
404 // Reuse the rest of src1
405 result[wp++] = src2begin;
406 result[wp++] = src2end;
407 this.ranges[src1] = src2end+1;
409 } else if (src2end < src1begin) {
415 throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
417 +","+this.ranges[src1+1]
418 +"] & ["+tok.ranges[src2]
419 +","+tok.ranges[src2+1]
423 while (src1 < this.ranges.length) {
424 result[wp++] = this.ranges[src1++];
425 result[wp++] = this.ranges[src1++];
427 this.ranges = new int[wp];
428 System.arraycopy(result, 0, this.ranges, 0, wp);
429 // this.ranges is sorted and compacted.
433 * for RANGE: Creates complement.
434 * for NRANGE: Creates the same meaning RANGE.
436 static Token complementRanges(Token token) {
437 if (token.type != RANGE && token.type != NRANGE) {
438 throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
440 RangeToken tok = (RangeToken)token;
443 int len = tok.ranges.length+2;
444 if (tok.ranges[0] == 0) {
447 int last = tok.ranges[tok.ranges.length-1];
448 if (last == UTF16_MAX) {
451 RangeToken ret = Token.createRange();
452 ret.ranges = new int[len];
454 if (tok.ranges[0] > 0) {
455 ret.ranges[wp++] = 0;
456 ret.ranges[wp++] = tok.ranges[0]-1;
458 for (int i = 1; i < tok.ranges.length-2; i += 2) {
459 ret.ranges[wp++] = tok.ranges[i]+1;
460 ret.ranges[wp++] = tok.ranges[i+1]-1;
462 if (last != UTF16_MAX) {
463 ret.ranges[wp++] = last+1;
464 ret.ranges[wp] = UTF16_MAX;
470 synchronized RangeToken getCaseInsensitiveToken() {
471 if (this.icaseCache != null) {
472 return this.icaseCache;
475 RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
476 for (int i = 0; i < this.ranges.length; i += 2) {
477 for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
479 uppers.addRange(ch, ch);
481 char uch = Character.toUpperCase((char)ch);
482 uppers.addRange(uch, uch);
486 RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
487 for (int i = 0; i < uppers.ranges.length; i += 2) {
488 for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
490 lowers.addRange(ch, ch);
492 char uch = Character.toLowerCase((char)ch);
493 lowers.addRange(uch, uch);
497 lowers.mergeRanges(uppers);
498 lowers.mergeRanges(this);
499 lowers.compactRanges();
501 this.icaseCache = lowers;
506 System.err.print("RANGE: ");
507 if (this.ranges == null) {
508 System.err.println(" NULL");
511 for (int i = 0; i < this.ranges.length; i += 2) {
512 System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
514 System.err.println("");
518 boolean match(int ch) {
519 if (this.map == null) {
523 if (this.type == RANGE) {
525 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
528 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
529 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
535 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
538 for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
539 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
547 private static final int MAPSIZE = 256;
548 private void createMap() {
549 int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
550 int [] map = new int[asize];
551 int nonMapIndex = this.ranges.length;
552 for (int i = 0; i < asize; ++i) {
555 for (int i = 0; i < this.ranges.length; i += 2) {
556 int s = this.ranges[i];
557 int e = this.ranges[i+1];
559 for (int j = s; j <= e && j < MAPSIZE; j++) {
560 map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
573 this.nonMapIndex = nonMapIndex;
574 //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
578 public String toString(int options) {
580 if (this.type == RANGE) {
581 if (this == Token.token_dot) {
583 } else if (this == Token.token_0to9) {
585 } else if (this == Token.token_wordchars) {
587 } else if (this == Token.token_spaces) {
590 StringBuffer sb = new StringBuffer();
592 for (int i = 0; i < this.ranges.length; i += 2) {
593 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
596 if (this.ranges[i] == this.ranges[i+1]) {
597 sb.append(escapeCharInCharClass(this.ranges[i], options));
599 sb.append(escapeCharInCharClass(this.ranges[i], options));
601 sb.append(escapeCharInCharClass(this.ranges[i+1], options));
608 if (this == Token.token_not_0to9) {
610 } else if (this == Token.token_not_wordchars) {
612 } else if (this == Token.token_not_spaces) {
615 StringBuffer sb = new StringBuffer();
617 for (int i = 0; i < this.ranges.length; i += 2) {
618 if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
621 if (this.ranges[i] == this.ranges[i+1]) {
622 sb.append(escapeCharInCharClass(this.ranges[i], options));
624 sb.append(escapeCharInCharClass(this.ranges[i], options));
626 sb.append(escapeCharInCharClass(this.ranges[i+1], options));
636 private static String escapeCharInCharClass(int ch, int options) {
639 case '[': case ']': case '-': case '^':
643 case '\f': ret = "\\f"; break;
644 case '\n': ret = "\\n"; break;
645 case '\r': ret = "\\r"; break;
646 case '\t': ret = "\\t"; break;
647 case 0x1b: ret = "\\e"; break;
649 if (isSet(options, RegularExpression.XMLSCHEMA_MODE)) {
653 //case 0x0b: ret = "\\v"; break;
656 String pre = "0"+Integer.toHexString(ch);
657 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
658 } else if (ch >= 0x10000) {
659 String pre = "0"+Integer.toHexString(ch);
660 String sub = pre.substring(pre.length()-6, pre.length());
661 if (isSet(options, RegularExpression.XMLSCHEMA_MODE)) {
662 ret = "\\x{"+sub+"}";