BUG-5410: introduce RegularExpression.toPatternString()
[yangtools.git] / third-party / xsd-regex / src / main / java / org / opendaylight / yangtools / xsd / regex / RangeToken.java
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 package org.opendaylight.yangtools.xsd.regex;
19
20 /**
21  * This class represents a character class such as [a-z] or a period.
22  *
23  * @xerces.internal
24  *
25  * @version $Id: RangeToken.java 965250 2010-07-18 16:04:58Z mrglavas $
26  */
27 final class RangeToken extends Token implements java.io.Serializable {
28
29     private static final long serialVersionUID = -553983121197679934L;
30
31     int[] ranges;
32     boolean sorted;
33     boolean compacted;
34     RangeToken icaseCache = null;
35     int[] map = null;
36     int nonMapIndex;
37
38     RangeToken(int type) {
39         super(type);
40         this.setSorted(false);
41     }
42
43                                                 // for RANGE or NRANGE
44     @Override
45     protected void addRange(int start, int end) {
46         this.icaseCache = null;
47         //System.err.println("Token#addRange(): "+start+" "+end);
48         int r1, r2;
49         if (start <= end) {
50             r1 = start;
51             r2 = end;
52         } else {
53             r1 = end;
54             r2 = start;
55         }
56
57         int pos = 0;
58         if (this.ranges == null) {
59             this.ranges = new int[2];
60             this.ranges[0] = r1;
61             this.ranges[1] = r2;
62             this.setSorted(true);
63         } else {
64             pos = this.ranges.length;
65             if (this.ranges[pos-1]+1 == r1) {
66                 this.ranges[pos-1] = r2;
67                 return;
68             }
69             int[] temp = new int[pos+2];
70             System.arraycopy(this.ranges, 0, temp, 0, pos);
71             this.ranges = temp;
72             if (this.ranges[pos-1] >= r1) {
73                 this.setSorted(false);
74             }
75             this.ranges[pos++] = r1;
76             this.ranges[pos] = r2;
77             if (!this.sorted) {
78                 this.sortRanges();
79             }
80         }
81     }
82
83     private final boolean isSorted() {
84         return this.sorted;
85     }
86     private final void setSorted(boolean sort) {
87         this.sorted = sort;
88         if (!sort) {
89             this.compacted = false;
90         }
91     }
92     private final boolean isCompacted() {
93         return this.compacted;
94     }
95     private final void setCompacted() {
96         this.compacted = true;
97     }
98
99     @Override
100     protected void sortRanges() {
101         if (this.isSorted()) {
102             return;
103         }
104         if (this.ranges == null)
105          {
106             return;
107         //System.err.println("Do sorting: "+this.ranges.length);
108         }
109
110                                                 // Bubble sort
111                                                 // Why? -- In many cases,
112                                                 //         this.ranges has few elements.
113         for (int i = this.ranges.length-4;  i >= 0;  i -= 2) {
114             for (int j = 0;  j <= i;  j += 2) {
115                 if (this.ranges[j] > this.ranges[j+2]
116                     || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
117                     int tmp;
118                     tmp = this.ranges[j+2];
119                     this.ranges[j+2] = this.ranges[j];
120                     this.ranges[j] = tmp;
121                     tmp = this.ranges[j+3];
122                     this.ranges[j+3] = this.ranges[j+1];
123                     this.ranges[j+1] = tmp;
124                 }
125             }
126         }
127         this.setSorted(true);
128     }
129
130     /**
131      * this.ranges is sorted.
132      */
133     @Override
134     protected void compactRanges() {
135         boolean DEBUG = false;
136         if (this.ranges == null || this.ranges.length <= 2) {
137             return;
138         }
139         if (this.isCompacted()) {
140             return;
141         }
142         int base = 0;                           // Index of writing point
143         int target = 0;                         // Index of processing point
144
145         while (target < this.ranges.length) {
146             if (base != target) {
147                 this.ranges[base] = this.ranges[target++];
148                 this.ranges[base+1] = this.ranges[target++];
149             } else {
150                 target += 2;
151             }
152             int baseend = this.ranges[base+1];
153             while (target < this.ranges.length) {
154                 if (baseend+1 < this.ranges[target]) {
155                     break;
156                 }
157                 if (baseend+1 == this.ranges[target]) {
158                     if (DEBUG) {
159                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
160                                            +", "+this.ranges[base+1]
161                                            +"], ["+this.ranges[target]
162                                            +", "+this.ranges[target+1]
163                                            +"] -> ["+this.ranges[base]
164                                            +", "+this.ranges[target+1]
165                                            +"]");
166                     }
167                     this.ranges[base+1] = this.ranges[target+1];
168                     baseend = this.ranges[base+1];
169                     target += 2;
170                 } else if (baseend >= this.ranges[target+1]) {
171                     if (DEBUG) {
172                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
173                                            +", "+this.ranges[base+1]
174                                            +"], ["+this.ranges[target]
175                                            +", "+this.ranges[target+1]
176                                            +"] -> ["+this.ranges[base]
177                                            +", "+this.ranges[base+1]
178                                            +"]");
179                     }
180                     target += 2;
181                 } else if (baseend < this.ranges[target+1]) {
182                     if (DEBUG) {
183                         System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
184                                            +", "+this.ranges[base+1]
185                                            +"], ["+this.ranges[target]
186                                            +", "+this.ranges[target+1]
187                                            +"] -> ["+this.ranges[base]
188                                            +", "+this.ranges[target+1]
189                                            +"]");
190                     }
191                     this.ranges[base+1] = this.ranges[target+1];
192                     baseend = this.ranges[base+1];
193                     target += 2;
194                 } else {
195                     throw new RuntimeException("Token#compactRanges(): Internel Error: ["
196                                                +this.ranges[base]
197                                                +","+this.ranges[base+1]
198                                                +"] ["+this.ranges[target]
199                                                +","+this.ranges[target+1]+"]");
200                 }
201             } // while
202             base += 2;
203         }
204
205         if (base != this.ranges.length) {
206             int[] result = new int[base];
207             System.arraycopy(this.ranges, 0, result, 0, base);
208             this.ranges = result;
209         }
210         this.setCompacted();
211     }
212
213     @Override
214     protected void mergeRanges(Token token) {
215         RangeToken tok = (RangeToken)token;
216         this.sortRanges();
217         tok.sortRanges();
218         if (tok.ranges == null) {
219             return;
220         }
221         this.icaseCache = null;
222         this.setSorted(true);
223         if (this.ranges == null) {
224             this.ranges = new int[tok.ranges.length];
225             System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
226             return;
227         }
228         int[] result = new int[this.ranges.length+tok.ranges.length];
229         for (int i = 0, j = 0, k = 0;  i < this.ranges.length || j < tok.ranges.length;) {
230             if (i >= this.ranges.length) {
231                 result[k++] = tok.ranges[j++];
232                 result[k++] = tok.ranges[j++];
233             } else if (j >= tok.ranges.length) {
234                 result[k++] = this.ranges[i++];
235                 result[k++] = this.ranges[i++];
236             } else if (tok.ranges[j] < this.ranges[i]
237                        || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
238                 result[k++] = tok.ranges[j++];
239                 result[k++] = tok.ranges[j++];
240             } else {
241                 result[k++] = this.ranges[i++];
242                 result[k++] = this.ranges[i++];
243             }
244         }
245         this.ranges = result;
246     }
247
248     @Override
249     protected void subtractRanges(Token token) {
250         if (token.type == NRANGE) {
251             this.intersectRanges(token);
252             return;
253         }
254         RangeToken tok = (RangeToken)token;
255         if (tok.ranges == null || this.ranges == null) {
256             return;
257         }
258         this.icaseCache = null;
259         this.sortRanges();
260         this.compactRanges();
261         tok.sortRanges();
262         tok.compactRanges();
263
264         //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
265
266         int[] result = new int[this.ranges.length+tok.ranges.length];
267         int wp = 0, src = 0, sub = 0;
268         while (src < this.ranges.length && sub < tok.ranges.length) {
269             int srcbegin = this.ranges[src];
270             int srcend = this.ranges[src+1];
271             int subbegin = tok.ranges[sub];
272             int subend = tok.ranges[sub+1];
273             if (srcend < subbegin) {            // Not overlapped
274                                                 // src: o-----o
275                                                 // sub:         o-----o
276                                                 // res: o-----o
277                                                 // Reuse sub
278                 result[wp++] = this.ranges[src++];
279                 result[wp++] = this.ranges[src++];
280             } else if (srcend >= subbegin
281                        && srcbegin <= subend) { // Overlapped
282                                                 // src:    o--------o
283                                                 // sub:  o----o
284                                                 // sub:      o----o
285                                                 // sub:          o----o
286                                                 // sub:  o------------o
287                 if (subbegin <= srcbegin && srcend <= subend) {
288                                                 // src:    o--------o
289                                                 // sub:  o------------o
290                                                 // res: empty
291                                                 // Reuse sub
292                     src += 2;
293                 } else if (subbegin <= srcbegin) {
294                                                 // src:    o--------o
295                                                 // sub:  o----o
296                                                 // res:       o-----o
297                                                 // Reuse src(=res)
298                     this.ranges[src] = subend+1;
299                     sub += 2;
300                 } else if (srcend <= subend) {
301                                                 // src:    o--------o
302                                                 // sub:          o----o
303                                                 // res:    o-----o
304                                                 // Reuse sub
305                     result[wp++] = srcbegin;
306                     result[wp++] = subbegin-1;
307                     src += 2;
308                 } else {
309                                                 // src:    o--------o
310                                                 // sub:      o----o
311                                                 // res:    o-o    o-o
312                                                 // Reuse src(=right res)
313                     result[wp++] = srcbegin;
314                     result[wp++] = subbegin-1;
315                     this.ranges[src] = subend+1;
316                     sub += 2;
317                 }
318             } else if (subend < srcbegin) {
319                                                 // Not overlapped
320                                                 // src:          o-----o
321                                                 // sub: o----o
322                 sub += 2;
323             } else {
324                 throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
325                                            +","+this.ranges[src+1]
326                                            +"] - ["+tok.ranges[sub]
327                                            +","+tok.ranges[sub+1]
328                                            +"]");
329             }
330         }
331         while (src < this.ranges.length) {
332             result[wp++] = this.ranges[src++];
333             result[wp++] = this.ranges[src++];
334         }
335         this.ranges = new int[wp];
336         System.arraycopy(result, 0, this.ranges, 0, wp);
337                                                 // this.ranges is sorted and compacted.
338     }
339
340     /**
341      * @param tok Ignore whether it is NRANGE or not.
342      */
343     @Override
344     protected void intersectRanges(Token token) {
345         RangeToken tok = (RangeToken)token;
346         if (tok.ranges == null || this.ranges == null) {
347             return;
348         }
349         this.icaseCache = null;
350         this.sortRanges();
351         this.compactRanges();
352         tok.sortRanges();
353         tok.compactRanges();
354
355         int[] result = new int[this.ranges.length+tok.ranges.length];
356         int wp = 0, src1 = 0, src2 = 0;
357         while (src1 < this.ranges.length && src2 < tok.ranges.length) {
358             int src1begin = this.ranges[src1];
359             int src1end = this.ranges[src1+1];
360             int src2begin = tok.ranges[src2];
361             int src2end = tok.ranges[src2+1];
362             if (src1end < src2begin) {          // Not overlapped
363                                                 // src1: o-----o
364                                                 // src2:         o-----o
365                                                 // res:  empty
366                                                 // Reuse src2
367                 src1 += 2;
368             } else if (src1end >= src2begin
369                        && src1begin <= src2end) { // Overlapped
370                                                 // src1:    o--------o
371                                                 // src2:  o----o
372                                                 // src2:      o----o
373                                                 // src2:          o----o
374                                                 // src2:  o------------o
375                 if (src2begin <= src1begin && src1end <= src2end) {
376                                                 // src1:    o--------o
377                                                 // src2:  o------------o
378                                                 // res:     o--------o
379                                                 // Reuse src2
380                     result[wp++] = src1begin;
381                     result[wp++] = src1end;
382                     src1 += 2;
383                 } else if (src2begin <= src1begin) {
384                                                 // src1:    o--------o
385                                                 // src2:  o----o
386                                                 // res:     o--o
387                                                 // Reuse the rest of src1
388                     result[wp++] = src1begin;
389                     result[wp++] = src2end;
390                     this.ranges[src1] = src2end+1;
391                     src2 += 2;
392                 } else if (src1end <= src2end) {
393                                                 // src1:    o--------o
394                                                 // src2:          o----o
395                                                 // res:           o--o
396                                                 // Reuse src2
397                     result[wp++] = src2begin;
398                     result[wp++] = src1end;
399                     src1 += 2;
400                 } else {
401                                                 // src1:    o--------o
402                                                 // src2:      o----o
403                                                 // res:       o----o
404                                                 // Reuse the rest of src1
405                     result[wp++] = src2begin;
406                     result[wp++] = src2end;
407                     this.ranges[src1] = src2end+1;
408                 }
409             } else if (src2end < src1begin) {
410                                                 // Not overlapped
411                                                 // src1:          o-----o
412                                                 // src2: o----o
413                 src2 += 2;
414             } else {
415                 throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
416                                            +this.ranges[src1]
417                                            +","+this.ranges[src1+1]
418                                            +"] & ["+tok.ranges[src2]
419                                            +","+tok.ranges[src2+1]
420                                            +"]");
421             }
422         }
423         while (src1 < this.ranges.length) {
424             result[wp++] = this.ranges[src1++];
425             result[wp++] = this.ranges[src1++];
426         }
427         this.ranges = new int[wp];
428         System.arraycopy(result, 0, this.ranges, 0, wp);
429                                                 // this.ranges is sorted and compacted.
430     }
431
432     /**
433      * for RANGE: Creates complement.
434      * for NRANGE: Creates the same meaning RANGE.
435      */
436     static Token complementRanges(Token token) {
437         if (token.type != RANGE && token.type != NRANGE) {
438             throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
439         }
440         RangeToken tok = (RangeToken)token;
441         tok.sortRanges();
442         tok.compactRanges();
443         int len = tok.ranges.length+2;
444         if (tok.ranges[0] == 0) {
445             len -= 2;
446         }
447         int last = tok.ranges[tok.ranges.length-1];
448         if (last == UTF16_MAX) {
449             len -= 2;
450         }
451         RangeToken ret = Token.createRange();
452         ret.ranges = new int[len];
453         int wp = 0;
454         if (tok.ranges[0] > 0) {
455             ret.ranges[wp++] = 0;
456             ret.ranges[wp++] = tok.ranges[0]-1;
457         }
458         for (int i = 1;  i < tok.ranges.length-2;  i += 2) {
459             ret.ranges[wp++] = tok.ranges[i]+1;
460             ret.ranges[wp++] = tok.ranges[i+1]-1;
461         }
462         if (last != UTF16_MAX) {
463             ret.ranges[wp++] = last+1;
464             ret.ranges[wp] = UTF16_MAX;
465         }
466         ret.setCompacted();
467         return ret;
468     }
469
470     synchronized RangeToken getCaseInsensitiveToken() {
471         if (this.icaseCache != null) {
472             return this.icaseCache;
473         }
474
475         RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
476         for (int i = 0;  i < this.ranges.length;  i += 2) {
477             for (int ch = this.ranges[i];  ch <= this.ranges[i+1];  ch ++) {
478                 if (ch > 0xffff) {
479                     uppers.addRange(ch, ch);
480                 } else {
481                     char uch = Character.toUpperCase((char)ch);
482                     uppers.addRange(uch, uch);
483                 }
484             }
485         }
486         RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
487         for (int i = 0;  i < uppers.ranges.length;  i += 2) {
488             for (int ch = uppers.ranges[i];  ch <= uppers.ranges[i+1];  ch ++) {
489                 if (ch > 0xffff) {
490                     lowers.addRange(ch, ch);
491                 } else {
492                     char uch = Character.toLowerCase((char)ch);
493                     lowers.addRange(uch, uch);
494                 }
495             }
496         }
497         lowers.mergeRanges(uppers);
498         lowers.mergeRanges(this);
499         lowers.compactRanges();
500
501         this.icaseCache = lowers;
502         return lowers;
503     }
504
505     void dumpRanges() {
506         System.err.print("RANGE: ");
507         if (this.ranges == null) {
508             System.err.println(" NULL");
509             return;
510         }
511         for (int i = 0;  i < this.ranges.length;  i += 2) {
512             System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
513         }
514         System.err.println("");
515     }
516
517     @Override
518     boolean match(int ch) {
519         if (this.map == null) {
520             this.createMap();
521         }
522         boolean ret;
523         if (this.type == RANGE) {
524             if (ch < MAPSIZE) {
525                 return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
526             }
527             ret = false;
528             for (int i = this.nonMapIndex;  i < this.ranges.length;  i += 2) {
529                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
530                     return true;
531                 }
532             }
533         } else {
534             if (ch < MAPSIZE) {
535                 return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
536             }
537             ret = true;
538             for (int i = this.nonMapIndex;  i < this.ranges.length;  i += 2) {
539                 if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
540                     return false;
541                 }
542             }
543         }
544         return ret;
545     }
546
547     private static final int MAPSIZE = 256;
548     private void createMap() {
549         int asize = MAPSIZE/32;                 // 32 is the number of bits in `int'.
550         int [] map = new int[asize];
551         int nonMapIndex = this.ranges.length;
552         for (int i = 0; i < asize; ++i) {
553             map[i] = 0;
554         }
555         for (int i = 0; i < this.ranges.length;  i += 2) {
556             int s = this.ranges[i];
557             int e = this.ranges[i+1];
558             if (s < MAPSIZE) {
559                 for (int j = s; j <= e && j < MAPSIZE; j++) {
560                     map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
561                 }
562             }
563             else {
564                 nonMapIndex = i;
565                 break;
566             }
567             if (e >= MAPSIZE) {
568                 nonMapIndex = i;
569                 break;
570             }
571         }
572         this.map = map;
573         this.nonMapIndex = nonMapIndex;
574         //for (int i = 0;  i < asize;  i ++)  System.err.println("Map: "+Integer.toString(this.map[i], 16));
575     }
576
577     @Override
578     public String toString(int options) {
579         String ret;
580         if (this.type == RANGE) {
581             if (this == Token.token_dot) {
582                 ret = ".";
583             } else if (this == Token.token_0to9) {
584                 ret = "\\d";
585             } else if (this == Token.token_wordchars) {
586                 ret = "\\w";
587             } else if (this == Token.token_spaces) {
588                 ret = "\\s";
589             } else {
590                 StringBuffer sb = new StringBuffer();
591                 sb.append('[');
592                 for (int i = 0;  i < this.ranges.length;  i += 2) {
593                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
594                         sb.append(',');
595                     }
596                     if (this.ranges[i] == this.ranges[i+1]) {
597                         sb.append(escapeCharInCharClass(this.ranges[i], options));
598                     } else {
599                         sb.append(escapeCharInCharClass(this.ranges[i], options));
600                         sb.append('-');
601                         sb.append(escapeCharInCharClass(this.ranges[i+1], options));
602                     }
603                 }
604                 sb.append(']');
605                 ret = sb.toString();
606             }
607         } else {
608             if (this == Token.token_not_0to9) {
609                 ret = "\\D";
610             } else if (this == Token.token_not_wordchars) {
611                 ret = "\\W";
612             } else if (this == Token.token_not_spaces) {
613                 ret = "\\S";
614             } else {
615                 StringBuffer sb = new StringBuffer();
616                 sb.append("[^");
617                 for (int i = 0;  i < this.ranges.length;  i += 2) {
618                     if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
619                         sb.append(',');
620                     }
621                     if (this.ranges[i] == this.ranges[i+1]) {
622                         sb.append(escapeCharInCharClass(this.ranges[i], options));
623                     } else {
624                         sb.append(escapeCharInCharClass(this.ranges[i], options));
625                         sb.append('-');
626                         sb.append(escapeCharInCharClass(this.ranges[i+1], options));
627                     }
628                 }
629                 sb.append(']');
630                 ret = sb.toString();
631             }
632         }
633         return ret;
634     }
635
636     private static String escapeCharInCharClass(int ch, int options) {
637         String ret;
638         switch (ch) {
639           case '[':  case ']':  case '-':  case '^':
640           case ',':  case '\\':
641             ret = "\\"+(char)ch;
642             break;
643           case '\f':  ret = "\\f";  break;
644           case '\n':  ret = "\\n";  break;
645           case '\r':  ret = "\\r";  break;
646           case '\t':  ret = "\\t";  break;
647           case 0x1b:  ret = "\\e";  break;
648           case '$':
649               if (isSet(options, RegularExpression.XMLSCHEMA_MODE)) {
650                   ret = "\\$";
651                   break;
652               }
653           //case 0x0b:  ret = "\\v";  break;
654           default:
655             if (ch < 0x20) {
656                 String pre = "0"+Integer.toHexString(ch);
657                 ret = "\\x"+pre.substring(pre.length()-2, pre.length());
658             } else if (ch >= 0x10000) {
659                 String pre = "0"+Integer.toHexString(ch);
660                 String sub = pre.substring(pre.length()-6, pre.length());
661                 if (isSet(options, RegularExpression.XMLSCHEMA_MODE)) {
662                     ret = "\\x{"+sub+"}";
663                 } else {
664                     ret = "\\v"+sub;
665                 }
666             } else {
667                 ret = ""+(char)ch;
668             }
669         }
670         return ret;
671     }
672
673 }