* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* @xerces.internal
- *
+ *
* @version $Id: Op.java 572108 2007-09-02 18:48:31Z mrglavas $
*/
class Op {
static final int RANGE = 3; // [a-zA-Z]
static final int NRANGE = 4; // [^a-zA-Z]
static final int ANCHOR = 5; // ^ $ ...
- static final int STRING = 6; // literal String
+ static final int STRING = 6; // literal String
static final int CLOSURE = 7; // X*
static final int NONGREEDYCLOSURE = 8; // X*?
static final int QUESTION = 9; // X?
static final boolean COUNT = false;
/**
 * Creates a plain Op of type Op.DOT.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static Op createDot() {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new Op(Op.DOT);
}
/**
 * Creates a CharOp of type Op.CHAR carrying the given data value.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static CharOp createChar(int data) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new CharOp(Op.CHAR, data);
}
/**
 * Creates a CharOp of type Op.ANCHOR carrying the given data value.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static CharOp createAnchor(int data) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new CharOp(Op.ANCHOR, data);
}
/**
 * Creates a CharOp of type Op.CAPTURE whose data is number and whose
 * next field is set to the given next op.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static CharOp createCapture(int number, Op next) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
CharOp op = new CharOp(Op.CAPTURE, number);
op.next = next;
return op;
}
/**
 * Creates a UnionOp of type Op.UNION; size is forwarded to the UnionOp constructor.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static UnionOp createUnion(int size) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
//System.err.println("Creates UnionOp");
return new UnionOp(Op.UNION, size);
}
/**
 * Creates the op for a closure. Although the declared return type is ChildOp,
 * the object is a ModifierOp constructed with (Op.CLOSURE, id, -1).
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ChildOp createClosure(int id) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new ModifierOp(Op.CLOSURE, id, -1);
}
/**
 * Creates a ChildOp of type Op.NONGREEDYCLOSURE with no child set yet.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ChildOp createNonGreedyClosure() {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new ChildOp(Op.NONGREEDYCLOSURE);
}
/**
 * Creates a ChildOp whose type is Op.NONGREEDYQUESTION when nongreedy is true
 * and Op.QUESTION otherwise.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ChildOp createQuestion(boolean nongreedy) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new ChildOp(nongreedy ? Op.NONGREEDYQUESTION : Op.QUESTION);
}
/**
 * Creates a RangeOp for the given token. The op type passed here is always
 * Op.RANGE, whatever the type of tok.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static RangeOp createRange(Token tok) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new RangeOp(Op.RANGE, tok);
}
/**
 * Creates a ChildOp of the caller-supplied type, with branch as its child
 * and next as its successor.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ChildOp createLook(int type, Op next, Op branch) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
ChildOp op = new ChildOp(type);
op.setChild(branch);
op.next = next;
return op;
}
/**
 * Creates a CharOp of type Op.BACKREFERENCE whose data is refno.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static CharOp createBackReference(int refno) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new CharOp(Op.BACKREFERENCE, refno);
}
/**
 * Creates a StringOp of type Op.STRING holding the given literal.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static StringOp createString(String literal) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
return new StringOp(Op.STRING, literal);
}
/**
 * Creates a ChildOp of type Op.INDEPENDENT, with branch as its child
 * and next as its successor.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ChildOp createIndependent(Op next, Op branch) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
ChildOp op = new ChildOp(Op.INDEPENDENT);
op.setChild(branch);
op.next = next;
return op;
}
/**
 * Creates a ModifierOp of type Op.MODIFIER constructed with (add, mask),
 * with branch as its child and next as its successor.
 * The instance counter Op.nofinstances is bumped only when Op.COUNT is enabled.
 */
static ModifierOp createModifier(Op next, Op branch, int add, int mask) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
ModifierOp op = new ModifierOp(Op.MODIFIER, add, mask);
op.setChild(branch);
op.next = next;
return op;
}
static ConditionOp createCondition(Op next, int ref, Op conditionflow, Op yesflow, Op noflow) {
- if (Op.COUNT) Op.nofinstances ++;
+ if (Op.COUNT) {
+ Op.nofinstances ++;
+ }
ConditionOp op = new ConditionOp(Op.CONDITION, ref, conditionflow, yesflow, noflow);
op.next = next;
return op;
throw new RuntimeException("Internal Error: type="+this.type);
}
// ModifierOp
- int getData() { // CharOp for CHAR, BACKREFERENCE, CAPTURE, ANCHOR,
+ int getData() { // CharOp for CHAR, BACKREFERENCE, CAPTURE, ANCHOR,
throw new RuntimeException("Internal Error: type="+this.type);
}
int getData2() { // ModifierOp
super(type);
this.charData = data;
}
/** Returns the charData value stored in this op. */
+ @Override
int getData() {
return this.charData;
}
// ================================================================
/**
 * Op that keeps its branch Ops in a Vector and exposes them through
 * size() and elementAt(int).
 */
static class UnionOp extends Op {
// Branch ops, in the order they were added.
- final Vector branches;
+ final Vector<Op> branches;
// size is only the Vector's initial capacity; the op starts with no branches.
UnionOp(int type, int size) {
super(type);
- this.branches = new Vector(size);
+ this.branches = new Vector<>(size);
}
// Appends op to the end of the branch list.
void addElement(Op op) {
this.branches.addElement(op);
}
// Number of branches added so far.
+ @Override
int size() {
return this.branches.size();
}
// Returns the branch at index; Vector.elementAt rejects out-of-range indexes.
+ @Override
Op elementAt(int index) {
- return (Op)this.branches.elementAt(index);
+ return this.branches.elementAt(index);
}
}
/** Stores child as this op's child op, replacing any previous value. */
void setChild(Op child) {
this.child = child;
}
/** Returns the child op stored in this op's child field. */
+ @Override
Op getChild() {
return this.child;
}
this.v1 = v1;
this.v2 = v2;
}
/** Returns the first stored value, v1. */
+ @Override
int getData() {
return this.v1;
}
/** Returns the second stored value, v2. */
+ @Override
int getData2() {
return this.v2;
}
super(type);
this.tok = tok;
}
/** Returns this op's token, cast to RangeToken. */
+ @Override
RangeToken getToken() {
return (RangeToken)this.tok;
}
super(type);
this.string = literal;
}
/** Returns the literal string stored in this op. */
+ @Override
String getString() {
return this.string;
}
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* A regular expression parser for the XML Schema.
- *
+ *
* @xerces.internal
*
* @author TAMURA Kent <kent@trl.ibm.co.jp>
super(locale);
}
/** Advances with next() and returns a plain character token for '^'. */
+ @Override
Token processCaret() throws ParseException {
this.next();
return Token.createChar('^');
}
/** Advances with next() and returns a plain character token for '$'. */
+ @Override
Token processDollar() throws ParseException {
this.next();
return Token.createChar('$');
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processLookahead() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processNegativelookahead() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processLookbehind() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processNegativelookbehind() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_A() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_Z() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_z() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_b() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_B() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_lt() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processBacksolidus_gt() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/** Advances with next() and returns tok wrapped in a closure token (X*). */
+ @Override
Token processStar(Token tok) throws ParseException {
this.next();
return Token.createClosure(tok);
}
/**
 * Advances with next() and rewrites X+ as the concatenation of tok and a
 * closure of tok. The same tok instance is used in both positions.
 */
+ @Override
Token processPlus(Token tok) throws ParseException {
// X+ -> XX*
this.next();
return Token.createConcat(tok, Token.createClosure(tok));
}
+ @Override
Token processQuestion(Token tok) throws ParseException {
// X? -> X|
this.next();
par.addChild(Token.createEmpty());
return par;
}
/** Always returns false; the off argument is not inspected. */
+ @Override
boolean checkQuestion(int off) {
return false;
}
/**
 * Parses a parenthesised sub-expression: advances with next(), parses the
 * inner expression with parseRegex(), and wraps the result via
 * Token.createParen(..., 0). Throws the "parser.factor.1" error at offset-1
 * when the token after the inner expression is not T_RPAREN.
 */
+ @Override
Token processParen() throws ParseException {
this.next();
Token tok = Token.createParen(this.parseRegex(), 0);
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // Skips ')'
return tok;
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processParen2() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processCondition() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processModifiers() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
Token processIndependent() throws ParseException {
throw ex("parser.process.1", this.offset);
}
/** Advances with next() and returns the shorthand token looked up for 'c'. */
+ @Override
Token processBacksolidus_c() throws ParseException {
this.next();
return this.getTokenForShorthand('c');
}
/** Advances with next() and returns the shorthand token looked up for 'C'. */
+ @Override
Token processBacksolidus_C() throws ParseException {
this.next();
return this.getTokenForShorthand('C');
}
/** Advances with next() and returns the shorthand token looked up for 'i'. */
+ @Override
Token processBacksolidus_i() throws ParseException {
this.next();
return this.getTokenForShorthand('i');
}
/** Advances with next() and returns the shorthand token looked up for 'I'. */
+ @Override
Token processBacksolidus_I() throws ParseException {
this.next();
return this.getTokenForShorthand('I');
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...), reported two positions before the current offset.
 */
+ @Override
Token processBacksolidus_g() throws ParseException {
throw this.ex("parser.process.1", this.offset-2);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...), reported two positions before the current offset.
 */
+ @Override
Token processBacksolidus_X() throws ParseException {
throw ex("parser.process.1", this.offset-2);
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...), reported four positions before the current offset.
 */
+ @Override
Token processBackreference() throws ParseException {
throw ex("parser.process.1", this.offset-4);
}
+ @Override
int processCIinCharacterClass(RangeToken tok, int c) {
tok.mergeRanges(this.getTokenForShorthand(c));
return -1;
* @param useNrange Ignored.
* @return This never returns an NRANGE token.
*/
+ @Override
protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
this.setContext(S_INBRACKETS);
this.next(); // '['
int type;
boolean firstloop = true;
while ((type = this.read()) != T_EOF) { // Don't use 'continue' for this loop.
-
+
wasDecoded = false;
// single-range | from-to-range | subtraction
if (type == T_CHAR && this.chardata == ']' && !firstloop) {
case 'i': case 'I':
case 'c': case 'C':
c = this.processCIinCharacterClass(tok, c);
- if (c < 0) end = true;
+ if (c < 0) {
+ end = true;
+ }
break;
-
+
case 'p':
case 'P':
int pstart = this.offset;
RangeToken tok2 = this.processBacksolidus_pP(c);
- if (tok2 == null) throw this.ex("parser.atom.5", pstart);
+ if (tok2 == null) {
+ throw this.ex("parser.atom.5", pstart);
+ }
tok.mergeRanges(tok2);
end = true;
break;
-
+
case '-':
c = this.decodeEscaped();
wasDecoded = true;
}
RangeToken range2 = this.parseCharacterClass(false);
tok.subtractRanges(range2);
- if (this.read() != T_CHAR || this.chardata != ']')
+ if (this.read() != T_CHAR || this.chardata != ']') {
throw this.ex("parser.cc.5", this.offset);
+ }
break; // Exit this loop
}
this.next();
if (!end) { // if not shorthands...
if (type == T_CHAR) {
- if (c == '[') throw this.ex("parser.cc.6", this.offset-2);
- if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
- if (c == '-' && this.chardata != ']' && !firstloop) throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
+ if (c == '[') {
+ throw this.ex("parser.cc.6", this.offset-2);
+ }
+ if (c == ']') {
+ throw this.ex("parser.cc.7", this.offset-2);
+ }
+ if (c == '-' && this.chardata != ']' && !firstloop)
+ {
+ throw this.ex("parser.cc.8", this.offset-2); // if regex = '[-]' then invalid
+ }
}
if (this.read() != T_CHAR || this.chardata != '-' || c == '-' && !wasDecoded && firstloop) { // Here is no '-'.
if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
} else { // Found '-'
// Is this '-' a from-to token?
this.next(); // Skips '-'
- if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
+ if ((type = this.read()) == T_EOF) {
+ throw this.ex("parser.cc.2", this.offset);
+ }
// c '-' ']' -> '-' is a single-range.
if(type == T_CHAR && this.chardata == ']') { // if - is at the last position of the group
if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
else if (type == T_XMLSCHEMA_CC_SUBTRACTION) {
throw this.ex("parser.cc.8", this.offset-1);
} else {
-
+
int rangeend = this.chardata;
if (type == T_CHAR) {
- if (rangeend == '[') throw this.ex("parser.cc.6", this.offset-1);
- if (rangeend == ']') throw this.ex("parser.cc.7", this.offset-1);
- if (rangeend == '-') throw this.ex("parser.cc.8", this.offset-2);
+ if (rangeend == '[') {
+ throw this.ex("parser.cc.6", this.offset-1);
+ }
+ if (rangeend == ']') {
+ throw this.ex("parser.cc.7", this.offset-1);
+ }
+ if (rangeend == '-') {
+ throw this.ex("parser.cc.8", this.offset-2);
+ }
}
- else if (type == T_BACKSOLIDUS)
+ else if (type == T_BACKSOLIDUS) {
rangeend = this.decodeEscaped();
+ }
this.next();
- if (c > rangeend) throw this.ex("parser.ope.3", this.offset-1);
+ if (c > rangeend) {
+ throw this.ex("parser.ope.3", this.offset-1);
+ }
if (!this.isSet(RegularExpression.IGNORE_CASE) ||
(c > 0xffff && rangeend > 0xffff)) {
tok.addRange(c, rangeend);
}
firstloop = false;
}
- if (this.read() == T_EOF)
+ if (this.read() == T_EOF) {
throw this.ex("parser.cc.2", this.offset);
+ }
tok.sortRanges();
tok.compactRanges();
//tok.dumpRanges();
return tok;
}
/**
 * Never returns a token: always throws the exception built by
 * ex("parser.process.1", ...) at the current offset.
 */
+ @Override
protected RangeToken parseSetOperations() throws ParseException {
throw this.ex("parser.process.1", this.offset);
}
-
+
+ @Override
Token getTokenForShorthand(int ch) {
switch (ch) {
case 'd':
throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
}
}
+ @Override
int decodeEscaped() throws ParseException {
- if (this.read() != T_BACKSOLIDUS) throw ex("parser.next.1", this.offset-1);
+ if (this.read() != T_BACKSOLIDUS) {
+ throw ex("parser.next.1", this.offset-1);
+ }
int c = this.chardata;
switch (c) {
case 'n': c = '\n'; break; // LINE FEED U+000A
return c;
}
- static private Hashtable ranges = null;
- static private Hashtable ranges2 = null;
+ static private Hashtable<String, Token> ranges = null;
+ static private Hashtable<String, Token> ranges2 = null;
static synchronized protected RangeToken getRange(String name, boolean positive) {
if (ranges == null) {
- ranges = new Hashtable();
- ranges2 = new Hashtable();
+ ranges = new Hashtable<>();
+ ranges2 = new Hashtable<>();
Token tok = Token.createRange();
setupRange(tok, SPACES);
/*
* \w is defined by the XML Schema specification to be:
- * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
+ * [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
*/
tok = Token.createRange();
tok.mergeRanges(Token.getRange("P", true));
/**
 * Adds one range to the token for each consecutive pair of characters in src:
 * (src[0], src[1]), (src[2], src[3]), and so on.
 * src must have even length; an odd length makes charAt(i+1) run past the end.
 */
static void setupRange(Token range, String src) {
int len = src.length();
- for (int i = 0; i < len; i += 2)
+ for (int i = 0; i < len; i += 2) {
range.addRange(src.charAt(i), src.charAt(i+1));
+ }
}
/**
 * Adds one range to the token for each consecutive pair of ints in src:
 * (src[0], src[1]), (src[2], src[3]), and so on.
 * src must have even length; an odd length makes src[i+1] run past the end.
 */
static void setupRange(Token range, int[] src) {
int len = src.length;
- for (int i = 0; i < len; i += 2)
+ for (int i = 0; i < len; i += 2) {
range.addRange(src[i], src[i+1]);
+ }
}
private static final String SPACES = "\t\n\r\r ";
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* @xerces.internal
- *
+ *
* @version $Id: REUtil.java 828015 2009-10-21 13:56:13Z knoaman $
*/
public final class REUtil {
/**
 * Copies the characters at indexes begin (inclusive) to end (exclusive) out of
 * the iterator into a new String.
 * Each character is fetched with setIndex, so the iterator's current position
 * is moved as a side effect.
 */
static final String substring(CharacterIterator iterator, int begin, int end) {
char[] src = new char[end-begin];
- for (int i = 0; i < src.length; i ++)
+ for (int i = 0; i < src.length; i ++) {
src[i] = iterator.setIndex(i+begin);
+ }
return new String(src);
}
}
static final int parseOptions(String opts) throws ParseException {
- if (opts == null) return 0;
+ if (opts == null) {
+ return 0;
+ }
int options = 0;
for (int i = 0; i < opts.length(); i ++) {
int v = getOptionValue(opts.charAt(i));
- if (v == 0)
+ if (v == 0) {
throw new ParseException("Unknown Option: "+opts.substring(i), -1);
+ }
options |= v;
}
return options;
/**
 * Renders an option bit mask as a string with one character per set flag.
 * Flags are tested in a fixed order, so the output order is always
 * F, H, X, i, m, s, u, w, x, ','. The result is interned before it is returned.
 */
static final String createOptionString(int options) {
StringBuffer sb = new StringBuffer(9);
- if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0)
- sb.append((char)'F');
- if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0)
- sb.append((char)'H');
- if ((options & RegularExpression.XMLSCHEMA_MODE) != 0)
- sb.append((char)'X');
- if ((options & RegularExpression.IGNORE_CASE) != 0)
- sb.append((char)'i');
- if ((options & RegularExpression.MULTIPLE_LINES) != 0)
- sb.append((char)'m');
- if ((options & RegularExpression.SINGLE_LINE) != 0)
- sb.append((char)'s');
- if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0)
- sb.append((char)'u');
- if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0)
- sb.append((char)'w');
- if ((options & RegularExpression.EXTENDED_COMMENT) != 0)
- sb.append((char)'x');
- if ((options & RegularExpression.SPECIAL_COMMA) != 0)
- sb.append((char)',');
+ if ((options & RegularExpression.PROHIBIT_FIXED_STRING_OPTIMIZATION) != 0) {
+ sb.append('F');
+ }
+ if ((options & RegularExpression.PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) != 0) {
+ sb.append('H');
+ }
+ if ((options & RegularExpression.XMLSCHEMA_MODE) != 0) {
+ sb.append('X');
+ }
+ if ((options & RegularExpression.IGNORE_CASE) != 0) {
+ sb.append('i');
+ }
+ if ((options & RegularExpression.MULTIPLE_LINES) != 0) {
+ sb.append('m');
+ }
+ if ((options & RegularExpression.SINGLE_LINE) != 0) {
+ sb.append('s');
+ }
+ if ((options & RegularExpression.USE_UNICODE_CATEGORY) != 0) {
+ sb.append('u');
+ }
+ if ((options & RegularExpression.UNICODE_WORD_BOUNDARY) != 0) {
+ sb.append('w');
+ }
+ if ((options & RegularExpression.EXTENDED_COMMENT) != 0) {
+ sb.append('x');
+ }
+ if ((options & RegularExpression.SPECIAL_COMMA) != 0) {
+ sb.append(',');
+ }
return sb.toString().intern();
}
if (ch == '#') { // Skips characters between '#' and a line end.
while (offset < len) {
ch = regex.charAt(offset++);
- if (ch == '\r' || ch == '\n')
+ if (ch == '\r' || ch == '\n') {
break;
+ }
}
continue;
}
buffer.append((char)next);
offset ++;
} else { // Other escaped character.
- buffer.append((char)'\\');
+ buffer.append('\\');
buffer.append((char)next);
offset ++;
}
else if (next == '^' && offset + 1 < len) {
next = regex.charAt(offset + 1);
if (next == '[' || next ==']') {
- buffer.append((char)'^');
+ buffer.append('^');
buffer.append((char)next);
offset += 2;
}
System.out.println( "Error:Usage: java REUtil -i|-m|-s|-u|-w|-X regularExpression String" );
System.exit( 0 );
}
- for (int i = 0; i < argv.length; i ++) {
- if (argv[i].length() == 0 || argv[i].charAt(0) != '-') {
- if (pattern == null)
- pattern = argv[i];
- else if (target == null)
- target = argv[i];
- else
- System.err.println("Unnecessary: "+argv[i]);
- } else if (argv[i].equals("-i")) {
+ for (String element : argv) {
+ if (element.length() == 0 || element.charAt(0) != '-') {
+ if (pattern == null) {
+ pattern = element;
+ } else if (target == null) {
+ target = element;
+ } else {
+ System.err.println("Unnecessary: "+element);
+ }
+ } else if (element.equals("-i")) {
options += "i";
- } else if (argv[i].equals("-m")) {
+ } else if (element.equals("-m")) {
options += "m";
- } else if (argv[i].equals("-s")) {
+ } else if (element.equals("-s")) {
options += "s";
- } else if (argv[i].equals("-u")) {
+ } else if (element.equals("-u")) {
options += "u";
- } else if (argv[i].equals("-w")) {
+ } else if (element.equals("-w")) {
options += "w";
- } else if (argv[i].equals("-X")) {
+ } else if (element.equals("-X")) {
options += "X";
} else {
- System.err.println("Unknown option: "+argv[i]);
+ System.err.println("Unknown option: "+element);
}
}
RegularExpression reg = new RegularExpression(pattern, options);
Match match = new Match();
reg.matches(target, match);
for (int i = 0; i < match.getNumberOfGroups(); i ++) {
- if (i == 0 ) System.out.print("Matched range for the whole pattern: ");
- else System.out.print("["+i+"]: ");
- if (match.getBeginning(i) < 0)
+ if (i == 0 ) {
+ System.out.print("Matched range for the whole pattern: ");
+ } else {
+ System.out.print("["+i+"]: ");
+ }
+ if (match.getBeginning(i) < 0) {
System.out.println("-1");
- else {
+ } else {
System.out.print(match.getBeginning(i)+", "+match.getEnd(i)+", ");
System.out.println("\""+match.getCapturedText(i)+"\"");
}
int loc = pe.getLocation();
if (loc >= 0) {
System.err.print(indent);
- for (int i = 0; i < loc; i ++) System.err.print("-");
+ for (int i = 0; i < loc; i ++) {
+ System.err.print("-");
+ }
System.err.println("^");
}
}
if (".*+?{[()|\\^$".indexOf(ch) >= 0) {
if (buffer == null) {
buffer = new StringBuffer(i+(len-i)*2);
- if (i > 0) buffer.append(literal.substring(0, i));
+ if (i > 0) {
+ buffer.append(literal.substring(0, i));
+ }
}
- buffer.append((char)'\\');
+ buffer.append('\\');
buffer.append((char)ch);
- } else if (buffer != null)
+ } else if (buffer != null) {
buffer.append((char)ch);
+ }
}
return buffer != null ? buffer.toString() : literal;
}
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* This class represents a character class such as [a-z] or a period.
- *
+ *
* @xerces.internal
*
* @version $Id: RangeToken.java 965250 2010-07-18 16:04:58Z mrglavas $
final class RangeToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -553983121197679934L;
-
+
int[] ranges;
boolean sorted;
boolean compacted;
}
// for RANGE or NRANGE
+ @Override
protected void addRange(int start, int end) {
this.icaseCache = null;
//System.err.println("Token#addRange(): "+start+" "+end);
int[] temp = new int[pos+2];
System.arraycopy(this.ranges, 0, temp, 0, pos);
this.ranges = temp;
- if (this.ranges[pos-1] >= r1)
+ if (this.ranges[pos-1] >= r1) {
this.setSorted(false);
+ }
this.ranges[pos++] = r1;
this.ranges[pos] = r2;
- if (!this.sorted)
+ if (!this.sorted) {
this.sortRanges();
+ }
}
}
}
/**
 * Records whether the ranges are sorted. Marking them unsorted also clears
 * the compacted flag; marking them sorted leaves compacted untouched.
 */
private final void setSorted(boolean sort) {
this.sorted = sort;
- if (!sort) this.compacted = false;
+ if (!sort) {
+ this.compacted = false;
+ }
}
private final boolean isCompacted() {
return this.compacted;
this.compacted = true;
}
+ @Override
protected void sortRanges() {
- if (this.isSorted())
+ if (this.isSorted()) {
return;
+ }
if (this.ranges == null)
+ {
return;
//System.err.println("Do sorting: "+this.ranges.length);
+ }
// Bubble sort
// Why? -- In many cases,
/**
* this.ranges is sorted.
*/
+ @Override
protected void compactRanges() {
boolean DEBUG = false;
- if (this.ranges == null || this.ranges.length <= 2)
+ if (this.ranges == null || this.ranges.length <= 2) {
return;
- if (this.isCompacted())
+ }
+ if (this.isCompacted()) {
return;
+ }
int base = 0; // Index of writing point
int target = 0; // Index of processing point
if (base != target) {
this.ranges[base] = this.ranges[target++];
this.ranges[base+1] = this.ranges[target++];
- } else
+ } else {
target += 2;
+ }
int baseend = this.ranges[base+1];
while (target < this.ranges.length) {
- if (baseend+1 < this.ranges[target])
+ if (baseend+1 < this.ranges[target]) {
break;
+ }
if (baseend+1 == this.ranges[target]) {
- if (DEBUG)
+ if (DEBUG) {
System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
+", "+this.ranges[base+1]
+"], ["+this.ranges[target]
+"] -> ["+this.ranges[base]
+", "+this.ranges[target+1]
+"]");
+ }
this.ranges[base+1] = this.ranges[target+1];
baseend = this.ranges[base+1];
target += 2;
} else if (baseend >= this.ranges[target+1]) {
- if (DEBUG)
+ if (DEBUG) {
System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
+", "+this.ranges[base+1]
+"], ["+this.ranges[target]
+"] -> ["+this.ranges[base]
+", "+this.ranges[base+1]
+"]");
+ }
target += 2;
} else if (baseend < this.ranges[target+1]) {
- if (DEBUG)
+ if (DEBUG) {
System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
+", "+this.ranges[base+1]
+"], ["+this.ranges[target]
+"] -> ["+this.ranges[base]
+", "+this.ranges[target+1]
+"]");
+ }
this.ranges[base+1] = this.ranges[target+1];
baseend = this.ranges[base+1];
target += 2;
this.setCompacted();
}
+ @Override
protected void mergeRanges(Token token) {
RangeToken tok = (RangeToken)token;
this.sortRanges();
tok.sortRanges();
- if (tok.ranges == null)
+ if (tok.ranges == null) {
return;
+ }
this.icaseCache = null;
this.setSorted(true);
if (this.ranges == null) {
this.ranges = result;
}
+ @Override
protected void subtractRanges(Token token) {
if (token.type == NRANGE) {
this.intersectRanges(token);
return;
}
RangeToken tok = (RangeToken)token;
- if (tok.ranges == null || this.ranges == null)
+ if (tok.ranges == null || this.ranges == null) {
return;
+ }
this.icaseCache = null;
this.sortRanges();
this.compactRanges();
/**
* @param tok Ignore whether it is NRANGE or not.
*/
+ @Override
protected void intersectRanges(Token token) {
RangeToken tok = (RangeToken)token;
- if (tok.ranges == null || this.ranges == null)
+ if (tok.ranges == null || this.ranges == null) {
return;
+ }
this.icaseCache = null;
this.sortRanges();
this.compactRanges();
* for NRANGE: Creates the same meaning RANGE.
*/
static Token complementRanges(Token token) {
- if (token.type != RANGE && token.type != NRANGE)
+ if (token.type != RANGE && token.type != NRANGE) {
throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
+ }
RangeToken tok = (RangeToken)token;
tok.sortRanges();
tok.compactRanges();
int len = tok.ranges.length+2;
- if (tok.ranges[0] == 0)
+ if (tok.ranges[0] == 0) {
len -= 2;
+ }
int last = tok.ranges[tok.ranges.length-1];
- if (last == UTF16_MAX)
+ if (last == UTF16_MAX) {
len -= 2;
+ }
RangeToken ret = Token.createRange();
ret.ranges = new int[len];
int wp = 0;
}
synchronized RangeToken getCaseInsensitiveToken() {
- if (this.icaseCache != null)
+ if (this.icaseCache != null) {
return this.icaseCache;
-
+ }
+
RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
for (int i = 0; i < this.ranges.length; i += 2) {
for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
- if (ch > 0xffff)
+ if (ch > 0xffff) {
uppers.addRange(ch, ch);
- else {
+ } else {
char uch = Character.toUpperCase((char)ch);
uppers.addRange(uch, uch);
}
RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
for (int i = 0; i < uppers.ranges.length; i += 2) {
for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
- if (ch > 0xffff)
+ if (ch > 0xffff) {
lowers.addRange(ch, ch);
- else {
+ } else {
char uch = Character.toLowerCase((char)ch);
lowers.addRange(uch, uch);
}
System.err.println("");
}
+ @Override
boolean match(int ch) {
- if (this.map == null) this.createMap();
+ if (this.map == null) {
+ this.createMap();
+ }
boolean ret;
if (this.type == RANGE) {
- if (ch < MAPSIZE)
+ if (ch < MAPSIZE) {
return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
+ }
ret = false;
for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
- if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
+ if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
return true;
+ }
}
} else {
- if (ch < MAPSIZE)
+ if (ch < MAPSIZE) {
return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
+ }
ret = true;
for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
- if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
+ if (this.ranges[i] <= ch && ch <= this.ranges[i+1]) {
return false;
+ }
}
}
return ret;
for (int j = s; j <= e && j < MAPSIZE; j++) {
map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
}
- }
+ }
else {
nonMapIndex = i;
break;
//for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
}
+ @Override
public String toString(int options) {
String ret;
if (this.type == RANGE) {
- if (this == Token.token_dot)
+ if (this == Token.token_dot) {
ret = ".";
- else if (this == Token.token_0to9)
+ } else if (this == Token.token_0to9) {
ret = "\\d";
- else if (this == Token.token_wordchars)
+ } else if (this == Token.token_wordchars) {
ret = "\\w";
- else if (this == Token.token_spaces)
+ } else if (this == Token.token_spaces) {
ret = "\\s";
- else {
+ } else {
StringBuffer sb = new StringBuffer();
sb.append('[');
for (int i = 0; i < this.ranges.length; i += 2) {
- if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(',');
+ if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
+ sb.append(',');
+ }
if (this.ranges[i] == this.ranges[i+1]) {
sb.append(escapeCharInCharClass(this.ranges[i]));
} else {
sb.append(escapeCharInCharClass(this.ranges[i]));
- sb.append((char)'-');
+ sb.append('-');
sb.append(escapeCharInCharClass(this.ranges[i+1]));
}
}
ret = sb.toString();
}
} else {
- if (this == Token.token_not_0to9)
+ if (this == Token.token_not_0to9) {
ret = "\\D";
- else if (this == Token.token_not_wordchars)
+ } else if (this == Token.token_not_wordchars) {
ret = "\\W";
- else if (this == Token.token_not_spaces)
+ } else if (this == Token.token_not_spaces) {
ret = "\\S";
- else {
+ } else {
StringBuffer sb = new StringBuffer();
sb.append("[^");
for (int i = 0; i < this.ranges.length; i += 2) {
- if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(',');
+ if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) {
+ sb.append(',');
+ }
if (this.ranges[i] == this.ranges[i+1]) {
sb.append(escapeCharInCharClass(this.ranges[i]));
} else {
} else if (ch >= 0x10000) {
String pre = "0"+Integer.toHexString(ch);
ret = "\\v"+pre.substring(pre.length()-6, pre.length());
- } else
+ } else {
ret = ""+(char)ch;
+ }
}
return ret;
}
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* A Regular Expression Parser.
- *
+ *
* @xerces.internal
*
* @version $Id: RegexParser.java 1129306 2011-05-30 19:18:04Z sandygao $
int parenOpened = 1;
int parennumber = 1;
boolean hasBackReferences;
- Vector references = null;
+ Vector<ReferencePosition> references = null;
public RegexParser() {
this.setLocale(Locale.getDefault());
else {
this.resources = ResourceBundle.getBundle("org.apache.xerces.impl.xpath.regex.message");
}
- }
+ }
catch (MissingResourceException mre) {
throw new RuntimeException("Installation Problem??? Couldn't load messages: "
+ mre.getMessage());
this.parenOpened = 1;
this.hasBackReferences = false;
this.regex = regex;
- if (this.isSet(RegularExpression.EXTENDED_COMMENT))
+ if (this.isSet(RegularExpression.EXTENDED_COMMENT)) {
this.regex = REUtil.stripExtendedComment(this.regex);
+ }
this.regexlen = this.regex.length();
this.next();
Token ret = this.parseRegex();
- if (this.offset != this.regexlen)
+ if (this.offset != this.regexlen) {
throw ex("parser.parse.1", this.offset);
+ }
if (this.read() != T_EOF) {
throw ex("parser.parse.1", this.offset-1);
}
if (this.references != null) {
for (int i = 0; i < this.references.size(); i ++) {
- ReferencePosition position = (ReferencePosition)this.references.elementAt(i);
- if (this.parennumber <= position.refNumber)
+ ReferencePosition position = this.references.elementAt(i);
+ if (this.parennumber <= position.refNumber) {
throw ex("parser.parse.2", position.position);
+ }
}
this.references.removeAllElements();
}
switch (ch) {
case '\\':
ret = T_BACKSOLIDUS;
- if (this.offset >= this.regexlen)
+ if (this.offset >= this.regexlen) {
throw ex("parser.next.1", this.offset-1);
+ }
this.chardata = this.regex.charAt(this.offset++);
break;
if (this.offset < this.regexlen && this.regex.charAt(this.offset) == '[') {
this.offset++;
ret = T_XMLSCHEMA_CC_SUBTRACTION;
- } else
+ } else {
ret = T_CHAR;
+ }
break;
case '[':
ret = T_CARET;
}
break;
- case '$':
+ case '$':
if (this.isSet(RegularExpression.XMLSCHEMA_MODE)) {
ret = T_CHAR;
}
break;
case '(':
ret = T_LPAREN;
- if (this.offset >= this.regexlen)
+ if (this.offset >= this.regexlen) {
break;
- if (this.regex.charAt(this.offset) != '?')
+ }
+ if (this.regex.charAt(this.offset) != '?') {
break;
- if (++this.offset >= this.regexlen)
+ }
+ if (++this.offset >= this.regexlen) {
throw ex("parser.next.2", this.offset-1);
+ }
ch = this.regex.charAt(this.offset++);
switch (ch) {
case ':': ret = T_LPAREN2; break;
case '[': ret = T_SET_OPERATIONS; break;
case '>': ret = T_INDEPENDENT; break;
case '<':
- if (this.offset >= this.regexlen)
+ if (this.offset >= this.regexlen) {
throw ex("parser.next.2", this.offset-3);
+ }
ch = this.regex.charAt(this.offset++);
if (ch == '=') {
ret = T_LOOKBEHIND;
} else if (ch == '!') {
ret = T_NEGATIVELOOKBEHIND;
- } else
+ } else {
throw ex("parser.next.3", this.offset-3);
+ }
break;
case '#':
while (this.offset < this.regexlen) {
ch = this.regex.charAt(this.offset++);
- if (ch == ')') break;
+ if (ch == ')') {
+ break;
+ }
}
- if (ch != ')')
+ if (ch != ')') {
throw ex("parser.next.4", this.offset-1);
+ }
ret = T_COMMENT;
break;
default:
throw ex("parser.next.2", this.offset-2);
}
break;
-
+
case '\\':
ret = T_BACKSOLIDUS;
- if (this.offset >= this.regexlen)
+ if (this.offset >= this.regexlen) {
throw ex("parser.next.1", this.offset-1);
+ }
this.chardata = this.regex.charAt(this.offset++);
break;
* | atom (('*' | '+' | '?' | minmax ) '?'? )?)
* | '(?=' regex ')' | '(?!' regex ')' | '(?<=' regex ')' | '(?<!' regex ')'
* atom ::= char | '.' | range | '(' regex ')' | '(?:' regex ')' | '\' [0-9]
- * | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block
+ * | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block
*/
Token parseRegex() throws ParseException {
Token tok = this.parseTerm();
Token processLookahead() throws ParseException {
this.next();
Token tok = Token.createLook(Token.LOOKAHEAD, this.parseRegex());
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // ')'
return tok;
}
Token processNegativelookahead() throws ParseException {
this.next();
Token tok = Token.createLook(Token.NEGATIVELOOKAHEAD, this.parseRegex());
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // ')'
return tok;
}
Token processLookbehind() throws ParseException {
this.next();
Token tok = Token.createLook(Token.LOOKBEHIND, this.parseRegex());
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // ')'
return tok;
}
Token processNegativelookbehind() throws ParseException {
this.next();
Token tok = Token.createLook(Token.NEGATIVELOOKBEHIND, this.parseRegex());
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // ')'
return tok;
}
if (this.read() == T_QUESTION) {
this.next();
return Token.createNGClosure(tok);
- } else
+ } else {
return Token.createClosure(tok);
+ }
}
Token processPlus(Token tok) throws ParseException {
// X+ -> XX*
if (this.read() == T_QUESTION) {
this.next();
return Token.createConcat(tok, Token.createNGClosure(tok));
- } else
+ } else {
return Token.createConcat(tok, Token.createClosure(tok));
+ }
}
Token processQuestion(Token tok) throws ParseException {
// X? -> X|
this.next();
int p = this.parenOpened++;
Token tok = Token.createParen(this.parseRegex(), p);
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.parennumber++;
this.next(); // Skips ')'
return tok;
Token processParen2() throws ParseException {
this.next();
Token tok = Token.createParen(this.parseRegex(), 0);
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // Skips ')'
return tok;
}
Token processCondition() throws ParseException {
// this.offset points the next of '('
- if (this.offset+1 >= this.regexlen) throw ex("parser.factor.4", this.offset);
+ if (this.offset+1 >= this.regexlen) {
+ throw ex("parser.factor.4", this.offset);
+ }
// Parses a condition.
int refno = -1;
Token condition = null;
if ('1' <= ch && ch <= '9') {
refno = ch-'0';
int finalRefno = refno;
-
- if (this.parennumber <= refno)
+
+ if (this.parennumber <= refno) {
throw ex("parser.parse.2", this.offset);
+ }
while (this.offset + 1 < this.regexlen) {
ch = this.regex.charAt(this.offset + 1);
}
this.hasBackReferences = true;
- if (this.references == null) this.references = new Vector();
+ if (this.references == null) {
+ this.references = new Vector<>();
+ }
this.references.addElement(new ReferencePosition(finalRefno, this.offset));
this.offset ++;
- if (this.regex.charAt(this.offset) != ')') throw ex("parser.factor.1", this.offset);
+ if (this.regex.charAt(this.offset) != ')') {
+ throw ex("parser.factor.1", this.offset);
+ }
this.offset ++;
} else {
- if (ch == '?') this.offset --; // Points '('.
+ if (ch == '?')
+ {
+ this.offset --; // Points '('.
+ }
this.next();
condition = this.parseFactor();
switch (condition.type) {
case Token.NEGATIVELOOKBEHIND:
break;
case Token.ANCHOR:
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
break;
default:
throw ex("parser.factor.5", this.offset);
Token yesPattern = this.parseRegex();
Token noPattern = null;
if (yesPattern.type == Token.UNION) {
- if (yesPattern.size() != 2) throw ex("parser.factor.6", this.offset);
+ if (yesPattern.size() != 2) {
+ throw ex("parser.factor.6", this.offset);
+ }
noPattern = yesPattern.getChild(1);
yesPattern = yesPattern.getChild(0);
}
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next();
return Token.createCondition(refno, condition, yesPattern, noPattern);
}
while (this.offset < this.regexlen) {
ch = this.regex.charAt(this.offset);
int v = REUtil.getOptionValue(ch);
- if (v == 0) break; // '-' or ':'?
+ if (v == 0)
+ {
+ break; // '-' or ':'?
+ }
add |= v;
this.offset ++;
}
- if (this.offset >= this.regexlen) throw ex("parser.factor.2", this.offset-1);
+ if (this.offset >= this.regexlen) {
+ throw ex("parser.factor.2", this.offset-1);
+ }
if (ch == '-') {
this.offset ++;
while (this.offset < this.regexlen) {
ch = this.regex.charAt(this.offset);
int v = REUtil.getOptionValue(ch);
- if (v == 0) break; // ':'?
+ if (v == 0)
+ {
+ break; // ':'?
+ }
mask |= v;
this.offset ++;
}
- if (this.offset >= this.regexlen) throw ex("parser.factor.2", this.offset-1);
+ if (this.offset >= this.regexlen) {
+ throw ex("parser.factor.2", this.offset-1);
+ }
}
Token tok;
if (ch == ':') {
this.offset ++;
this.next();
tok = Token.createModifierGroup(this.parseRegex(), add, mask);
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next();
} else if (ch == ')') { // such as (?-i)
this.offset ++;
this.next();
tok = Token.createModifierGroup(this.parseRegex(), add, mask);
- } else
+ } else {
throw ex("parser.factor.3", this.offset);
+ }
return tok;
}
Token processIndependent() throws ParseException {
this.next();
Token tok = Token.createLook(Token.INDEPENDENT, this.parseRegex());
- if (this.read() != T_RPAREN) throw ex("parser.factor.1", this.offset-1);
+ if (this.read() != T_RPAREN) {
+ throw ex("parser.factor.1", this.offset-1);
+ }
this.next(); // Skips ')'
return tok;
}
Token processBacksolidus_c() throws ParseException {
int ch2; // Must be in 0x0040-0x005f
if (this.offset >= this.regexlen
- || ((ch2 = this.regex.charAt(this.offset++)) & 0xffe0) != 0x0040)
+ || ((ch2 = this.regex.charAt(this.offset++)) & 0xffe0) != 0x0040) {
throw ex("parser.atom.1", this.offset-1);
+ }
this.next();
return Token.createChar(ch2-0x40);
}
int refnum = this.chardata-'0';
int finalRefnum = refnum;
- if (this.parennumber <= refnum)
+ if (this.parennumber <= refnum) {
throw ex("parser.parse.2", this.offset-2);
+ }
while (this.offset < this.regexlen) {
final int ch = this.regex.charAt(this.offset);
Token tok = Token.createBackReference(finalRefnum);
this.hasBackReferences = true;
- if (this.references == null) this.references = new Vector();
+ if (this.references == null) {
+ this.references = new Vector<>();
+ }
this.references.addElement(new ReferencePosition(finalRefnum, this.offset-2));
this.next();
return tok;
* min ::= [0-9]+
* max ::= [0-9]+
*/
- Token parseFactor() throws ParseException {
+ Token parseFactor() throws ParseException {
int ch = this.read();
Token tok;
switch (ch) {
while (off < this.regexlen
&& (ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
min = min*10 +ch-'0';
- if (min < 0)
+ if (min < 0) {
throw ex("parser.quantifier.5", this.offset);
+ }
}
}
else {
if (off >= this.regexlen) {
throw ex("parser.quantifier.3", this.offset);
}
- else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
+ else if ((ch = this.regex.charAt(off++)) >= '0' && ch <= '9') {
max = ch -'0'; // {min,max}
while (off < this.regexlen
&& (ch = this.regex.charAt(off++)) >= '0'
&& ch <= '9') {
max = max*10 +ch-'0';
- if (max < 0)
+ if (max < 0) {
throw ex("parser.quantifier.5", this.offset);
+ }
}
- if (min > max)
+ if (min > max) {
throw ex("parser.quantifier.4", this.offset);
+ }
}
else { // assume {min,}
- max = -1;
+ max = -1;
}
}
- if (ch != '}')
- throw ex("parser.quantifier.2", this.offset);
+ if (ch != '}') {
+ throw ex("parser.quantifier.2", this.offset);
+ }
if (this.checkQuestion(off)) { // off -> next of '}'
tok = Token.createNGClosure(tok);
case 'p':
int pstart = this.offset;
tok = processBacksolidus_pP(this.chardata);
- if (tok == null) throw this.ex("parser.atom.5", pstart);
+ if (tok == null) {
+ throw this.ex("parser.atom.5", pstart);
+ }
break;
default:
break;
case T_CHAR:
- if (this.chardata == ']' || this.chardata == '{' || this.chardata == '}')
+ if (this.chardata == ']' || this.chardata == '{' || this.chardata == '}') {
throw this.ex("parser.atom.4", this.offset-1);
+ }
tok = Token.createChar(this.chardata);
int high = this.chardata;
this.next();
protected RangeToken processBacksolidus_pP(int c) throws ParseException {
this.next();
- if (this.read() != T_CHAR || this.chardata != '{')
+ if (this.read() != T_CHAR || this.chardata != '{') {
throw this.ex("parser.atom.2", this.offset-1);
+ }
// handle category escape
boolean positive = c == 'p';
int namestart = this.offset;
int nameend = this.regex.indexOf('}', namestart);
- if (nameend < 0)
+ if (nameend < 0) {
throw this.ex("parser.atom.3", this.offset);
+ }
String pname = this.regex.substring(namestart, nameend);
this.offset = nameend+1;
int type;
boolean firstloop = true;
while ((type = this.read()) != T_EOF) {
- if (type == T_CHAR && this.chardata == ']' && !firstloop)
+ if (type == T_CHAR && this.chardata == ']' && !firstloop) {
break;
+ }
int c = this.chardata;
boolean end = false;
if (type == T_BACKSOLIDUS) {
case 'i': case 'I':
case 'c': case 'C':
c = this.processCIinCharacterClass(tok, c);
- if (c < 0) end = true;
+ if (c < 0) {
+ end = true;
+ }
break;
-
+
case 'p':
case 'P':
int pstart = this.offset;
RangeToken tok2 = this.processBacksolidus_pP(c);
- if (tok2 == null) throw this.ex("parser.atom.5", pstart);
+ if (tok2 == null) {
+ throw this.ex("parser.atom.5", pstart);
+ }
tok.mergeRanges(tok2);
end = true;
break;
// POSIX Character class such as [:alnum:]
else if (type == T_POSIX_CHARCLASS_START) {
int nameend = this.regex.indexOf(':', this.offset);
- if (nameend < 0) throw this.ex("parser.cc.1", this.offset);
+ if (nameend < 0) {
+ throw this.ex("parser.cc.1", this.offset);
+ }
boolean positive = true;
if (this.regex.charAt(this.offset) == '^') {
this.offset ++;
String name = this.regex.substring(this.offset, nameend);
RangeToken range = Token.getRange(name, positive,
this.isSet(RegularExpression.XMLSCHEMA_MODE));
- if (range == null) throw this.ex("parser.cc.3", this.offset);
+ if (range == null) {
+ throw this.ex("parser.cc.3", this.offset);
+ }
tok.mergeRanges(range);
end = true;
- if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']')
+ if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']') {
throw this.ex("parser.cc.1", nameend);
+ }
this.offset = nameend+2;
}
else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
}
else {
this.next(); // Skips '-'
- if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
+ if ((type = this.read()) == T_EOF) {
+ throw this.ex("parser.cc.2", this.offset);
+ }
if (type == T_CHAR && this.chardata == ']') {
if (!this.isSet(RegularExpression.IGNORE_CASE) || c > 0xffff) {
tok.addRange(c, c);
if (this.read() == T_EOF) {
throw this.ex("parser.cc.2", this.offset);
}
-
+
if (!useNrange && nrange) {
base.subtractRanges(tok);
tok = base;
if (type == T_CHAR && (ch == '-' || ch == '&')
|| type == T_PLUS) {
this.next();
- if (this.read() != T_LBRACKET) throw ex("parser.ope.1", this.offset-1);
+ if (this.read() != T_LBRACKET) {
+ throw ex("parser.ope.1", this.offset-1);
+ }
RangeToken t2 = this.parseCharacterClass(false);
- if (type == T_PLUS)
+ if (type == T_PLUS) {
tok.mergeRanges(t2);
- else if (ch == '-')
+ } else if (ch == '-') {
tok.subtractRanges(t2);
- else if (ch == '&')
+ } else if (ch == '&') {
tok.intersectRanges(t2);
- else
+ } else {
throw new RuntimeException("ASSERT");
+ }
} else {
throw ex("parser.ope.2", this.offset-1);
}
/**
*/
int decodeEscaped() throws ParseException {
- if (this.read() != T_BACKSOLIDUS) throw ex("parser.next.1", this.offset-1);
+ if (this.read() != T_BACKSOLIDUS) {
+ throw ex("parser.next.1", this.offset-1);
+ }
int c = this.chardata;
switch (c) {
case 'e': c = 0x1b; break; // ESCAPE U+001B
//case 'v': c = 0x0b; break; // VERTICAL TABULATION U+000B
case 'x':
this.next();
- if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset-1);
+ if (this.read() != T_CHAR) {
+ throw ex("parser.descape.1", this.offset-1);
+ }
if (this.chardata == '{') {
int v1 = 0;
int uv = 0;
do {
this.next();
- if (this.read() != T_CHAR) throw ex("parser.descape.1", this.offset-1);
- if ((v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR) {
+ throw ex("parser.descape.1", this.offset-1);
+ }
+ if ((v1 = hexChar(this.chardata)) < 0) {
break;
- if (uv > uv*16) throw ex("parser.descape.2", this.offset-1);
+ }
+ if (uv > uv*16) {
+ throw ex("parser.descape.2", this.offset-1);
+ }
uv = uv*16+v1;
} while (true);
- if (this.chardata != '}') throw ex("parser.descape.3", this.offset-1);
- if (uv > Token.UTF16_MAX) throw ex("parser.descape.4", this.offset-1);
+ if (this.chardata != '}') {
+ throw ex("parser.descape.3", this.offset-1);
+ }
+ if (uv > Token.UTF16_MAX) {
+ throw ex("parser.descape.4", this.offset-1);
+ }
c = uv;
} else {
int v1 = 0;
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
int uv = v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
c = uv;
}
case 'u':
int v1 = 0;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
int uv = v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
c = uv;
break;
case 'v':
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
this.next();
- if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)
+ if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
throw ex("parser.descape.1", this.offset-1);
+ }
uv = uv*16+v1;
- if (uv > Token.UTF16_MAX) throw ex("parser.descappe.4", this.offset-1);
+ if (uv > Token.UTF16_MAX) {
+ throw ex("parser.descappe.4", this.offset-1);
+ }
c = uv;
break;
case 'A':
}
static private final int hexChar(int ch) {
- if (ch < '0') return -1;
- if (ch > 'f') return -1;
- if (ch <= '9') return ch-'0';
- if (ch < 'A') return -1;
- if (ch <= 'F') return ch-'A'+10;
- if (ch < 'a') return -1;
+ if (ch < '0') {
+ return -1;
+ }
+ if (ch > 'f') {
+ return -1;
+ }
+ if (ch <= '9') {
+ return ch-'0';
+ }
+ if (ch < 'A') {
+ return -1;
+ }
+ if (ch <= 'F') {
+ return ch-'A'+10;
+ }
+ if (ch < 'a') {
+ return -1;
+ }
return ch-'a'+10;
}
-
+
static protected final void addCaseInsensitiveChar(RangeToken tok, int c) {
final int[] caseMap = CaseInsensitiveMap.get(c);
tok.addRange(c, c);
-
+
if (caseMap != null) {
for (int i=0; i<caseMap.length; i+=2) {
tok.addRange(caseMap[i], caseMap[i]);
}
}
-
+
static protected final void addCaseInsensitiveCharRange(RangeToken tok, int start, int end) {
int[] caseMap;
int r1, r2;
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* but entire string matching.
*
* </dl>
- *
+ *
* <hr width="50%">
* <h3>Syntax</h3>
* <table border="1" bgcolor="#ddeeff">
* <dt class="REGEX"><kbd>[</kbd><var>R<sub>1</sub>R<sub>2</sub>...</var><kbd>-[</kbd><var>R<sub>n</sub>R<sub>n+1</sub>...</var><kbd>]]</kbd> (with an <a href="#X_OPTION">"X" option</a>)</dt>
* <dd>Character class subtraction for the XML Schema.
* You can use this syntax when you specify an <a href="#X_OPTION">"X" option</a>.
- *
+ *
* <dt class="REGEX"><kbd>\d</kbd>
* <dd class="REGEX">Equivalent to <kbd>[0-9]</kbd>.
* <dd>When <a href="#U_OPTION">a "u" option</a> is set, it is equivalent to
* after <code><a href="#matches(java.lang.String, org.apache.xerces.utils.regex.Match)">matches(String,Match)</a></code>.
* The 0th group means whole of this regular expression.
* The <VAR>N</VAR>th group is the inside of the <VAR>N</VAR>th left parenthesis.
- *
+ *
* <p>For instance, a regular expression is
* "<FONT color=blue><KBD> *([^<:]*) +<([^>]*)> *</KBD></FONT>"
* and target text is
* </ul>
*
* <hr width="50%">
- *
+ *
* @xerces.internal
*
* @author TAMURA Kent <kent@trl.ibm.co.jp>
* @version $Id: RegularExpression.java 961928 2010-07-08 20:43:46Z knoaman $
*/
public class RegularExpression implements java.io.Serializable {
-
+
private static final long serialVersionUID = 6242499334195006401L;
static final boolean DEBUG = false;
* Compiles a token tree into an operation flow.
*/
private synchronized void compile(Token tok) {
- if (this.operations != null)
+ if (this.operations != null) {
return;
+ }
this.numberOfClosures = 0;
this.operations = this.compile(tok, null, false);
}
}
break;
}
- if (min > 0 && max > 0)
+ if (min > 0 && max > 0) {
max -= min;
+ }
if (max > 0) {
// X{2,6} -> XX(X(X(XX?)?)?)?
ret = next;
public boolean matches(char[] target, int start, int end, Match match) {
synchronized (this) {
- if (this.operations == null)
+ if (this.operations == null) {
this.prepare();
- if (this.context == null)
+ }
+ if (this.context == null) {
this.context = new Context();
+ }
}
Context con = null;
synchronized (this.context) {
} else {
if (previousIsEOL) {
if (0 <= (matchEnd = this. match(con, this.operations,
- matchStart, 1, this.options)))
+ matchStart, 1, this.options))) {
break;
+ }
}
previousIsEOL = false;
}
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options))) {
break;
+ }
}
}
public boolean matches(String target, int start, int end, Match match) {
synchronized (this) {
- if (this.operations == null)
+ if (this.operations == null) {
this.prepare();
- if (this.context == null)
+ }
+ if (this.context == null) {
this.context = new Context();
+ }
}
Context con = null;
synchronized (this.context) {
} else {
if (previousIsEOL) {
if (0 <= (matchEnd = this.match(con, this.operations,
- matchStart, 1, this.options)))
+ matchStart, 1, this.options))) {
break;
+ }
}
previousIsEOL = false;
}
if (0 <= (matchEnd = this.match(con, this.operations,
matchStart, 1, this.options))) {
break;
- }
+ }
}
}
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this.match(con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this.match(con, this.operations, matchStart, 1, this.options))) {
break;
+ }
}
}
*/
private int match(Context con, Op op, int offset, int dx, int opts) {
final ExpressionTarget target = con.target;
- final Stack opStack = new Stack();
+ final Stack<Op> opStack = new Stack<>();
final Stack<Integer> dataStack = new Stack<>();
final boolean isSetIgnoreCase = isSet(opts, IGNORE_CASE);
int retValue = -1;
retValue = isSet(opts, XMLSCHEMA_MODE) && offset != con.limit ? -1 : offset;
}
else {
- retValue = -1;
+ retValue = -1;
}
returned = true;
}
returned = true;
break;
}
-
+
con.closureContexts[id].addOffset(offset);
}
// fall through
return retValue;
}
- op = (Op) opStack.pop();
+ op = opStack.pop();
offset = dataStack.pop();
switch (op.type) {
}
}
break;
-
+
case Op.LOOKAHEAD:
case Op.LOOKBEHIND:
{
}
}
- private boolean matchChar(int ch, int other, boolean ignoreCase) {
+ private static boolean matchChar(int ch, int other, boolean ignoreCase) {
return (ignoreCase) ? matchIgnoreCase(ch, other) : ch == other;
}
case '^':
if (isSet(opts, MULTIPLE_LINES)) {
if (!(offset == con.start
- || offset > con.start && offset < con.limit && isEOLChar(target.charAt(offset-1))))
+ || offset > con.start && offset < con.limit && isEOLChar(target.charAt(offset-1)))) {
return false;
+ }
} else {
- if (offset != con.start)
+ if (offset != con.start) {
return false;
+ }
}
break;
case '@': // Internal use only.
// The @ always matches line beginnings.
if (!(offset == con.start
- || offset > con.start && isEOLChar(target.charAt(offset-1))))
+ || offset > con.start && isEOLChar(target.charAt(offset-1)))) {
return false;
+ }
break;
case '$':
if (isSet(opts, MULTIPLE_LINES)) {
if (!(offset == con.limit
- || offset < con.limit && isEOLChar(target.charAt(offset))))
+ || offset < con.limit && isEOLChar(target.charAt(offset)))) {
return false;
+ }
} else {
if (!(offset == con.limit
|| offset+1 == con.limit && isEOLChar(target.charAt(offset))
|| offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN
- && target.charAt(offset+1) == LINE_FEED))
+ && target.charAt(offset+1) == LINE_FEED)) {
return false;
+ }
}
break;
case 'A':
- if (offset != con.start) return false;
+ if (offset != con.start) {
+ return false;
+ }
break;
case 'Z':
if (!(offset == con.limit
|| offset+1 == con.limit && isEOLChar(target.charAt(offset))
|| offset+2 == con.limit && target.charAt(offset) == CARRIAGE_RETURN
- && target.charAt(offset+1) == LINE_FEED))
+ && target.charAt(offset+1) == LINE_FEED)) {
return false;
+ }
break;
case 'z':
- if (offset != con.limit) return false;
+ if (offset != con.limit) {
+ return false;
+ }
break;
case 'b':
- if (con.length == 0)
+ if (con.length == 0) {
return false;
+ }
{
int after = getWordType(target, con.start, con.limit, offset, opts);
- if (after == WT_IGNORE) return false;
+ if (after == WT_IGNORE) {
+ return false;
+ }
int before = getPreviousWordType(target, con.start, con.limit, offset, opts);
- if (after == before) return false;
+ if (after == before) {
+ return false;
+ }
}
break;
case 'B':
- if (con.length == 0)
+ if (con.length == 0) {
go = true;
- else {
+ } else {
int after = getWordType(target, con.start, con.limit, offset, opts);
go = after == WT_IGNORE
|| after == getPreviousWordType(target, con.start, con.limit, offset, opts);
}
- if (!go) return false;
+ if (!go) {
+ return false;
+ }
break;
case '<':
- if (con.length == 0 || offset == con.limit) return false;
+ if (con.length == 0 || offset == con.limit) {
+ return false;
+ }
if (getWordType(target, con.start, con.limit, offset, opts) != WT_LETTER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER)
+ || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_OTHER) {
return false;
+ }
break;
case '>':
- if (con.length == 0 || offset == con.start) return false;
+ if (con.length == 0 || offset == con.start) {
+ return false;
+ }
if (getWordType(target, con.start, con.limit, offset, opts) != WT_OTHER
- || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER)
+ || getPreviousWordType(target, con.start, con.limit, offset, opts) != WT_LETTER) {
return false;
+ }
break;
} // switch anchor type
-
+
return true;
}
private static final int getPreviousWordType(ExpressionTarget target, int begin, int end,
int offset, int opts) {
int ret = getWordType(target, begin, end, --offset, opts);
- while (ret == WT_IGNORE)
+ while (ret == WT_IGNORE) {
ret = getWordType(target, begin, end, --offset, opts);
+ }
return ret;
}
private static final int getWordType(ExpressionTarget target, int begin, int end,
int offset, int opts) {
- if (offset < begin || offset >= end) return WT_OTHER;
+ if (offset < begin || offset >= end) {
+ return WT_OTHER;
+ }
return getWordType0(target.charAt(offset) , opts);
}
synchronized (this) {
- if (this.operations == null)
+ if (this.operations == null) {
this.prepare();
- if (this.context == null)
+ }
+ if (this.context == null) {
this.context = new Context();
+ }
}
Context con = null;
synchronized (this.context) {
} else {
if (previousIsEOL) {
if (0 <= (matchEnd = this.match(con, this.operations,
- matchStart, 1, this.options)))
+ matchStart, 1, this.options))) {
break;
+ }
}
previousIsEOL = false;
}
*/
else {
for (matchStart = con.start; matchStart <= limit; matchStart ++) {
- if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options)))
+ if (0 <= (matchEnd = this. match(con, this.operations, matchStart, 1, this.options))) {
break;
+ }
}
}
abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, String part, int partlen);
abstract boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen);
}
-
+
static final class StringTarget extends ExpressionTarget {
-
+
private String target;
-
+
StringTarget(String target) {
this.target = target;
}
-
+
final void resetTarget(String target) {
this.target = target;
}
-
+
+ @Override
final char charAt(int index) {
return target.charAt(index);
}
-
+
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit,
String part, int partlen) {
if (limit-offset < partlen) {
return (ignoreCase) ? target.regionMatches(true, offset, part, 0, partlen) : target.regionMatches(offset, part, 0, partlen);
}
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit,
int offset2, int partlen) {
if (limit-offset < partlen) {
: target.regionMatches(offset, target, offset2, partlen);
}
}
-
+
static final class CharArrayTarget extends ExpressionTarget {
-
+
char[] target;
-
+
CharArrayTarget(char[] target) {
- this.target = target;
+ this.target = target;
}
final void resetTarget(char[] target) {
this.target = target;
}
+ @Override
char charAt(int index) {
return target[index];
}
-
+
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit,
String part, int partlen) {
if (offset < 0 || limit-offset < partlen) {
return true;
}
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen) {
if (offset < 0 || limit-offset < partlen) {
return false;
private final boolean regionMatches(int offset, int limit, int offset2, int partlen) {
int i = offset2;
while (partlen-- > 0) {
- if ( target [ offset++ ] != target [ i++ ] )
+ if ( target [ offset++ ] != target [ i++ ] ) {
return false;
+ }
}
return true;
}
static final class CharacterIteratorTarget extends ExpressionTarget {
CharacterIterator target;
-
+
CharacterIteratorTarget(CharacterIterator target) {
- this.target = target;
+ this.target = target;
}
final void resetTarget(CharacterIterator target) {
this.target = target;
}
+ @Override
final char charAt(int index) {
return target.setIndex(index);
}
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit,
String part, int partlen) {
if (offset < 0 || limit-offset < partlen) {
return (ignoreCase) ? regionMatchesIgnoreCase(offset, limit, part, partlen)
: regionMatches(offset, limit, part, partlen);
}
-
+
private final boolean regionMatches(int offset, int limit, String part, int partlen) {
int i = 0;
while (partlen-- > 0) {
}
return true;
}
-
+
private final boolean regionMatchesIgnoreCase(int offset, int limit, String part, int partlen) {
int i = 0;
while (partlen-- > 0) {
return true;
}
+ @Override
final boolean regionMatches(boolean ignoreCase, int offset, int limit, int offset2, int partlen) {
if (offset < 0 || limit-offset < partlen) {
return false;
}
static final class ClosureContext {
-
+
int[] offsets = new int[4];
int currentIndex = 0;
-
+
boolean contains(int offset) {
for (int i=0; i<currentIndex;++i) {
if (offsets[i] == offset) {
}
return false;
}
-
+
/**
* Forgets every recorded offset by rewinding the write position to zero.
* The {@code offsets} array itself is left untouched.
*/
void reset() {
    this.currentIndex = 0;
}
}
offsets[currentIndex++] = offset;
}
-
+
/**
* Builds a copy of {@code offsets} with twice the capacity.
* Only the first {@code currentIndex} entries are copied. The new array is
* returned; this method does not assign it to {@code offsets}.
*
* @return the enlarged copy
*/
private int[] expandOffsets() {
final int len = offsets.length;
// Double the capacity.
final int newLen = len << 1;
int[] newOffsets = new int[newLen];
-
+
System.arraycopy(offsets, 0, newOffsets, 0, currentIndex);
return newOffsets;
}
}
-
+
static final class Context {
int start;
int limit;
Match match;
boolean inuse = false;
ClosureContext[] closureContexts;
-
- private StringTarget stringTarget;
+
+ private StringTarget stringTarget;
private CharArrayTarget charArrayTarget;
private CharacterIteratorTarget characterIteratorTarget;
* Prepares for matching. This method is called just before starting matching.
*/
void prepare() {
- if (Op.COUNT) Op.nofinstances = 0;
+ if (Op.COUNT) {
+ Op.nofinstances = 0;
+ }
this.compile(this.tokentree);
/*
if (this.operations.type == Op.CLOSURE && this.operations.getChild().type == Op.DOT) { // .*
this.operations = anchor;
}
*/
- if (Op.COUNT) System.err.println("DEBUG: The number of operations: "+Op.nofinstances);
+ if (Op.COUNT) {
+ System.err.println("DEBUG: The number of operations: "+Op.nofinstances);
+ }
this.minlength = this.tokentree.getMinLength();
if (fresult == Token.FC_TERMINAL) {
firstChar.compactRanges();
this.firstChar = firstChar;
- if (DEBUG)
+ if (DEBUG) {
System.err.println("DEBUG: Use the first character optimization: "+firstChar);
+ }
}
}
if (this.operations != null
&& (this.operations.type == Op.STRING || this.operations.type == Op.CHAR)
&& this.operations.next == null) {
- if (DEBUG)
+ if (DEBUG) {
System.err.print(" *** Only fixed string! *** ");
+ }
this.fixedStringOnly = true;
- if (this.operations.type == Op.STRING)
+ if (this.operations.type == Op.STRING) {
this.fixedString = this.operations.getString();
- else if (this.operations.getData() >= 0x10000) { // Op.CHAR
+ } else if (this.operations.getData() >= 0x10000) { // Op.CHAR
this.fixedString = REUtil.decomposeToSurrogates(this.operations.getData());
} else {
char[] ac = new char[1];
this.tokentree.findFixedString(container, this.options);
this.fixedString = container.token == null ? null : container.token.getString();
this.fixedStringOptions = container.options;
- if (this.fixedString != null && this.fixedString.length() < 2)
+ if (this.fixedString != null && this.fixedString.length() < 2) {
this.fixedString = null;
+ }
// This pattern has a fixed string of which length is more than one.
if (this.fixedString != null) {
this.fixedStringTable = new BMPattern(this.fixedString, 256,
/**
* Creates a new RegularExpression instance from a pattern and an option string.
* Both arguments are handed straight to {@code setPattern(String, String)}.
*
* @param regex   the pattern text
* @param options the option string
* @throws ParseException propagated from {@code setPattern}
*/
public RegularExpression(String regex, String options) throws ParseException {
    setPattern(regex, options);
}
-
+
/**
* Creates a new RegularExpression instance with options.
*
/**
* Sets a new pattern using the default locale.
*
* @param newPattern the pattern text
* @throws ParseException propagated from the overload this delegates to
*/
public void setPattern(String newPattern) throws ParseException {
    final Locale defaultLocale = Locale.getDefault();
    setPattern(newPattern, defaultLocale);
}
-
+
/**
* Sets a new pattern for the given locale, keeping the options currently
* stored in this instance.
*
* @param newPattern the pattern text
* @param locale     the locale passed on to the three-argument overload
* @throws ParseException propagated from the overload this delegates to
*/
public void setPattern(String newPattern, Locale locale) throws ParseException {
    setPattern(newPattern, options, locale);
}
/**
* Sets a new pattern and option string using the default locale.
*
* @param newPattern the pattern text
* @param options    the option string
* @throws ParseException propagated from the overload this delegates to
*/
public void setPattern(String newPattern, String options) throws ParseException {
    final Locale defaultLocale = Locale.getDefault();
    setPattern(newPattern, options, defaultLocale);
}
-
+
/**
* Sets a new pattern for the given locale. The option string is converted
* with {@code REUtil.parseOptions} before being passed on.
*
* @param newPattern the pattern text
* @param options    the option string to parse
* @param locale     the locale passed on unchanged
* @throws ParseException propagated from the overload this delegates to
*/
public void setPattern(String newPattern, String options, Locale locale) throws ParseException {
    setPattern(newPattern, REUtil.parseOptions(options), locale);
}
/**
* Returns the string representation of this instance, obtained by
* rendering the token tree with this instance's options.
*
* @return the result of {@code tokentree.toString(options)}
*/
+ @Override
public String toString() {
return this.tokentree.toString(this.options);
}
/**
* Returns true if the patterns are the same and the options are equal.
* Patterns are compared with {@code String.equals}; options are compared
* with {@code ==}.
*
* @param obj the object to compare with; {@code null} or an object of
*            another type yields {@code false}
* @return {@code true} if {@code obj} is a RegularExpression with the same
*         regex and options
*/
+ @Override
public boolean equals(Object obj) {
- if (obj == null) return false;
- if (!(obj instanceof RegularExpression))
+ if (obj == null) {
return false;
+ }
+ if (!(obj instanceof RegularExpression)) {
+ return false;
+ }
RegularExpression r = (RegularExpression)obj;
return this.regex.equals(r.regex) && this.options == r.options;
}
/**
* Returns the hash code of the string formed by the regex, a slash, and
* the value returned by {@code getOptions()}.
*
* @return hash code of {@code regex + "/" + getOptions()}
*/
+ @Override
public int hashCode() {
return (this.regex+"/"+this.getOptions()).hashCode();
}
}
/**
* Tests whether a code point is a legacy word character:
* {@code '_'}, {@code '0'-'9'}, {@code 'A'-'Z'} or {@code 'a'-'z'}.
* The checks narrow the range step by step, so their order matters.
*
* @param ch the code point to test
* @return {@code true} if {@code ch} is one of the characters listed above
*/
private static final boolean isWordChar(int ch) { // Legacy word characters
- if (ch == '_') return true;
- if (ch < '0') return false;
- if (ch > 'z') return false;
- if (ch <= '9') return true;
- if (ch < 'A') return false;
- if (ch <= 'Z') return true;
- if (ch < 'a') return false;
+ if (ch == '_') {
+ return true;
+ }
+ if (ch < '0') {
+ return false;
+ }
+ if (ch > 'z') {
+ return false;
+ }
+ if (ch <= '9') {
+ return true;
+ }
+ if (ch < 'A') {
+ return false;
+ }
+ if (ch <= 'Z') {
+ return true;
+ }
+ if (ch < 'a') {
+ return false;
+ }
// Only 'a'..'z' can reach this point.
return true;
}
/**
* Compares two code points without regard to case.
* Identical values always match. Otherwise, if either value is above
* 0xffff the result is {@code false}. The remaining values are compared
* after upper-casing, and then after lower-casing the upper-cased forms.
*
* @param chardata the first code point
* @param ch       the second code point
* @return {@code true} if the two values are considered equal
*/
private static final boolean matchIgnoreCase(int chardata, int ch) {
- if (chardata == ch) return true;
- if (chardata > 0xffff || ch > 0xffff) return false;
+ if (chardata == ch) {
+ return true;
+ }
+ if (chardata > 0xffff || ch > 0xffff) {
+ return false;
+ }
char uch1 = Character.toUpperCase((char)chardata);
char uch2 = Character.toUpperCase((char)ch);
- if (uch1 == uch2) return true;
+ if (uch1 == uch2) {
+ return true;
+ }
// Last chance: the upper-case forms differ but their lower-case forms may coincide.
return Character.toLowerCase(uch1) == Character.toLowerCase(uch2);
}
}
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/**
* This class represents a node in parse tree.
- *
+ *
* @xerces.internal
*
* @version $Id: Token.java 1638344 2014-11-11 20:15:46Z mrglavas $
}
/**
* Creates a ParenToken of the given type around {@code child}, passing 0
* as the paren number. Increments {@code Token.tokens} when
* {@code COUNTTOKENS} is set.
*/
static Token.ParenToken createLook(int type, Token child) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ParenToken(type, child, 0);
}
/**
* Creates a ParenToken of type {@code Token.PAREN} around {@code child}
* with the given paren number. Increments {@code Token.tokens} when
* {@code COUNTTOKENS} is set.
*/
static Token.ParenToken createParen(Token child, int pnumber) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ParenToken(Token.PAREN, child, pnumber);
}
/**
* Creates a ClosureToken of type {@code Token.CLOSURE} whose child is
* {@code tok}. Increments {@code Token.tokens} when {@code COUNTTOKENS}
* is set.
*/
static Token.ClosureToken createClosure(Token tok) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ClosureToken(Token.CLOSURE, tok);
}
/**
* Creates a ClosureToken of type {@code Token.NONGREEDYCLOSURE} whose
* child is {@code tok}. Increments {@code Token.tokens} when
* {@code COUNTTOKENS} is set.
*/
static Token.ClosureToken createNGClosure(Token tok) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
}
/**
* Creates a two-child ConcatToken holding {@code tok1} followed by
* {@code tok2}. Increments {@code Token.tokens} when {@code COUNTTOKENS}
* is set.
*/
static Token.ConcatToken createConcat(Token tok1, Token tok2) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ConcatToken(tok1, tok2);
}
/**
* Creates a UnionToken whose type is {@code Token.CONCAT}. Using the
* UnionToken class for a concatenation is intentional (see the inline
* note). Increments {@code Token.tokens} when {@code COUNTTOKENS} is set.
*/
static Token.UnionToken createConcat() {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.UnionToken(Token.CONCAT); // *** It is not a bug.
}
/**
* Creates a UnionToken whose type is {@code Token.UNION}. Increments
* {@code Token.tokens} when {@code COUNTTOKENS} is set.
*/
static Token.UnionToken createUnion() {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.UnionToken(Token.UNION);
}
/**
* Returns the shared {@code Token.token_empty} instance. No new token is
* allocated and the token counter is not touched.
*/
static Token createEmpty() {
    final Token shared = Token.token_empty;
    return shared;
}
/**
* Creates a RangeToken of type {@code Token.RANGE}. Increments
* {@code Token.tokens} when {@code COUNTTOKENS} is set.
*/
static RangeToken createRange() {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new RangeToken(Token.RANGE);
}
/**
* Creates a RangeToken of type {@code Token.NRANGE}. Increments
* {@code Token.tokens} when {@code COUNTTOKENS} is set.
*/
static RangeToken createNRange() {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new RangeToken(Token.NRANGE);
}
/**
* Creates a CharToken of type {@code Token.CHAR} for the code point
* {@code ch}. Increments {@code Token.tokens} when {@code COUNTTOKENS}
* is set.
*/
static Token.CharToken createChar(int ch) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.CharToken(Token.CHAR, ch);
}
/**
* Creates a CharToken of type {@code Token.ANCHOR} carrying {@code ch}.
* Increments {@code Token.tokens} when {@code COUNTTOKENS} is set.
*/
static private Token.CharToken createAnchor(int ch) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.CharToken(Token.ANCHOR, ch);
}
/**
* Creates a StringToken of type {@code Token.BACKREFERENCE} with a null
* string and the given reference number. Increments {@code Token.tokens}
* when {@code COUNTTOKENS} is set.
*/
static Token.StringToken createBackReference(int refno) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.StringToken(Token.BACKREFERENCE, null, refno);
}
/**
* Creates a StringToken of type {@code Token.STRING} holding {@code str},
* with reference number 0. Increments {@code Token.tokens} when
* {@code COUNTTOKENS} is set.
*/
static Token.StringToken createString(String str) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.StringToken(Token.STRING, str, 0);
}
/**
* Creates a ModifierToken around {@code child} with the given
* {@code add} and {@code mask} values. Increments {@code Token.tokens}
* when {@code COUNTTOKENS} is set.
*/
static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ModifierToken(child, add, mask);
}
/**
* Creates a ConditionToken from a reference number, a condition token,
* and the two alternative patterns. Increments {@code Token.tokens} when
* {@code COUNTTOKENS} is set.
*/
static Token.ConditionToken createCondition(int refno, Token condition,
Token yespat, Token nopat) {
- if (COUNTTOKENS) Token.tokens ++;
+ if (COUNTTOKENS) {
+ Token.tokens ++;
+ }
return new Token.ConditionToken(refno, condition, yespat, nopat);
}
return -1;
}
/**
* Returns the string form of this token, rendered by
* {@code toString(int)} with an options value of 0.
*/
+ @Override
public String toString() {
return this.toString(0);
}
switch (this.type) {
case CONCAT:
int sum = 0;
- for (int i = 0; i < this.size(); i ++)
+ for (int i = 0; i < this.size(); i ++) {
sum += this.getChild(i).getMinLength();
+ }
return sum;
case CONDITION:
case UNION:
- if (this.size() == 0)
+ if (this.size() == 0) {
return 0;
+ }
int ret = this.getChild(0).getMinLength();
for (int i = 1; i < this.size(); i ++) {
int min = this.getChild(i).getMinLength();
- if (min < ret) ret = min;
+ if (min < ret) {
+ ret = min;
+ }
}
return ret;
case CLOSURE:
case NONGREEDYCLOSURE:
- if (this.getMin() >= 0)
+ if (this.getMin() >= 0) {
return this.getMin() * this.getChild(0).getMinLength();
+ }
return 0;
case EMPTY:
int sum = 0;
for (int i = 0; i < this.size(); i ++) {
int d = this.getChild(i).getMaxLength();
- if (d < 0) return -1;
+ if (d < 0) {
+ return -1;
+ }
sum += d;
}
return sum;
case CONDITION:
case UNION:
- if (this.size() == 0)
+ if (this.size() == 0) {
return 0;
+ }
int ret = this.getChild(0).getMaxLength();
for (int i = 1; ret >= 0 && i < this.size(); i ++) {
int max = this.getChild(i).getMaxLength();
ret = -1;
break;
}
- if (max > ret) ret = max;
+ if (max > ret) {
+ ret = max;
+ }
}
return ret;
case CLOSURE:
case NONGREEDYCLOSURE:
- if (this.getMax() >= 0)
- // When this.child.getMaxLength() < 0,
+ if (this.getMax() >= 0) {
+ // When this.child.getMaxLength() < 0,
// this returns minus value
return this.getMax() * this.getChild(0).getMaxLength();
+ }
return -1;
case EMPTY:
switch (this.type) {
case CONCAT:
int ret = FC_CONTINUE;
- for (int i = 0; i < this.size(); i ++)
- if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE)
+ for (int i = 0; i < this.size(); i ++) {
+ if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE) {
break;
+ }
+ }
return ret;
case UNION:
- if (this.size() == 0)
+ if (this.size() == 0) {
return FC_CONTINUE;
+ }
/*
* a|b|c -> FC_TERMINAL
* a|.|c -> FC_ANY
boolean hasEmpty = false;
for (int i = 0; i < this.size(); i ++) {
ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
- if (ret2 == FC_ANY)
+ if (ret2 == FC_ANY) {
break;
- else if (ret2 == FC_CONTINUE)
+ } else if (ret2 == FC_CONTINUE) {
hasEmpty = true;
+ }
}
return hasEmpty ? FC_CONTINUE : ret2;
case CONDITION:
int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
- if (this.size() == 1) return FC_CONTINUE;
- if (ret3 == FC_ANY) return ret3;
+ if (this.size() == 1) {
+ return FC_CONTINUE;
+ }
+ if (ret3 == FC_ANY) {
+ return ret3;
+ }
int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
- if (ret4 == FC_ANY) return ret4;
+ if (ret4 == FC_ANY) {
+ return ret4;
+ }
return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;
case CLOSURE:
int ch2;
if (REUtil.isHighSurrogate(cha)
&& this.getString().length() >= 2
- && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1))))
+ && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1)))) {
cha = REUtil.composeFromSurrogates(cha, ch2);
+ }
result.addRange(cha, cha);
if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
cha = Character.toUpperCase((char)cha);
}
/**
* Tells whether this token's string is shorter than that of {@code tok}.
* Both tokens must be of type {@code STRING}.
*
* @param tok the token to compare with; {@code null} yields {@code false}
* @return {@code true} if this token's string length is less than
*         {@code tok}'s
* @throws RuntimeException if this token or {@code tok} is not of type
*         {@code STRING}
*/
private final boolean isShorterThan(Token tok) {
- if (tok == null) return false;
+ if (tok == null) {
+ return false;
+ }
/*
int mylength;
if (this.type == STRING) mylength = this.getString().length();
else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
*/
int mylength;
- if (this.type == STRING) mylength = this.getString().length();
- else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
+ if (this.type == STRING) {
+ mylength = this.getString().length();
+ } else {
+ throw new RuntimeException("Internal Error: Illegal type: "+this.type);
+ }
int otherlength;
- if (tok.type == STRING) otherlength = tok.getString().length();
- else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
+ if (tok.type == STRING) {
+ otherlength = tok.getString().length();
+ } else {
+ throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
+ }
return mylength < otherlength;
}
}
// ------------------------------------------------------
- private final static Hashtable categories = new Hashtable();
- private final static Hashtable categories2 = new Hashtable();
+ private final static Hashtable<String, Token> categories = new Hashtable<>();
+ private final static Hashtable<String, Token> categories2 = new Hashtable<>();
private static final String[] categoryNames = {
"Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
"Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
"L", "M", "N", "Z", "C", "P", "S", // 31-37
};
- // Schema Rec. {Datatypes} - Punctuation
+ // Schema Rec. {Datatypes} - Punctuation
static final int CHAR_INIT_QUOTE = 29; // Pi - initial quote
static final int CHAR_FINAL_QUOTE = 30; // Pf - final quote
static final int CHAR_LETTER = 31;
static final int CHAR_OTHER = 35;
static final int CHAR_PUNCTUATION = 36;
static final int CHAR_SYMBOL = 37;
-
- //blockNames in UNICODE 3.1 that supported by XML Schema REC
+
+ //blockNames in UNICODE 3.1 that supported by XML Schema REC
private static final String[] blockNames = {
/*0000..007F;*/ "Basic Latin",
/*0080..00FF;*/ "Latin-1 Supplement",
/*0530..058F;*/ "Armenian",
/*0590..05FF;*/ "Hebrew",
/*0600..06FF;*/ "Arabic",
- /*0700..074F;*/ "Syriac",
+ /*0700..074F;*/ "Syriac",
/*0780..07BF;*/ "Thaana",
/*0900..097F;*/ "Devanagari",
/*0980..09FF;*/ "Bengali",
/*0E00..0E7F;*/ "Thai",
/*0E80..0EFF;*/ "Lao",
/*0F00..0FFF;*/ "Tibetan",
- /*1000..109F;*/ "Myanmar",
+ /*1000..109F;*/ "Myanmar",
/*10A0..10FF;*/ "Georgian",
/*1100..11FF;*/ "Hangul Jamo",
/*1200..137F;*/ "Ethiopic",
//ADD THOSE MANUALLY
//F0000..FFFFD; "Private Use",
//100000..10FFFD; "Private Use"
- //FFF0..FFFD; "Specials",
- static final String blockRanges =
+ //FFF0..FFFD; "Specials",
+ static final String blockRanges =
"\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
+"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
+"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF"
int type;
for (int i = 0; i < 0x10000; i ++) {
type = Character.getType((char)i);
- if (type == Character.START_PUNCTUATION ||
+ if (type == Character.START_PUNCTUATION ||
type == Character.END_PUNCTUATION) {
//build table of Pi values
if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
Token.nonBMPBlockRanges[location + 1]);
}
String n = Token.blockNames[i];
- if (n.equals("Specials"))
+ if (n.equals("Specials")) {
r1.addRange(0xfff0, 0xfffd);
+ }
if (n.equals("Private Use")) {
r1.addRange(0xF0000,0xFFFFD);
r1.addRange(0x100000,0x10FFFD);
buffer.setLength(0);
buffer.append("Is");
if (n.indexOf(' ') >= 0) {
- for (int ci = 0; ci < n.length(); ci ++)
- if (n.charAt(ci) != ' ') buffer.append((char)n.charAt(ci));
+ for (int ci = 0; ci < n.length(); ci ++) {
+ if (n.charAt(ci) != ' ') {
+ buffer.append(n.charAt(ci));
+ }
+ }
}
else {
buffer.append(n);
}
/**
* Looks up a range by name via the two-argument {@code getRange}, then
* filters the result: when {@code xs} is true and {@code name} was
* registered through {@code registerNonXS}, {@code null} is returned.
*
* @param name     the range name
* @param positive passed through to the two-argument {@code getRange}
* @param xs       whether names registered as non-XS are to be rejected
* @return the range found, or {@code null}
*/
static protected RangeToken getRange(String name, boolean positive, boolean xs) {
RangeToken range = Token.getRange(name, positive);
- if (xs && range != null && Token.isRegisterNonXS(name))
+ if (xs && range != null && Token.isRegisterNonXS(name)) {
range = null;
+ }
return range;
}
- static Hashtable nonxs = null;
+ static Hashtable<String, String> nonxs = null;
/**
* Records {@code name} in the {@code nonxs} table, creating the table on
* first use. The name is stored as both key and value.
* This method is called only by getRange(),
* so it need not be MT-safe.
*
* @param name the name to register
*/
static protected void registerNonXS(String name) {
- if (Token.nonxs == null)
- Token.nonxs = new Hashtable();
+ if (Token.nonxs == null) {
+ Token.nonxs = new Hashtable<>();
+ }
Token.nonxs.put(name, name);
}
/**
* Tells whether {@code name} is a key in the {@code nonxs} table.
*
* @param name the name to look up
* @return {@code false} if the table has not been created yet; otherwise
*         the result of {@code containsKey(name)}
*/
static protected boolean isRegisterNonXS(String name) {
- if (Token.nonxs == null)
+ if (Token.nonxs == null) {
return false;
+ }
//DEBUG
//System.err.println("isRegisterNonXS: "+name);
return Token.nonxs.containsKey(name);
}
private static void setAlias(String newName, String name, boolean positive) {
- Token t1 = (Token)Token.categories.get(name);
- Token t2 = (Token)Token.categories2.get(name);
+ Token t1 = Token.categories.get(name);
+ Token t2 = Token.categories2.get(name);
if (positive) {
Token.categories.put(newName, t1);
Token.categories2.put(newName, t2);
static private Token token_grapheme = null;
static synchronized Token getGraphemePattern() {
- if (Token.token_grapheme != null)
+ if (Token.token_grapheme != null) {
return Token.token_grapheme;
+ }
Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}]
base_char.mergeRanges(Token.getRange("ASSIGNED", true));
*/
static private Token token_ccs = null;
static synchronized Token getCombiningCharacterSequence() {
- if (Token.token_ccs != null)
+ if (Token.token_ccs != null) {
return Token.token_ccs;
+ }
Token foo = Token.createClosure(Token.getRange("M", true)); // \pM*
foo = Token.createConcat(Token.getRange("M", false), foo); // \PM + \pM*
static class StringToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -4614366944218504172L;
-
+
String string;
final int refNumber;
this.refNumber = n;
}
/**
* Returns the {@code refNumber} field of this token.
*/
+ @Override
int getReferenceNumber() { // for STRING
return this.refNumber;
}
/**
* Returns the {@code string} field of this token.
*/
+ @Override
String getString() { // for STRING
return this.string;
}
-
+
/**
* Renders this token as pattern text. A {@code BACKREFERENCE} token
* becomes a backslash followed by its reference number; any other type
* becomes the string passed through {@code REUtil.quoteMeta}.
*
* @param options not used by this implementation
*/
+ @Override
public String toString(int options) {
- if (this.type == BACKREFERENCE)
+ if (this.type == BACKREFERENCE) {
return "\\"+this.refNumber;
- else
+ } else {
return REUtil.quoteMeta(this.string);
+ }
}
}
/**
* A token of type {@code Token.CONCAT} that holds exactly two children.
*/
static class ConcatToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 8717321425541346381L;
-
+
// First and second operand of the concatenation.
final Token child;
final Token child2;
-
+
/**
* Creates a concatenation of {@code t1} followed by {@code t2}.
*/
ConcatToken(Token t1, Token t2) {
super(Token.CONCAT);
this.child = t1;
this.child2 = t2;
}
/**
* Always returns 2.
*/
+ @Override
int size() {
return 2;
}
/**
* Returns the first child for index 0 and the second child for any
* other index; the index is not range-checked.
*/
+ @Override
Token getChild(int index) {
return index == 0 ? this.child : this.child2;
}
/**
* Renders both children in order. When the second child is a closure
* (or non-greedy closure) whose own child is the very same object as
* the first child, the pair is rendered as the first child followed by
* "+" (or "+?") instead.
*/
+ @Override
public String toString(int options) {
String ret;
if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) {
ret = this.child.toString(options)+"+";
} else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) {
ret = this.child.toString(options)+"+?";
- } else
+ } else {
ret = this.child.toString(options)+this.child2.toString(options);
+ }
return ret;
}
}
static class CharToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -4394272816279496989L;
-
+
final int chardata;
CharToken(int type, int ch) {
this.chardata = ch;
}
/**
* Returns the {@code chardata} field of this token.
*/
+ @Override
int getChar() {
return this.chardata;
}
+ @Override
public String toString(int options) {
String ret;
switch (this.type) {
if (this.chardata >= 0x10000) {
String pre = "0"+Integer.toHexString(this.chardata);
ret = "\\v"+pre.substring(pre.length()-6, pre.length());
- } else
+ } else {
ret = ""+(char)this.chardata;
+ }
}
break;
case ANCHOR:
- if (this == Token.token_linebeginning || this == Token.token_lineend)
+ if (this == Token.token_linebeginning || this == Token.token_lineend) {
ret = ""+(char)this.chardata;
- else
+ } else {
ret = "\\"+(char)this.chardata;
+ }
break;
default:
return ret;
}
/**
* Tests whether {@code ch} equals this token's character.
*
* @param ch the code point to test
* @return {@code true} if {@code ch} equals {@code chardata}
* @throws RuntimeException if this token's type is not {@code CHAR}
*/
+ @Override
boolean match(int ch) {
if (this.type == CHAR) {
return ch == this.chardata;
- } else
+ } else {
throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
+ }
}
}
static class ClosureToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 1308971930673997452L;
-
+
int min;
int max;
final Token child;
this.setMax(-1);
}
/**
* Always returns 1: a closure has a single child.
*/
+ @Override
int size() {
return 1;
}
/**
* Returns the single child of this closure; {@code index} is ignored.
*/
+ @Override
Token getChild(int index) {
return this.child;
}
/**
* Stores {@code min} in the {@code min} field of this closure.
*/
+ @Override
final void setMin(int min) {
this.min = min;
}
/**
* Stores {@code max} in the {@code max} field of this closure.
*/
+ @Override
final void setMax(int max) {
this.max = max;
}
/**
* Returns the {@code min} field of this closure.
*/
+ @Override
final int getMin() {
return this.min;
}
/**
* Returns the {@code max} field of this closure.
*/
+ @Override
final int getMax() {
return this.max;
}
+ @Override
public String toString(int options) {
String ret;
if (this.type == CLOSURE) {
ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}";
} else if (this.getMin() >= 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"{"+this.getMin()+",}";
- } else
+ } else {
throw new RuntimeException("Token#toString(): CLOSURE "
+this.getMin()+", "+this.getMax());
+ }
} else {
if (this.getMin() < 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"*?";
ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?";
} else if (this.getMin() >= 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"{"+this.getMin()+",}?";
- } else
+ } else {
throw new RuntimeException("Token#toString(): NONGREEDYCLOSURE "
+this.getMin()+", "+this.getMax());
+ }
}
return ret;
}
static class ParenToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -5938014719827987704L;
-
+
final Token child;
final int parennumber;
this.parennumber = paren;
}
/**
* Always returns 1: a paren token has a single child.
*/
+ @Override
int size() {
return 1;
}
/**
* Returns the single child of this token; {@code index} is ignored.
*/
+ @Override
Token getChild(int index) {
return this.child;
}
/**
* Returns the {@code parennumber} field of this token.
*/
+ @Override
int getParenNumber() {
return this.parennumber;
}
+ @Override
public String toString(int options) {
String ret = null;
switch (this.type) {
static class ConditionToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 4353765277910594411L;
-
+
final int refNumber;
final Token condition;
final Token yes;
this.yes = yespat;
this.no = nopat;
}
/**
* Returns 1 when the {@code no} pattern is absent (null), otherwise 2.
*/
+ @Override
int size() {
return this.no == null ? 1 : 2;
}
/**
* Returns the {@code yes} pattern for index 0 and the {@code no} pattern
* (which may be null) for index 1.
*
* @throws RuntimeException for any other index
*/
+ @Override
Token getChild(int index) {
- if (index == 0) return this.yes;
- if (index == 1) return this.no;
+ if (index == 0) {
+ return this.yes;
+ }
+ if (index == 1) {
+ return this.no;
+ }
throw new RuntimeException("Internal Error: "+index);
}
+ @Override
public String toString(int options) {
String ret;
if (refNumber > 0) {
static class ModifierToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -9114536559696480356L;
-
+
final Token child;
final int add;
final int mask;
this.mask = mask;
}
/**
* Always returns 1: a modifier token has a single child.
*/
+ @Override
int size() {
return 1;
}
/**
* Returns the single child of this token; {@code index} is ignored.
*/
+ @Override
Token getChild(int index) {
return this.child;
}
return this.mask;
}
+ @Override
public String toString(int options) {
return "(?"
+(this.add == 0 ? "" : REUtil.createOptionString(this.add))
static class UnionToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -2568843945989489861L;
-
- Vector children;
+
+ Vector<Token> children;
/**
* Creates a token of the given type. The {@code children} list is not
* allocated here.
*/
UnionToken(int type) {
super(type);
}
+ @Override
void addChild(Token tok) {
- if (tok == null) return;
- if (this.children == null) this.children = new Vector();
+ if (tok == null) {
+ return;
+ }
+ if (this.children == null) {
+ this.children = new Vector<>();
+ }
if (this.type == UNION) {
this.children.addElement(tok);
return;
// This is CONCAT, and new child is CONCAT.
if (tok.type == CONCAT) {
for (int i = 0; i < tok.size(); i ++)
+ {
this.addChild(tok.getChild(i)); // Recursion
+ }
return;
}
int size = this.children.size();
this.children.addElement(tok);
return;
}
- Token previous = (Token)this.children.elementAt(size-1);
+ Token previous = this.children.elementAt(size-1);
if (!((previous.type == CHAR || previous.type == STRING)
&& (tok.type == CHAR || tok.type == STRING))) {
this.children.addElement(tok);
return;
}
-
+
//System.err.println("Merge '"+previous+"' and '"+tok+"'.");
StringBuffer buffer;
if (previous.type == CHAR) { // Replace previous token by STRING
buffer = new StringBuffer(2 + nextMaxLength);
int ch = previous.getChar();
- if (ch >= 0x10000)
+ if (ch >= 0x10000) {
buffer.append(REUtil.decomposeToSurrogates(ch));
- else
+ } else {
buffer.append((char)ch);
+ }
previous = Token.createString(null);
this.children.setElementAt(previous, size-1);
} else { // STRING
if (tok.type == CHAR) {
int ch = tok.getChar();
- if (ch >= 0x10000)
+ if (ch >= 0x10000) {
buffer.append(REUtil.decomposeToSurrogates(ch));
- else
+ } else {
buffer.append((char)ch);
+ }
} else {
buffer.append(tok.getString());
}
((StringToken)previous).string = new String(buffer);
}
/**
* Returns the number of children, or 0 when the {@code children} list
* has not been allocated yet.
*/
+ @Override
int size() {
return this.children == null ? 0 : this.children.size();
}
/**
* Returns the child at {@code index}. Unlike {@code size()}, this does
* not check whether {@code children} is null.
*/
+ @Override
Token getChild(int index) {
- return (Token)this.children.elementAt(index);
+ return this.children.elementAt(index);
}
+ @Override
public String toString(int options) {
String ret;
if (this.type == CONCAT) {
ret = ch.toString(options)+"+";
} else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) {
ret = ch.toString(options)+"+?";
- } else
+ } else {
ret = ch.toString(options)+ch2.toString(options);
+ }
} else {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < this.children.size(); i ++) {
- sb.append(((Token)this.children.elementAt(i)).toString(options));
+ sb.append(this.children.elementAt(i).toString(options));
}
ret = new String(sb);
}
ret = this.getChild(1).toString(options)+"??";
} else {
StringBuffer sb = new StringBuffer();
- sb.append(((Token)this.children.elementAt(0)).toString(options));
+ sb.append(this.children.elementAt(0).toString(options));
for (int i = 1; i < this.children.size(); i ++) {
- sb.append((char)'|');
- sb.append(((Token)this.children.elementAt(i)).toString(options));
+ sb.append('|');
+ sb.append(this.children.elementAt(i).toString(options));
}
ret = new String(sb);
}