2 * Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
10 import static com.google.common.base.Verify.verify;
12 import com.google.common.annotations.VisibleForTesting;
13 import com.google.common.base.CharMatcher;
14 import com.google.common.base.VerifyException;
15 import java.util.regex.Pattern;
16 import org.antlr.v4.runtime.tree.ParseTree;
17 import org.antlr.v4.runtime.tree.TerminalNode;
18 import org.eclipse.jdt.annotation.NonNull;
19 import org.opendaylight.yangtools.yang.common.YangVersion;
20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
22 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
23 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
26 * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
28 enum ArgumentContextUtils {
30 * YANG 1.0 version of strings, which were not completely clarified in RFC6020.
// Double-quoted string check for YANG 1.0.
// NOTE(review): presumably a no-op, since RFC6020 places no restriction on backslash sequences -- confirm.
34 void checkDoubleQuoted(final String str, final StatementSourceReference ref) {
// Unquoted string check for YANG 1.0.
// NOTE(review): presumably a no-op as well -- confirm.
39 void checkUnquoted(final String str, final StatementSourceReference ref) {
44 * YANG 1.1 version of strings, which were clarified in RFC7950.
46 // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
47 // understand versions and needs to work with both.
50 void checkDoubleQuoted(final String str, final StatementSourceReference ref) {
51 // FIXME: YANGTOOLS-1079: we should forward backslash to this method, so that it does not start from the
52 // start from the start of the string. Furthermore this logic should operate on spans
53 // of characters -- i.e. the check for backslash should be a search instead -- as
54 // String knows how to do that and can do it more efficiently than this loop.
55 for (int i = 0; i < str.length() - 1; i++) {
56 if (str.charAt(i) == '\\') {
57 switch (str.charAt(i + 1)) {
65 throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
66 + "quoted string the backslash must be followed by one of the following character "
67 + "[n,t,\",\\], but was '%s'.", str, str.charAt(i + 1));
74 void checkUnquoted(final String str, final StatementSourceReference ref) {
75 SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
76 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
// Matches any whitespace character, as defined by CharMatcher.whitespace()
80 private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
// Matches either quote character: a single quote or a double quote
81 private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
// Literal (non-regex) patterns, each matching one two-character escape sequence: a backslash followed by
// a double quote, another backslash, the letter 'n' and the letter 't' respectively
82 private static final Pattern ESCAPED_DQUOT = Pattern.compile("\\\"", Pattern.LITERAL);
83 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\", Pattern.LITERAL);
84 private static final Pattern ESCAPED_LF = Pattern.compile("\\n", Pattern.LITERAL);
85 private static final Pattern ESCAPED_TAB = Pattern.compile("\\t", Pattern.LITERAL);
// Returns the string-handling flavor to use for the supplied YANG version. A version that has no flavor
// assigned is reported with an IllegalStateException.
// NOTE(review): presumably YANG 1.0 maps to the RFC6020 flavor and YANG 1.1 to the RFC7950 one -- confirm.
87 static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
94 throw new IllegalStateException("Unhandled version " + version);
99 * NOTE: in this method we do not use the convenience methods provided by generated parser code, but instead rely
100 * on assumptions based on the grammar. While this is more verbose, it cuts out a number of unnecessary
101 * operations, such as intermediate List allocation et al.
103 final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
104 // Get first child, which we fully expect to exist and be a lexer token
105 final ParseTree firstChild = context.getChild(0);
106 verify(firstChild instanceof TerminalNode, "Unexpected shape of %s", context);
107 final TerminalNode firstNode = (TerminalNode) firstChild;
108 final int firstType = firstNode.getSymbol().getType();
110 case YangStatementParser.IDENTIFIER:
111 // Simple case, there is a simple string, which cannot contain anything that we would need to process.
112 return firstNode.getText();
113 case YangStatementParser.STRING:
114 // Complex case, defer to a separate method
115 return concatStrings(context, ref);
117 throw new VerifyException("Unexpected first symbol in " + context);
121 private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
123 * We have multiple fragments. Just search the tree. This code is equivalent to
125 * context.STRING().forEach(stringNode -> appendString(sb, stringNode, ref))
127 * except we minimize allocations which that would do.
129 final StringBuilder sb = new StringBuilder();
130 for (ParseTree child : context.children) {
131 verify(child instanceof TerminalNode, "Unexpected fragment component %s", child);
132 final TerminalNode childNode = (TerminalNode) child;
133 switch (childNode.getSymbol().getType()) {
134 case YangStatementParser.SEP:
137 case YangStatementParser.PLUS:
138 // Operator, which we are handling by concat
140 case YangStatementParser.STRING:
141 // a lexer string, could be pretty much anything
142 // FIXME: YANGTOOLS-1079: appendString() is a dispatch based on quotes, which we should be able to
143 // defer to lexer for a dedicated type. That would expand the switch table
144 // here, but since we have it anyway, it would be nice to have the quoting
145 // distinction already taken care of. The performance difference will need to
146 // be benchmarked, though.
147 appendString(sb, childNode, ref);
150 throw new VerifyException("Unexpected symbol in " + childNode);
153 return sb.toString();
156 private void appendString(final StringBuilder sb, final TerminalNode stringNode,
157 final StatementSourceReference ref) {
158 final String str = stringNode.getText();
159 final char firstChar = str.charAt(0);
160 final char lastChar = str.charAt(str.length() - 1);
161 if (firstChar == '"' && lastChar == '"') {
162 sb.append(normalizeDoubleQuoted(str.substring(1, str.length() - 1),
163 stringNode.getSymbol().getCharPositionInLine(), ref));
164 } else if (firstChar == '\'' && lastChar == '\'') {
166 * According to RFC6020 a single quote character cannot occur in a single-quoted string, even when preceded
169 sb.append(str, 1, str.length() - 1);
171 checkUnquoted(str, ref);
176 private String normalizeDoubleQuoted(final String str, final int dquot, final StatementSourceReference ref) {
177 // Whitespace normalization happens irrespective of further handling and has no effect on the result
178 final String stripped = trimWhitespace(str, dquot);
180 // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
181 // validation and processing (which will reuse the work we have done)
182 final int backslash = stripped.indexOf('\\');
183 return backslash == -1 ? stripped : unescape(stripped, backslash, ref);
187 * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer, and ANTLR
188 * can account for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
189 * we decided to let ANTLR do the work.
191 // FIXME: YANGTOOLS-1079: Re-evaluate above comment once our integration surface with lexer has been decided
// Version-specific validation of the contents of a double-quoted string. It is invoked by unescape() before
// any escape sequence is processed; the YANG 1.1 implementation throws SourceException on an illegal escape.
192 abstract void checkDoubleQuoted(String str, StatementSourceReference ref);
// Version-specific validation of an unquoted string. It is invoked by appendString() for strings that are
// not enclosed in matching quotes; the YANG 1.1 implementation throws SourceException if a quote is found.
194 abstract void checkUnquoted(String str, StatementSourceReference ref);
197 * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
199 private String unescape(final String str, final int backslash, final StatementSourceReference ref) {
200 checkDoubleQuoted(str, ref);
202 // FIXME: YANGTOOLS-1079: given we the leading backslash, it would be more efficient to walk the string and
203 // unescape in one go
204 return ESCAPED_TAB.matcher(
206 ESCAPED_BACKSLASH.matcher(
207 ESCAPED_DQUOT.matcher(str).replaceAll("\\\""))
214 static String trimWhitespace(final String str, final int dquot) {
215 final int firstBrk = str.indexOf('\n');
216 if (firstBrk == -1) {
220 // Okay, we may need to do some trimming, set up a builder and append the first segment
221 final int length = str.length();
222 final StringBuilder sb = new StringBuilder(length);
224 // Append first segment, which needs only tail-trimming
225 sb.append(str, 0, trimTrailing(str, 0, firstBrk)).append('\n');
227 // With that out of the way, setup our iteration state. The string segment we are looking at is
228 // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
229 // at the last segment.
230 int start = firstBrk + 1;
231 int brk = str.indexOf('\n', start);
233 // Loop over inner strings
235 trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
237 brk = str.indexOf('\n', start);
240 return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
// Appends the segment str[start, end) to sb after dealing with its leading whitespace relative to dquot, the
// column of the opening double quote, and returns sb to allow call chaining.
// NOTE(review): presumably 'offset' starts at 'start' and 'pos' counts the columns consumed so far -- confirm.
243 private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
244 final int start, final int end) {
// Consume leading characters for as long as the tracked column has not moved past the opening double quote
248 while (pos <= dquot) {
250 // We ran out of data, nothing to append
254 final char ch = str.charAt(offset);
256 // tabs are to be treated as 8 spaces
258 } else if (WHITESPACE_MATCHER.matches(ch)) {
267 // We have expanded beyond double quotes, push equivalent spaces
268 while (pos - 1 > dquot) {
// Whatever remains between the first unconsumed character and the end of the segment is appended verbatim
273 return sb.append(str, offset, end);
276 private static int trimTrailing(final String str, final int start, final int end) {
278 while (ret > start) {
279 final int prev = ret - 1;
280 if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {