2 * Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
10 import static com.google.common.base.Verify.verify;
12 import com.google.common.annotations.VisibleForTesting;
13 import com.google.common.base.CharMatcher;
14 import com.google.common.base.VerifyException;
15 import java.util.regex.Pattern;
16 import org.antlr.v4.runtime.tree.ParseTree;
17 import org.antlr.v4.runtime.tree.TerminalNode;
18 import org.eclipse.jdt.annotation.NonNull;
19 import org.opendaylight.yangtools.yang.common.YangVersion;
20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
22 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
23 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
26 * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
28 enum ArgumentContextUtils {
30 * YANG 1.0 version of strings, which were not completely clarified in RFC6020.
34 void checkDoubleQuotedString(final String str, final StatementSourceReference ref) {
39 void checkUnquotedString(final String str, final StatementSourceReference ref) {
44 * YANG 1.1 version of strings, which were clarified in RFC7950.
46 // NOTE: the clarifications made in RFC7950 would make it possible to delegate these checks to the ANTLR lexer,
47 // but the lexer does not understand versions and needs to work with both.
// YANG 1.1 check of the inner text of a double-quoted string: scans for backslashes and inspects the character
// following each one, raising a SourceException when that character is not acceptable.
// NOTE(review): the case labels of the switch are not visible in this view; judging by the error message below the
// accepted characters are presumably n, t, " and \ -- confirm against the full file.
50 void checkDoubleQuotedString(final String str, final StatementSourceReference ref) {
// The loop stops one short of the end so that charAt(i + 1) is always in range; a backslash in the very last
// position is therefore not examined here.
51 for (int i = 0; i < str.length() - 1; i++) {
52 if (str.charAt(i) == '\\') {
53 switch (str.charAt(i + 1)) {
// The message reports both the whole string and the character found after the backslash
61 throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
62 + "quoted string the backslash must be followed by one of the following character "
63 + "[n,t,\",\\], but was '%s'.", str, str.charAt(i + 1));
// YANG 1.1 check of an unquoted string: reports a SourceException, attributed to ref, when the string contains any
// character matched by ANYQUOTE_MATCHER, i.e. a single quote or a double quote.
70 void checkUnquotedString(final String str, final StatementSourceReference ref) {
71 SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
72 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
// Matcher for whitespace characters as defined by Guava's CharMatcher.whitespace(); used when trimming lines
76 private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
// Matcher for either quote character: single quote or double quote
77 private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
// The four patterns below are compiled with Pattern.LITERAL, so each matches its text verbatim rather than as a regex.
// Backslash followed by a double quote
78 private static final Pattern ESCAPED_DQUOT = Pattern.compile("\\\"", Pattern.LITERAL);
// Two consecutive backslashes
79 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\", Pattern.LITERAL);
// Backslash followed by the letter 'n'
80 private static final Pattern ESCAPED_LF = Pattern.compile("\\n", Pattern.LITERAL);
// Backslash followed by the letter 't'
81 private static final Pattern ESCAPED_TAB = Pattern.compile("\\t", Pattern.LITERAL);
// Returns the ArgumentContextUtils instance to use for the supplied YANG version.
// NOTE(review): the selection logic is not visible in this view; only the failure path below is.
83 static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
// Reached when no instance corresponds to the supplied version
90 throw new IllegalStateException("Unhandled version " + version);
95 * NOTE: in this method we do not use the convenience methods provided by generated parser code, but instead rely
96 * on assumptions based on the grammar. While this is more verbose, it cuts out a fair amount of unnecessary
97 * work, such as intermediate List allocation et al.
// Extracts the argument string from an ArgumentContext. The decision is made on the token type of the first child:
// an IDENTIFIER is returned as-is, a STRING is handed over to concatStrings(). Any other shape is reported as a
// VerifyException, either through verify() or through the explicit throw at the end.
99 final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
100 // Get first child, which we fully expect to exist and be a lexer token
101 final ParseTree firstChild = context.getChild(0);
102 verify(firstChild instanceof TerminalNode, "Unexpected shape of %s", context);
103 final TerminalNode firstNode = (TerminalNode) firstChild;
104 final int firstType = firstNode.getSymbol().getType();
106 case YangStatementParser.IDENTIFIER:
107 // Simple case, there is a simple string, which cannot contain anything that we would need to process.
108 return firstNode.getText();
109 case YangStatementParser.STRING:
110 // Complex case, defer to a separate method
111 return concatStrings(context, ref);
// Neither IDENTIFIER nor STRING: the parse tree does not have the shape this code relies on
113 throw new VerifyException("Unexpected first symbol in " + context);
// Builds the argument string by walking all children of the context in order and appending every STRING token via
// appendString(). SEP and PLUS tokens are recognized as well; any other token type, or a child that is not a
// TerminalNode, results in a VerifyException.
// NOTE(review): the statements following the SEP and PLUS labels are not visible in this view.
117 private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
119 * We have multiple fragments. Just search the tree. This code is equivalent to
121 * context.STRING().forEach(stringNode -> appendString(sb, stringNode, ref))
123 * except we minimize allocations which that would do.
125 final StringBuilder sb = new StringBuilder();
// Iterates the children field directly instead of using the generated accessor methods
126 for (ParseTree child : context.children) {
127 verify(child instanceof TerminalNode, "Unexpected fragment component %s", child);
128 final TerminalNode childNode = (TerminalNode) child;
129 switch (childNode.getSymbol().getType()) {
130 case YangStatementParser.SEP:
133 case YangStatementParser.PLUS:
134 // Operator, which we are handling by concat
136 case YangStatementParser.STRING:
137 // a lexer string, could be pretty much anything
138 appendString(sb, childNode, ref);
141 throw new VerifyException("Unexpected symbol in " + childNode);
144 return sb.toString();
// Appends the text of a single STRING token to sb. Three shapes are told apart by the first and last character of
// the token text: double-quoted, single-quoted and everything else (treated as unquoted).
147 private void appendString(final StringBuilder sb, final TerminalNode stringNode,
148 final StatementSourceReference ref) {
// NOTE(review): charAt(0) assumes the token text is never empty -- presumably guaranteed by the lexer, confirm.
150 final String str = stringNode.getText();
151 final char firstChar = str.charAt(0);
152 final char lastChar = str.charAt(str.length() - 1);
153 // NOTE: Enforcement and transformation logic here should certainly be pushed down to the lexer, as ANTLR can
154 // account for it with lexer modes. One problem is that lexing here depends on version being lexed,
155 // hence we really would have to re-parse the YANG file after determining its version. We certainly do not
157 // FIXME: YANGTOOLS-1079: but since we are performing quoting checks, perhaps at least that part could be lexed?
158 if (firstChar == '"' && lastChar == '"') {
// Strip the surrounding double quotes before any further processing
159 final String innerStr = str.substring(1, str.length() - 1);
161 * Unescape escaped double quotes, tabs, new line and backslash
162 * in the inner string and trim the result.
// The version-specific check runs on the raw inner text; trimming happens next and unescaping last. The token's
// character position in its line is what trimWhitespace() receives as its dquot argument.
164 checkDoubleQuotedString(innerStr, ref);
165 sb.append(unescape(trimWhitespace(innerStr, stringNode.getSymbol().getCharPositionInLine())));
166 } else if (firstChar == '\'' && lastChar == '\'') {
168 * According to RFC6020 a single quote character cannot occur in
169 * a single-quoted string, even when preceded by a backslash.
// Text between the single quotes is appended verbatim, without any check or unescaping
171 sb.append(str, 1, str.length() - 1);
// Remaining case: the text is subjected to the version-specific unquoted string check.
// NOTE(review): the statement appending the unquoted text is not visible in this view.
173 checkUnquotedString(str, ref);
// Version-specific validation hook, invoked by appendString() with the text found between the double quotes of a
// double-quoted string, before that text is trimmed and unescaped. ref identifies the statement being processed.
178 abstract void checkDoubleQuotedString(String str, StatementSourceReference ref);
// Version-specific validation hook, invoked by appendString() with the complete text of a token that is neither
// double-quoted nor single-quoted. ref identifies the statement being processed.
180 abstract void checkUnquotedString(String str, StatementSourceReference ref);
// Replaces escape sequences in str using the precompiled literal patterns, chaining one replaceAll() per pattern.
// The search for the first backslash acts as a guard for strings which contain no backslash at all.
// NOTE(review): parts of the replacement chain are not visible in this view.
182 private static String unescape(final String str) {
183 final int backslash = str.indexOf('\\');
184 if (backslash == -1) {
188 // FIXME: YANGTOOLS-1079: given we know where the first backslash is, it would be more efficient to walk the
189 // string and unescape in one go
190 return ESCAPED_TAB.matcher(
192 ESCAPED_BACKSLASH.matcher(
// Innermost step: a backslash followed by a double quote becomes a plain double quote. The backslash in the
// replacement text is Matcher.replaceAll()'s own escape character.
193 ESCAPED_DQUOT.matcher(str).replaceAll("\\\""))
// Trims whitespace around the line breaks of a multi-line string. The string is processed as '\n'-separated
// segments: the first segment only has its tail trimmed, the segments in between are tail-trimmed and then passed
// through trimLeadingAndAppend(), and the last segment goes through trimLeadingAndAppend() with its tail untouched.
// dquot is forwarded to trimLeadingAndAppend(); the caller visible in this file passes the character position of
// the string token in its line.
// NOTE(review): the loop header, the early-return condition and the updates of 'start' are not visible in this view.
200 static String trimWhitespace(final String str, final int dquot) {
201 int brk = str.indexOf('\n');
203 // No need to trim whitespace
207 // Okay, we may need to do some trimming, set up a builder and append the first segment
208 final int length = str.length();
// The builder is pre-sized to the input length
209 final StringBuilder sb = new StringBuilder(length);
211 // Append first segment, which needs only tail-trimming
212 sb.append(str, 0, trimTrailing(str, 0, brk)).append('\n');
214 // With that out of the way, setup our iteration state. The string segment we are looking at is
215 // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
216 // at the last segment.
218 brk = str.indexOf('\n', start);
220 // Loop over inner strings
222 trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
224 brk = str.indexOf('\n', start);
// Last segment: runs to the end of the string and is not tail-trimmed
227 return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
// Appends the segment str[start, end) to sb after dealing with its leading whitespace relative to the column given
// by dquot, and returns sb so that calls can be chained.
// NOTE(review): most of the body is not visible in this view, including the declarations and updates of 'pos' and
// 'offset'; the comments below are limited to what the visible lines show.
230 private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
231 final int start, final int end) {
// Walks over leading characters for as long as the tracked position has not moved past dquot
235 while (pos <= dquot) {
237 // We ran out of data, nothing to append
241 final char ch = str.charAt(offset);
243 // tabs are to be treated as 8 spaces
245 } else if (WHITESPACE_MATCHER.matches(ch)) {
254 // We have expanded beyond double quotes, push equivalent spaces
255 while (pos - 1 > dquot) {
// Whatever remains of the segment from the current offset is appended unchanged
260 return sb.append(str, offset, end);
263 private static int trimTrailing(final String str, final int start, final int end) {
265 while (ret > start) {
266 final int prev = ret - 1;
267 if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {