2 * Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
10 import static com.google.common.base.Verify.verify;
12 import com.google.common.annotations.VisibleForTesting;
13 import com.google.common.base.CharMatcher;
14 import com.google.common.base.VerifyException;
15 import org.antlr.v4.runtime.tree.ParseTree;
16 import org.antlr.v4.runtime.tree.TerminalNode;
17 import org.eclipse.jdt.annotation.NonNull;
18 import org.opendaylight.yangtools.yang.common.YangVersion;
19 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
21 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
22 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
25 * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
27 abstract class ArgumentContextUtils {
29 * YANG 1.0 version of strings, which were not completely clarified in
30 * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
32 private static final class RFC6020 extends ArgumentContextUtils {
33 private static final @NonNull RFC6020 INSTANCE = new RFC6020();
36 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
41 void checkUnquoted(final String str, final StatementSourceReference ref) {
47 * YANG 1.1 version of strings, which were clarified in
48 * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
50 // NOTE: the clarified differences make it possible to delegate this to the ANTLR lexer, but the lexer does not
51 // understand versions and needs to work with both.
52 private static final class RFC7950 extends ArgumentContextUtils {
53 private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
54 private static final @NonNull RFC7950 INSTANCE = new RFC7950();
57 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
58 if (backslash < str.length() - 1) {
59 int index = backslash;
61 switch (str.charAt(index + 1)) {
66 index = str.indexOf('\\', index + 2);
69 throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
70 + "quoted string the backslash must be followed by one of the following character "
71 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
78 void checkUnquoted(final String str, final StatementSourceReference ref) {
79 SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
80 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
84 private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
86 private ArgumentContextUtils() {
90 static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
93 return RFC6020.INSTANCE;
95 return RFC7950.INSTANCE;
97 throw new IllegalStateException("Unhandled version " + version);
101 // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
102 static @NonNull ArgumentContextUtils rfc6020() {
103 return RFC6020.INSTANCE;
107 * NOTE: in this method we do not use convenience methods provided by generated parser code, but instead rely
108 * on assumptions based on the grammar. While this is more verbose, it cuts out a fair amount of unnecessary work,
109 * such as intermediate List allocation et al.
111 final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
112 // Get first child, which we fully expect to exist and be a lexer token
113 final ParseTree firstChild = context.getChild(0);
114 verify(firstChild instanceof TerminalNode, "Unexpected shape of %s", context);
115 final TerminalNode firstNode = (TerminalNode) firstChild;
116 final int firstType = firstNode.getSymbol().getType();
118 case YangStatementParser.IDENTIFIER:
119 // Simple case, there is a simple string, which cannot contain anything that we would need to process.
120 return firstNode.getText();
121 case YangStatementParser.PLUS:
123 case YangStatementParser.STRING:
124 // Complex case, defer to a separate method
125 return concatStrings(context, ref);
127 throw new VerifyException("Unexpected first symbol in " + context);
131 private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
133 * We have multiple fragments. Just search the tree. This code is equivalent to
135 * context.STRING().forEach(stringNode -> appendString(sb, stringNode, ref))
137 * except we minimize allocations which that would do.
139 final StringBuilder sb = new StringBuilder();
140 for (ParseTree child : context.children) {
141 verify(child instanceof TerminalNode, "Unexpected fragment component %s", child);
142 final TerminalNode childNode = (TerminalNode) child;
143 switch (childNode.getSymbol().getType()) {
144 case YangStatementParser.SEP:
147 case YangStatementParser.PLUS:
148 // Operator, which we are handling by concat
150 case YangStatementParser.STRING:
151 // a lexer string, could be pretty much anything
152 // TODO: appendString() is a dispatch based on quotes, which we should be able to defer to lexer for
153 // a dedicated type. That would expand the switch table here, but since we have it anyway, it
154 // would be nice to have the quoting distinction already taken care of. The performance
155 // difference will need to be benchmarked, though.
156 appendString(sb, childNode, ref);
159 throw new VerifyException("Unexpected symbol in " + childNode);
162 return sb.toString();
165 private void appendString(final StringBuilder sb, final TerminalNode stringNode,
166 final StatementSourceReference ref) {
167 final String str = stringNode.getText();
168 final char firstChar = str.charAt(0);
169 final char lastChar = str.charAt(str.length() - 1);
170 if (firstChar == '"' && lastChar == '"') {
171 sb.append(normalizeDoubleQuoted(str.substring(1, str.length() - 1),
172 stringNode.getSymbol().getCharPositionInLine(), ref));
173 } else if (firstChar == '\'' && lastChar == '\'') {
175 * According to RFC6020 a single quote character cannot occur in a single-quoted string, even when preceded
178 sb.append(str, 1, str.length() - 1);
180 checkUnquoted(str, ref);
185 private String normalizeDoubleQuoted(final String str, final int dquot, final StatementSourceReference ref) {
186 // Whitespace normalization happens irrespective of further handling and has no effect on the result
187 final String stripped = trimWhitespace(str, dquot);
189 // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
190 // validation and processing (which will reuse the work we have done)
191 final int backslash = stripped.indexOf('\\');
192 return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
196 * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer, and ANTLR
197 * can account for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
198 * we decided to let ANTLR do the work.
200 abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
202 abstract void checkUnquoted(String str, StatementSourceReference ref);
205 * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
207 private String unescape(final StatementSourceReference ref, final String str, final int backslash) {
208 checkDoubleQuoted(str, ref, backslash);
209 StringBuilder sb = new StringBuilder(str.length());
210 unescapeBackslash(sb, str, backslash);
211 return sb.toString();
215 static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
216 String substring = str;
217 int backslashIndex = backslash;
219 int nextIndex = backslashIndex + 1;
220 if (backslashIndex != -1 && nextIndex < substring.length()) {
221 replaceBackslash(sb, substring, nextIndex);
222 substring = substring.substring(nextIndex + 1);
223 if (substring.length() > 0) {
224 backslashIndex = substring.indexOf('\\');
229 sb.append(substring);
235 private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
236 int backslash = nextAfterBackslash - 1;
237 sb.append(str, 0, backslash);
238 final char c = str.charAt(nextAfterBackslash);
251 sb.append(str, backslash, nextAfterBackslash + 1);
256 static String trimWhitespace(final String str, final int dquot) {
257 final int firstBrk = str.indexOf('\n');
258 if (firstBrk == -1) {
262 // Okay, we may need to do some trimming, set up a builder and append the first segment
263 final int length = str.length();
264 final StringBuilder sb = new StringBuilder(length);
266 // Append first segment, which needs only tail-trimming
267 sb.append(str, 0, trimTrailing(str, 0, firstBrk)).append('\n');
269 // With that out of the way, setup our iteration state. The string segment we are looking at is
270 // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
271 // at the last segment.
272 int start = firstBrk + 1;
273 int brk = str.indexOf('\n', start);
275 // Loop over inner strings
277 trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
279 brk = str.indexOf('\n', start);
282 return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
285 private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
286 final int start, final int end) {
290 while (pos <= dquot) {
292 // We ran out of data, nothing to append
296 final char ch = str.charAt(offset);
298 // tabs are to be treated as 8 spaces
300 } else if (WHITESPACE_MATCHER.matches(ch)) {
309 // We have expanded beyond double quotes, push equivalent spaces
310 while (pos - 1 > dquot) {
315 return sb.append(str, offset, end);
318 private static int trimTrailing(final String str, final int start, final int end) {
320 while (ret > start) {
321 final int prev = ret - 1;
322 if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {