2 * Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
10 import static com.google.common.base.Verify.verify;
12 import com.google.common.annotations.VisibleForTesting;
13 import com.google.common.base.CharMatcher;
14 import com.google.common.base.VerifyException;
15 import org.antlr.v4.runtime.Token;
16 import org.antlr.v4.runtime.tree.ParseTree;
17 import org.antlr.v4.runtime.tree.TerminalNode;
18 import org.eclipse.jdt.annotation.NonNull;
19 import org.opendaylight.yangtools.yang.common.YangVersion;
20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
22 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.QuotedStringContext;
23 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.UnquotedStringContext;
24 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
25 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
28 * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
30 abstract class ArgumentContextUtils {
32 * YANG 1.0 version of strings, which were not completely clarified in
33 * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
// YANG 1.0 flavour of the utilities. A single shared instance is kept in INSTANCE.
35 private static final class RFC6020 extends ArgumentContextUtils {
36 private static final @NonNull RFC6020 INSTANCE = new RFC6020();
// NOTE(review): presumably imposes no restrictions on backslash sequences for YANG 1.0 -- confirm.
39 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
// NOTE(review): presumably imposes no restrictions on unquoted strings for YANG 1.0 -- confirm.
44 void checkUnquoted(final String str, final StatementSourceReference ref) {
50 * YANG 1.1 version of strings, which were clarified in
51 * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
53 // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
54 // understand versions and needs to work with both.
// YANG 1.1 flavour of the utilities, which validates both double-quoted and unquoted strings.
55 private static final class RFC7950 extends ArgumentContextUtils {
// Matches a single quote or a double quote character.
56 private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
57 private static final @NonNull RFC7950 INSTANCE = new RFC7950();
// Validates backslash sequences of a double-quoted string. 'backslash' is the index of a backslash in 'str';
// 'ref' identifies the statement in the SourceException thrown when an illegal sequence is found.
60 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
// A backslash in the very last position has no following character, hence there is nothing to inspect.
61 if (backslash < str.length() - 1) {
62 int index = backslash;
// Dispatch on the character immediately following the backslash.
64 switch (str.charAt(index + 1)) {
// Resume the search past the backslash and the character following it.
69 index = str.indexOf('\\', index + 2);
// NOTE(review): presumably reached for any following character other than the accepted ones -- confirm.
72 throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
73 + "quoted string the backslash must be followed by one of the following character "
74 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
// Rejects an unquoted string which contains any character matched by ANYQUOTE_MATCHER.
81 void checkUnquoted(final String str, final StatementSourceReference ref) {
82 SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
83 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
// Shared matcher used by the trimming helpers to recognise whitespace characters.
87 private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
// Private constructor: subclassing is limited to the classes nested inside ArgumentContextUtils.
89 private ArgumentContextUtils() {
// Returns the shared utility instance for the supplied YANG version, which is either RFC6020.INSTANCE or
// RFC7950.INSTANCE. Throws IllegalStateException when the version is not handled.
// NOTE(review): presumably YANG 1.0 maps to RFC6020 and YANG 1.1 to RFC7950 -- confirm against the selection logic.
93 static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
96 return RFC6020.INSTANCE;
98 return RFC7950.INSTANCE;
100 throw new IllegalStateException("Unhandled version " + version);
104 // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
// Returns the shared RFC6020 instance directly, without requiring the caller to supply a YangVersion.
105 static @NonNull ArgumentContextUtils rfc6020() {
106 return RFC6020.INSTANCE;
110 * NOTE: in this method we do not use convenience methods provided by generated parser code, but instead rely
111 * on assumptions based on the grammar. While this is more verbose, it cuts out a fair amount of unnecessary
112 * work, such as intermediate List allocation et al.
// Extracts the argument string from an ArgumentContext. Three shapes of the first child are handled: a plain
// terminal token, an unquoted string and a quoted string (optionally concatenated with further quoted strings).
// 'ref' is handed to the validation and unquoting routines, which use it when reporting a SourceException.
114 final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
115 // Get first child, which we fully expect to exist and be a lexer token
116 final ParseTree firstChild = context.getChild(0);
117 if (firstChild instanceof TerminalNode) {
118 // Simplest of cases -- it is a simple IDENTIFIER, hence we do not need to validate anything else and can
119 // just grab the string and run with it.
120 return firstChild.getText();
123 if (firstChild instanceof UnquotedStringContext) {
124 // Simple case, just grab the text, as ANTLR has done all the heavy lifting
125 final String str = firstChild.getText();
// Version-specific validation of the unquoted text.
126 checkUnquoted(str, ref);
// Anything else has to be a quoted string, otherwise the parse tree does not have the expected shape.
130 verify(firstChild instanceof QuotedStringContext, "Unexpected shape of %s", context);
131 if (context.getChildCount() == 1) {
132 // No concatenation needed, special-case
133 return unquoteString((QuotedStringContext) firstChild, ref);
136 // Potentially-complex case of string quoting, escaping and concatenation.
137 return concatStrings(context, ref);
// Converts a single quoted-string node into its string value, based on the type of the node's second child token.
// Throws VerifyException when the second child is not a terminal node or its token type is not handled.
140 private String unquoteString(final QuotedStringContext context, final StatementSourceReference ref) {
// The second child is either the string body or, for an empty string, the closing quote.
141 final ParseTree secondChild = context.getChild(1);
142 verify(secondChild instanceof TerminalNode, "Unexpected shape of %s", context);
143 final Token secondToken = ((TerminalNode) secondChild).getSymbol();
144 final int type = secondToken.getType();
146 case YangStatementParser.DQUOT_END:
147 case YangStatementParser.SQUOT_END:
148 // We are missing actual body, hence this is an empty string
// Single-quoted body: the text is used verbatim, without any normalization.
150 case YangStatementParser.SQUOT_STRING:
151 return secondChild.getText();
152 case YangStatementParser.DQUOT_STRING:
153 // We should be looking at the first token, which is DQUOT_START, but since it is a single-character
154 // token, let's not bother.
// The body token's position in line minus one stands in for the position of the opening double quote.
155 return normalizeDoubleQuoted(secondChild.getText(), secondToken.getCharPositionInLine() - 1, ref);
157 throw new VerifyException("Unhandled token type " + type);
// Builds the argument value out of multiple quoted-string fragments by unquoting each fragment and appending the
// results in order. Throws VerifyException when a child is neither an accepted terminal nor a quoted string.
161 private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
163 * We have multiple fragments. Just search the tree. This code is equivalent to
165 * context.quotedString().forEach(stringNode -> sb.append(unquoteString(stringNode, ref))
167 * except we minimize allocations which that would do.
169 final StringBuilder sb = new StringBuilder();
// Walk the children directly: terminals are expected to be separators or '+' operators, the rest are fragments.
170 for (ParseTree child : context.children) {
171 if (child instanceof TerminalNode) {
172 final TerminalNode childNode = (TerminalNode) child;
173 switch (childNode.getSymbol().getType()) {
174 case YangStatementParser.SEP:
175 case YangStatementParser.PLUS:
176 // Operator, which we are handling by concat
179 throw new VerifyException("Unexpected symbol in " + childNode);
// Non-terminal children have to be quoted strings; each contributes its unquoted value.
182 verify(child instanceof QuotedStringContext, "Unexpected fragment component %s", child);
183 sb.append(unquoteString((QuotedStringContext) child, ref));
186 return sb.toString();
// Produces the value of a double-quoted string body: whitespace around line breaks is trimmed first, then backslash
// sequences are validated and unescaped, if there are any. 'dquot' is forwarded to trimWhitespace(); 'ref' is used
// by the validation when reporting errors.
189 private String normalizeDoubleQuoted(final String str, final int dquot, final StatementSourceReference ref) {
190 // Whitespace normalization happens irrespective of further handling and has no effect on the result
191 final String stripped = trimWhitespace(str, dquot);
193 // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
194 // validation and processing (which will reuse the work we have done)
195 final int backslash = stripped.indexOf('\\');
// Without a backslash there is nothing to validate or unescape, so the trimmed string is returned as is.
196 return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
200 * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer, with
201 * ANTLR accounting for it through lexer modes. We do not want to force a re-lexing phase in the parser just
202 * because we decided to let ANTLR do the work.
// Version-specific validation of a double-quoted string body. As invoked from unescape(), 'backslash' is the index
// of the first backslash in 'str' and 'ref' identifies the statement for error reporting.
204 abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
// Version-specific validation of an unquoted string; 'ref' identifies the statement for error reporting.
206 abstract void checkUnquoted(String str, StatementSourceReference ref);
209 * Check escape sequences, then unescape escaped double quotes, tabs, new line and backslash in the inner string.
// Validates the backslash sequences of 'str' using the version-specific check and then returns the string with the
// sequences processed by unescapeBackslash(). 'backslash' is the index of the first backslash in 'str'.
211 private String unescape(final StatementSourceReference ref, final String str, final int backslash) {
// Validation runs first, so an illegal sequence is reported before any output is built.
212 checkDoubleQuoted(str, ref, backslash);
// The input length is used as the initial capacity of the builder.
213 StringBuilder sb = new StringBuilder(str.length());
214 unescapeBackslash(sb, str, backslash);
215 return sb.toString();
// Appends 'str' to 'sb' with backslash sequences processed by replaceBackslash(). 'backslash' is the index of a
// backslash in 'str', or -1 when there is none.
219 static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
// 'substring' is the not-yet-processed remainder and 'backslashIndex' the backslash position within it.
220 String substring = str;
221 int backslashIndex = backslash;
223 int nextIndex = backslashIndex + 1;
// Only process a backslash which exists and is followed by at least one more character.
224 if (backslashIndex != -1 && nextIndex < substring.length()) {
225 replaceBackslash(sb, substring, nextIndex);
// Drop everything up to and including the character following the backslash.
226 substring = substring.substring(nextIndex + 1);
227 if (substring.length() > 0) {
228 backslashIndex = substring.indexOf('\\');
// NOTE(review): presumably reached when no further sequence can be processed, so the rest is copied verbatim.
233 sb.append(substring);
// Appends to 'sb' the part of 'str' preceding a backslash, followed by the result of handling the backslash
// sequence. 'nextAfterBackslash' is the index of the character immediately following the backslash.
239 private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
240 int backslash = nextAfterBackslash - 1;
// Copy everything in front of the backslash unchanged.
241 sb.append(str, 0, backslash);
242 final char c = str.charAt(nextAfterBackslash);
// Appends the backslash and the following character unchanged.
// NOTE(review): presumably the fallback for a sequence which is not recognised -- confirm.
255 sb.append(str, backslash, nextAfterBackslash + 1);
// Trims whitespace around the line breaks of 'str'. Segments are delimited by '\n'; 'dquot' is forwarded to
// trimLeadingAndAppend() for every segment following a line break.
// NOTE(review): presumably 'dquot' is the column of the opening double quote -- confirm against callers.
260 static String trimWhitespace(final String str, final int dquot) {
261 final int firstBrk = str.indexOf('\n');
// NOTE(review): no line break present; presumably the input is returned unchanged -- confirm.
262 if (firstBrk == -1) {
266 // Okay, we may need to do some trimming, set up a builder and append the first segment
267 final int length = str.length();
268 final StringBuilder sb = new StringBuilder(length);
270 // Append first segment, which needs only tail-trimming
271 sb.append(str, 0, trimTrailing(str, 0, firstBrk)).append('\n');
273 // With that out of the way, setup our iteration state. The string segment we are looking at is
274 // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
275 // at the last segment.
276 int start = firstBrk + 1;
277 int brk = str.indexOf('\n', start);
279 // Loop over inner strings
// Inner segments are trimmed on both sides and terminated with a line break again.
281 trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
283 brk = str.indexOf('\n', start);
// The last segment runs to the end of the string and only has its leading whitespace handled.
286 return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
// Appends the segment of 'str' between 'start' and 'end' to 'sb' after consuming its leading whitespace, and
// returns 'sb' to allow call chaining.
// NOTE(review): 'pos' appears to track the visual column and 'offset' the index into 'str'; whitespace seems to be
// consumed only while the column does not exceed 'dquot' -- confirm against the initialisation of both variables.
289 private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
290 final int start, final int end) {
294 while (pos <= dquot) {
296 // We ran out of data, nothing to append
300 final char ch = str.charAt(offset);
302 // tabs are to be treated as 8 spaces
304 } else if (WHITESPACE_MATCHER.matches(ch)) {
313 // We have expanded beyond double quotes, push equivalent spaces
314 while (pos - 1 > dquot) {
// Whatever remains of the segment is appended verbatim.
319 return sb.append(str, offset, end);
322 private static int trimTrailing(final String str, final int start, final int end) {
324 while (ret > start) {
325 final int prev = ret - 1;
326 if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {