2 * Copyright (c) 2015 Cisco Systems, Inc. and others. All rights reserved.
4 * This program and the accompanying materials are made available under the
5 * terms of the Eclipse Public License v1.0 which accompanies this distribution,
6 * and is available at http://www.eclipse.org/legal/epl-v10.html
8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
10 import static com.google.common.base.Verify.verify;
12 import com.google.common.annotations.VisibleForTesting;
13 import com.google.common.base.CharMatcher;
14 import com.google.common.base.VerifyException;
15 import java.util.List;
16 import org.antlr.v4.runtime.Token;
17 import org.antlr.v4.runtime.tree.ParseTree;
18 import org.antlr.v4.runtime.tree.TerminalNode;
19 import org.eclipse.jdt.annotation.NonNull;
20 import org.opendaylight.yangtools.yang.common.YangVersion;
21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
22 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
23 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.UnquotedStringContext;
24 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.AntlrSupport;
25 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument;
26 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument.Concatenation;
27 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument.Single;
28 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
29 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
32 * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
34 abstract class ArgumentContextUtils {
36 * YANG 1.0 version of strings, which were not completely clarified in
37 * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
// Strategy implementation for YANG 1.0 strings; it is only reachable through the INSTANCE singleton below.
39 private static final class RFC6020 extends ArgumentContextUtils {
// Shared instance, returned by forVersion() and rfc6020().
40 private static final @NonNull RFC6020 INSTANCE = new RFC6020();
// NOTE(review): the body of this override is not visible in this listing -- presumably YANG 1.0 performs no
// validation of backslash escapes here; confirm against the full file.
43 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
// NOTE(review): the body of this override is not visible in this listing either; confirm against the full file.
48 void checkUnquoted(final String str, final StatementSourceReference ref) {
54 * YANG 1.1 version of strings, which were clarified in
55 * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
57 // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
58 // understand versions and needs to work with both.
// Strategy implementation for YANG 1.1 strings; it is only reachable through the INSTANCE singleton below.
59 private static final class RFC7950 extends ArgumentContextUtils {
// Matches a single or a double quote character; used by checkUnquoted() below.
60 private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
// Shared instance, returned by forVersion().
61 private static final @NonNull RFC7950 INSTANCE = new RFC7950();
// Validates the escape sequences of a double-quoted string. The backslash argument is the offset at which the
// scan starts; callers in this file pass the result of indexOf('\\').
64 void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
// A backslash in the very last position has no following character and is therefore not examined.
65 if (backslash < str.length() - 1) {
66 int index = backslash;
// NOTE(review): the loop header and the accepted case labels are not visible in this listing. Judging by the
// error message below, the accepted characters after a backslash are n, t, " and \ -- confirm.
68 switch (str.charAt(index + 1)) {
// Skip over the two-character escape sequence and locate the next backslash, if any.
73 index = str.indexOf('\\', index + 2);
// Report the offending character together with the complete string.
76 throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
77 + "quoted string the backslash must be followed by one of the following character "
78 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
// Checks an unquoted string for single or double quote characters and reports them through
// SourceException.throwIf().
85 void checkUnquoted(final String str, final StatementSourceReference ref) {
86 SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
87 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
// Private constructor: the only instantiations visible in this file are the nested RFC6020 and RFC7950 singletons.
// NOTE(review): the constructor body is not visible in this listing.
91 private ArgumentContextUtils() {
// Selects the string-handling strategy for a YANG version: returns either the RFC6020 or the RFC7950 singleton and
// throws IllegalStateException for a version it does not handle.
// NOTE(review): the switch header and its case labels are not visible in this listing, so which YangVersion
// constant maps to which instance is presumed from the class names -- confirm against the full file.
95 static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
98 return RFC6020.INSTANCE;
100 return RFC7950.INSTANCE;
102 throw new IllegalStateException("Unhandled version " + version);
// Returns the shared RFC6020 (YANG 1.0) instance directly, without going through forVersion().
106 // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
107 static @NonNull ArgumentContextUtils rfc6020() {
108 return RFC6020.INSTANCE;
112 * NOTE: this method we do not use convenience methods provided by generated parser code, but instead are making
113 * based on the grammar assumptions. While this is more verbose, it cuts out a number of unnecessary code,
114 * such as intermediate List allocation et al.
116 final @NonNull String stringFromStringContext(final IRArgument argument, final StatementSourceReference ref) {
117 if (argument instanceof Single) {
118 final Single single = (Single) argument;
119 final String str = single.string();
120 if (single.needQuoteCheck()) {
121 checkUnquoted(str, ref);
123 return single.needUnescape() ? unescape(str, ref) : str;
126 verify(argument instanceof Concatenation, "Unexpected argument %s", argument);
127 return concatStrings(((Concatenation) argument).parts(), ref);
131 * NOTE: this method does not use the convenience methods provided by generated parser code, but instead relies
132 * on assumptions based on the grammar. While this is more verbose, it cuts out a fair amount of unnecessary
133 * work, such as intermediate List allocation et al.
// Legacy entry point operating directly on the ANTLR parse tree. It is marked for removal -- presumably superseded
// by the IRArgument-based overload; confirm.
135 @Deprecated(forRemoval = true)
136 final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
137 // Get first child, which we fully expect to exist and be a lexer token
138 final ParseTree firstChild = context.getChild(0);
139 if (firstChild instanceof TerminalNode) {
140 final Token token = ((TerminalNode) firstChild).getSymbol();
// Dispatch on the lexer token type of the first child.
141 switch (token.getType()) {
142 case YangStatementParser.IDENTIFIER:
143 // Simplest of cases -- it is an IDENTIFIER, hence we do not need to validate anything else and can
144 // just grab the string and run with it.
145 return firstChild.getText();
146 case YangStatementParser.DQUOT_STRING:
147 case YangStatementParser.DQUOT_END:
148 case YangStatementParser.SQUOT_STRING:
149 case YangStatementParser.SQUOT_END:
150 // Quoted strings are potentially a pain, deal with them separately
151 return decodeQuoted(context, ref);
// NOTE(review): the label guarding this throw is not visible in this listing -- presumably the switch default.
153 throw new VerifyException("Unexpected token " + token);
// Not a terminal node: the only other shape accepted is an unquoted string sub-context.
157 verify(firstChild instanceof UnquotedStringContext, "Unexpected shape of %s", context);
158 // Simple case, just grab the text, as ANTLR has done all the heavy lifting
159 final String str = firstChild.getText();
// Version-specific validation of the unquoted text.
160 checkUnquoted(str, ref);
// NOTE(review): the statement returning the checked string is not visible in this listing.
// Decodes a quoted argument: contexts with more than two children go through concatStrings(), anything else is
// handled as a single token based on its type.
165 private @NonNull String decodeQuoted(final ArgumentContext context, final StatementSourceReference ref) {
166 if (context.getChildCount() > 2) {
167 // Potentially-complex case of string quoting, escaping and concatenation.
168 return concatStrings(context, ref);
171 // No concatenation needed, special-case
172 final ParseTree child = context.getChild(0);
173 verify(child instanceof TerminalNode, "Unexpected shape of %s", context);
174 final Token token = ((TerminalNode) child).getSymbol();
175 switch (token.getType()) {
176 case YangStatementParser.DQUOT_END:
177 case YangStatementParser.SQUOT_END:
178 // We are missing actual body, hence this is an empty string
// NOTE(review): the statement for this case is not visible in this listing -- presumably it returns an empty
// string, as the comment above says; confirm.
180 case YangStatementParser.SQUOT_STRING:
// Single-quoted text is returned as-is, without any unescaping.
181 return token.getText();
182 case YangStatementParser.DQUOT_STRING:
// Double-quoted text gets whitespace trimming and backslash unescaping.
183 return normalizeDoubleQuoted(token, ref);
// NOTE(review): the label guarding this throw is not visible in this listing -- presumably the switch default.
185 throw new VerifyException("Unhandled token " + token);
189 private @NonNull String concatStrings(final List<? extends Single> parts, final StatementSourceReference ref) {
190 final StringBuilder sb = new StringBuilder();
191 for (Single part : parts) {
192 final String str = part.string();
193 sb.append(part.needUnescape() ? unescape(str, ref) : str);
195 return sb.toString();
// Concatenates the tokens making up a parsed argument: single-quoted text is appended verbatim, double-quoted
// text is appended after normalizeDoubleQuoted(). Every child must be a terminal node, otherwise verification
// fails.
// NOTE(review): the break statements and the label guarding the final throw are not visible in this listing.
199 private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
200 final StringBuilder sb = new StringBuilder();
201 for (ParseTree child : context.children) {
202 verify(child instanceof TerminalNode, "Unexpected argument component %s", child);
203 final Token token = ((TerminalNode) child).getSymbol();
204 switch (token.getType()) {
205 case YangStatementParser.SEP:
206 // Separator, just skip it over
207 case YangStatementParser.PLUS:
208 // Operator, which we are handling by concat, skip it over
209 case YangStatementParser.DQUOT_END:
210 case YangStatementParser.SQUOT_END:
211 // Quote stops, skip them over because we either already added the content, or would be appending
214 case YangStatementParser.SQUOT_STRING:
215 // Single-quoted string, append it as a literal
216 sb.append(token.getText());
218 case YangStatementParser.DQUOT_STRING:
// Double-quoted string, append it after whitespace trimming and unescaping
219 sb.append(normalizeDoubleQuoted(token, ref));
222 throw new VerifyException("Unexpected token " + token);
225 return sb.toString();
229 private String normalizeDoubleQuoted(final Token token, final StatementSourceReference ref) {
230 // Whitespace normalization happens irrespective of further handling and has no effect on the result. Strictly
231 // speaking we should also have the previous token, which would be a DQUOT_START and get the position from it.
232 // Seeing as it is a single-character token let's just subtract one from this token to achieve the same result.
233 final String stripped = AntlrSupport.trimWhitespace(token.getText(), token.getCharPositionInLine() - 1);
235 // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
236 // validation and processing (which will reuse the work we have done)
237 final int backslash = stripped.indexOf('\\');
238 return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
242 * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer, where
243 * ANTLR could account for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
244 * we decided to let ANTLR do the work.
// Version-specific validation of a string containing at least one backslash. Callers in this file pass the
// offset of the first backslash, as found by indexOf('\\'), in the backslash argument.
246 abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
// Version-specific validation of an unquoted string; invoked before such a string is handed back to the caller.
248 abstract void checkUnquoted(String str, StatementSourceReference ref);
250 private @NonNull String unescape(final String str, final StatementSourceReference ref) {
251 // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
252 // validation and processing (which will reuse the work we have done)
253 final int backslash = str.indexOf('\\');
254 return backslash == -1 ? str : unescape(ref, str, backslash);
258 * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
260 private @NonNull String unescape(final StatementSourceReference ref, final String str, final int backslash) {
261 checkDoubleQuoted(str, ref, backslash);
262 StringBuilder sb = new StringBuilder(str.length());
263 unescapeBackslash(sb, str, backslash);
264 return sb.toString();
268 static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
269 String substring = str;
270 int backslashIndex = backslash;
272 int nextIndex = backslashIndex + 1;
273 if (backslashIndex != -1 && nextIndex < substring.length()) {
274 replaceBackslash(sb, substring, nextIndex);
275 substring = substring.substring(nextIndex + 1);
276 if (substring.length() > 0) {
277 backslashIndex = substring.indexOf('\\');
282 sb.append(substring);
288 private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
289 int backslash = nextAfterBackslash - 1;
290 sb.append(str, 0, backslash);
291 final char c = str.charAt(nextAfterBackslash);
304 sb.append(str, backslash, nextAfterBackslash + 1);