yang/yang-parser-rfc7950/src/main/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtils.java

   1 /*
   2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
   3  *
   4  * This program and the accompanying materials are made available under the
   5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
   6  * and is available at http://www.eclipse.org/legal/epl-v10.html
   7  */
   8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
   9
  10 import static com.google.common.base.Verify.verify;
  11
  12 import com.google.common.annotations.VisibleForTesting;
  13 import com.google.common.base.CharMatcher;
  14 import com.google.common.base.VerifyException;
  15 import java.util.List;
  16 import org.antlr.v4.runtime.Token;
  17 import org.antlr.v4.runtime.tree.ParseTree;
  18 import org.antlr.v4.runtime.tree.TerminalNode;
  19 import org.eclipse.jdt.annotation.NonNull;
  20 import org.opendaylight.yangtools.yang.common.YangVersion;
  21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
  22 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
  23 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.UnquotedStringContext;
  24 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.AntlrSupport;
  25 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument;
  26 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument.Concatenation;
  27 import org.opendaylight.yangtools.yang.parser.rfc7950.ir.IRArgument.Single;
  28 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
  29 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
  30
  31 /**
  32  * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
  33  */
  34 abstract class ArgumentContextUtils {
  35     /**
  36      * YANG 1.0 version of strings, which were not completely clarified in
  37      * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
  38      */
  39     private static final class RFC6020 extends ArgumentContextUtils {
  40         private static final @NonNull RFC6020 INSTANCE = new RFC6020();
  41
  42         @Override
  43         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  44             // No-op
  45         }
  46
  47         @Override
  48         void checkUnquoted(final String str, final StatementSourceReference ref) {
  49             // No-op
  50         }
  51     }
  52
  53     /**
  54      * YANG 1.1 version of strings, which were clarified in
  55      * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
  56      */
  57     // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
  58     //       understand versions and needs to work with both.
  59     private static final class RFC7950 extends ArgumentContextUtils {
  60         private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
  61         private static final @NonNull RFC7950 INSTANCE = new RFC7950();
  62
  63         @Override
  64         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  65             if (backslash < str.length() - 1) {
  66                 int index = backslash;
  67                 while (index != -1) {
  68                     switch (str.charAt(index + 1)) {
  69                         case 'n':
  70                         case 't':
  71                         case '\\':
  72                         case '\"':
  73                             index = str.indexOf('\\', index + 2);
  74                             break;
  75                         default:
  76                             throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
  77                                 + "quoted string the backslash must be followed by one of the following character "
  78                                 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
  79                     }
  80                 }
  81             }
  82         }
  83
  84         @Override
  85         void checkUnquoted(final String str, final StatementSourceReference ref) {
  86             SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
  87                 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
  88         }
  89     }
  90
   91     private ArgumentContextUtils() {
   92         // Hidden on purpose: being private, only the classes nested in this file can extend and instantiate it
   93     }
  94
  95     static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
  96         switch (version) {
  97             case VERSION_1:
  98                 return RFC6020.INSTANCE;
  99             case VERSION_1_1:
 100                 return RFC7950.INSTANCE;
 101             default:
 102                 throw new IllegalStateException("Unhandled version " + version);
 103         }
 104     }
 105
  106     // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
          // Returns the shared RFC6020 (YANG 1.0) instance, i.e. the flavor whose checkDoubleQuoted() and
          // checkUnquoted() hooks are both no-ops.
  107     static @NonNull ArgumentContextUtils rfc6020() {
  108         return RFC6020.INSTANCE;
  109     }
 110
 111     /*
 112      * NOTE: this method we do not use convenience methods provided by generated parser code, but instead are making
 113      *       based on the grammar assumptions. While this is more verbose, it cuts out a number of unnecessary code,
 114      *       such as intermediate List allocation et al.
 115      */
 116     final @NonNull String stringFromStringContext(final IRArgument argument, final StatementSourceReference ref) {
 117         if (argument instanceof Single) {
 118             final Single single = (Single) argument;
 119             final String str = single.string();
 120             if (single.needQuoteCheck()) {
 121                 checkUnquoted(str, ref);
 122             }
 123             return single.needUnescape() ? unescape(str, ref) : str;
 124         }
 125
 126         verify(argument instanceof Concatenation, "Unexpected argument %s", argument);
 127         return concatStrings(((Concatenation) argument).parts(), ref);
 128     }
 129
 130     /*
 131      * NOTE: this method we do not use convenience methods provided by generated parser code, but instead are making
 132      *       based on the grammar assumptions. While this is more verbose, it cuts out a number of unnecessary code,
 133      *       such as intermediate List allocation et al.
 134      */
 135     @Deprecated(forRemoval = true)
 136     final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
 137         // Get first child, which we fully expect to exist and be a lexer token
 138         final ParseTree firstChild = context.getChild(0);
 139         if (firstChild instanceof TerminalNode) {
 140             final Token token = ((TerminalNode) firstChild).getSymbol();
 141             switch (token.getType()) {
 142                 case YangStatementParser.IDENTIFIER:
 143                     // Simplest of cases -- it is an IDENTIFIER, hence we do not need to validate anything else and can
 144                     // just grab the string and run with it.
 145                     return firstChild.getText();
 146                 case YangStatementParser.DQUOT_STRING:
 147                 case YangStatementParser.DQUOT_END:
 148                 case YangStatementParser.SQUOT_STRING:
 149                 case YangStatementParser.SQUOT_END:
 150                     // Quoted strings are potentially a pain, deal with them separately
 151                     return decodeQuoted(context, ref);
 152                 default:
 153                     throw new VerifyException("Unexpected token " + token);
 154             }
 155         }
 156
 157         verify(firstChild instanceof UnquotedStringContext, "Unexpected shape of %s", context);
 158         // Simple case, just grab the text, as ANTLR has done all the heavy lifting
 159         final String str = firstChild.getText();
 160         checkUnquoted(str, ref);
 161         return str;
 162     }
 163
 164     @Deprecated
 165     private @NonNull String decodeQuoted(final ArgumentContext context, final StatementSourceReference ref) {
 166         if (context.getChildCount() > 2) {
 167             // Potentially-complex case of string quoting, escaping and concatenation.
 168             return concatStrings(context, ref);
 169         }
 170
 171         // No concatenation needed, special-case
 172         final ParseTree child = context.getChild(0);
 173         verify(child instanceof TerminalNode, "Unexpected shape of %s", context);
 174         final Token token = ((TerminalNode) child).getSymbol();
 175         switch (token.getType()) {
 176             case YangStatementParser.DQUOT_END:
 177             case YangStatementParser.SQUOT_END:
 178                 // We are missing actual body, hence this is an empty string
 179                 return "";
 180             case YangStatementParser.SQUOT_STRING:
 181                 return token.getText();
 182             case YangStatementParser.DQUOT_STRING:
 183                 return normalizeDoubleQuoted(token, ref);
 184             default:
 185                 throw new VerifyException("Unhandled token " + token);
 186         }
 187     }
 188
 189     private @NonNull String concatStrings(final List<? extends Single> parts, final StatementSourceReference ref) {
 190         final StringBuilder sb = new StringBuilder();
 191         for (Single part : parts) {
 192             final String str = part.string();
 193             sb.append(part.needUnescape() ? unescape(str, ref) : str);
 194         }
 195         return sb.toString();
 196     }
 197
 198     @Deprecated
 199     private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
 200         final StringBuilder sb = new StringBuilder();
 201         for (ParseTree child : context.children) {
 202             verify(child instanceof TerminalNode, "Unexpected argument component %s", child);
 203             final Token token = ((TerminalNode) child).getSymbol();
 204             switch (token.getType()) {
 205                 case YangStatementParser.SEP:
 206                     // Separator, just skip it over
 207                 case YangStatementParser.PLUS:
 208                     // Operator, which we are handling by concat, skip it over
 209                 case YangStatementParser.DQUOT_END:
 210                 case YangStatementParser.SQUOT_END:
 211                     // Quote stops, skip them over because we either already added the content, or would be appending
 212                     // an empty string
 213                     break;
 214                 case YangStatementParser.SQUOT_STRING:
 215                     // Single-quoted string, append it as a literal
 216                     sb.append(token.getText());
 217                     break;
 218                 case YangStatementParser.DQUOT_STRING:
 219                     sb.append(normalizeDoubleQuoted(token, ref));
 220                     break;
 221                 default:
 222                     throw new VerifyException("Unexpected token " + token);
 223             }
 224         }
 225         return sb.toString();
 226     }
 227
 228     @Deprecated
 229     private String normalizeDoubleQuoted(final Token token, final StatementSourceReference ref) {
 230         // Whitespace normalization happens irrespective of further handling and has no effect on the result. Strictly
 231         // speaking we should also have the previous token, which would be a DQUOT_START and get the position from it.
 232         // Seeing as it is a single-character token let's just subtract one from this token to achieve the same result.
 233         final String stripped = AntlrSupport.trimWhitespace(token.getText(), token.getCharPositionInLine() - 1);
 234
 235         // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
 236         // validation and processing (which will reuse the work we have done)
 237         final int backslash = stripped.indexOf('\\');
 238         return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
 239     }
 240
  241     /*
  242      * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer, and
  243      *       ANTLR should account for it with lexer modes. We do not want to force a re-lexing phase in the parser
  244      *       just because we decided to let ANTLR do the work.
           *
           * checkDoubleQuoted() is invoked on a string in which a backslash was found, before that string is
           * unescaped, and receives the index of the first backslash. checkUnquoted() is invoked on the text of an
           * unquoted string. The RFC6020 flavor implements both as no-ops, while the RFC7950 flavor rejects offending
           * input with a SourceException.
  245      */
  246     abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
  247
  248     abstract void checkUnquoted(String str, StatementSourceReference ref);
 249
 250     private @NonNull String unescape(final String str, final StatementSourceReference ref) {
 251         // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
 252         // validation and processing (which will reuse the work we have done)
 253         final int backslash = str.indexOf('\\');
 254         return backslash == -1 ? str : unescape(ref, str, backslash);
 255     }
 256
 257     /*
 258      * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
 259      */
 260     private @NonNull String unescape(final StatementSourceReference ref, final String str, final int backslash) {
 261         checkDoubleQuoted(str, ref, backslash);
 262         StringBuilder sb = new StringBuilder(str.length());
 263         unescapeBackslash(sb, str, backslash);
 264         return sb.toString();
 265     }
 266
 267     @VisibleForTesting
 268     static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
 269         String substring = str;
 270         int backslashIndex = backslash;
 271         while (true) {
 272             int nextIndex = backslashIndex + 1;
 273             if (backslashIndex != -1 && nextIndex < substring.length()) {
 274                 replaceBackslash(sb, substring, nextIndex);
 275                 substring = substring.substring(nextIndex + 1);
 276                 if (substring.length() > 0) {
 277                     backslashIndex = substring.indexOf('\\');
 278                 } else {
 279                     break;
 280                 }
 281             } else {
 282                 sb.append(substring);
 283                 break;
 284             }
 285         }
 286     }
 287
 288     private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
 289         int backslash = nextAfterBackslash - 1;
 290         sb.append(str, 0, backslash);
 291         final char c = str.charAt(nextAfterBackslash);
 292         switch (c) {
 293             case '\\':
 294             case '"':
 295                 sb.append(c);
 296                 break;
 297             case 't':
 298                 sb.append('\t');
 299                 break;
 300             case 'n':
 301                 sb.append('\n');
 302                 break;
 303             default:
 304                 sb.append(str, backslash, nextAfterBackslash + 1);
 305         }
 306     }
 307 }