yang/yang-parser-rfc7950/src/main/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtils.java

   1 /*
   2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
   3  *
   4  * This program and the accompanying materials are made available under the
   5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
   6  * and is available at http://www.eclipse.org/legal/epl-v10.html
   7  */
   8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
   9
  10 import static com.google.common.base.Verify.verify;
  11
  12 import com.google.common.annotations.VisibleForTesting;
  13 import com.google.common.base.CharMatcher;
  14 import com.google.common.base.VerifyException;
  15 import org.antlr.v4.runtime.Token;
  16 import org.antlr.v4.runtime.tree.ParseTree;
  17 import org.antlr.v4.runtime.tree.TerminalNode;
  18 import org.eclipse.jdt.annotation.NonNull;
  19 import org.opendaylight.yangtools.yang.common.YangVersion;
  20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
  21 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
  22 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.QuotedStringContext;
  23 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.UnquotedStringContext;
  24 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
  25 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
  26
  27 /**
  28  * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
  29  */
  30 abstract class ArgumentContextUtils {
  31     /**
  32      * YANG 1.0 version of strings, which were not completely clarified in
  33      * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
  34      */
  35     private static final class RFC6020 extends ArgumentContextUtils {
  36         private static final @NonNull RFC6020 INSTANCE = new RFC6020();
  37
  38         @Override
  39         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  40             // No-op
  41         }
  42
  43         @Override
  44         void checkUnquoted(final String str, final StatementSourceReference ref) {
  45             // No-op
  46         }
  47     }
  48
  49     /**
  50      * YANG 1.1 version of strings, which were clarified in
  51      * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
  52      */
  53     // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
  54     //       understand versions and needs to work with both.
  55     private static final class RFC7950 extends ArgumentContextUtils {
  56         private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
  57         private static final @NonNull RFC7950 INSTANCE = new RFC7950();
  58
  59         @Override
  60         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  61             if (backslash < str.length() - 1) {
  62                 int index = backslash;
  63                 while (index != -1) {
  64                     switch (str.charAt(index + 1)) {
  65                         case 'n':
  66                         case 't':
  67                         case '\\':
  68                         case '\"':
  69                             index = str.indexOf('\\', index + 2);
  70                             break;
  71                         default:
  72                             throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
  73                                 + "quoted string the backslash must be followed by one of the following character "
  74                                 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
  75                     }
  76                 }
  77             }
  78         }
  79
  80         @Override
  81         void checkUnquoted(final String str, final StatementSourceReference ref) {
  82             SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
  83                 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
  84         }
  85     }
  86
  87     private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
  88
  89     private ArgumentContextUtils() {
  90         // Hidden on purpose
  91     }
  92
  93     static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
  94         switch (version) {
  95             case VERSION_1:
  96                 return RFC6020.INSTANCE;
  97             case VERSION_1_1:
  98                 return RFC7950.INSTANCE;
  99             default:
 100                 throw new IllegalStateException("Unhandled version " + version);
 101         }
 102     }
 103
 104     // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
 105     static @NonNull ArgumentContextUtils rfc6020() {
 106         return RFC6020.INSTANCE;
 107     }
 108
 109     /*
 110      * NOTE: this method we do not use convenience methods provided by generated parser code, but instead are making
 111      *       based on the grammar assumptions. While this is more verbose, it cuts out a number of unnecessary code,
 112      *       such as intermediate List allocation et al.
 113      */
 114     final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
 115         // Get first child, which we fully expect to exist and be a lexer token
 116         final ParseTree firstChild = context.getChild(0);
 117         if (firstChild instanceof TerminalNode) {
 118             // Simplest of cases -- it is a simple IDENTIFIER, hence we do not need to validate anything else and can
 119             // just grab the string and run with it.
 120             return firstChild.getText();
 121         }
 122
 123         if (firstChild instanceof UnquotedStringContext) {
 124             // Simple case, just grab the text, as ANTLR has done all the heavy lifting
 125             final String str = firstChild.getText();
 126             checkUnquoted(str, ref);
 127             return str;
 128         }
 129
 130         verify(firstChild instanceof QuotedStringContext, "Unexpected shape of %s", context);
 131         if (context.getChildCount() == 1) {
 132             // No concatenation needed, special-case
 133             return unquoteString((QuotedStringContext) firstChild, ref);
 134         }
 135
 136         // Potentially-complex case of string quoting, escaping and concatenation.
 137         return concatStrings(context, ref);
 138     }
 139
 140     private String unquoteString(final QuotedStringContext context, final StatementSourceReference ref) {
 141         final ParseTree secondChild = context.getChild(1);
 142         verify(secondChild instanceof TerminalNode, "Unexpected shape of %s", context);
 143         final Token secondToken = ((TerminalNode) secondChild).getSymbol();
 144         final int type = secondToken.getType();
 145         switch (type) {
 146             case YangStatementParser.DQUOT_END:
 147             case YangStatementParser.SQUOT_END:
 148                 // We are missing actual body, hence this is an empty string
 149                 return "";
 150             case YangStatementParser.SQUOT_STRING:
 151                 return secondChild.getText();
 152             case YangStatementParser.DQUOT_STRING:
 153                 // We should be looking at the first token, which is DQUOT_START, but since it is a single-character
 154                 // token, let's not bother.
 155                 return normalizeDoubleQuoted(secondChild.getText(), secondToken.getCharPositionInLine() - 1, ref);
 156             default:
 157                 throw new VerifyException("Unhandled token type " + type);
 158         }
 159     }
 160
 161     private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
 162         /*
 163          * We have multiple fragments. Just search the tree. This code is equivalent to
 164          *
 165          *    context.quotedString().forEach(stringNode -> sb.append(unquoteString(stringNode, ref))
 166          *
 167          * except we minimize allocations which that would do.
 168          */
 169         final StringBuilder sb = new StringBuilder();
 170         for (ParseTree child : context.children) {
 171             if (child instanceof TerminalNode) {
 172                 final TerminalNode childNode = (TerminalNode) child;
 173                 switch (childNode.getSymbol().getType()) {
 174                     case YangStatementParser.SEP:
 175                     case YangStatementParser.PLUS:
 176                         // Operator, which we are handling by concat
 177                         break;
 178                     default:
 179                         throw new VerifyException("Unexpected symbol in " + childNode);
 180                 }
 181             } else {
 182                 verify(child instanceof QuotedStringContext, "Unexpected fragment component %s", child);
 183                 sb.append(unquoteString((QuotedStringContext) child, ref));
 184             }
 185         }
 186         return sb.toString();
 187     }
 188
 189     private String normalizeDoubleQuoted(final String str, final int dquot, final StatementSourceReference ref) {
 190         // Whitespace normalization happens irrespective of further handling and has no effect on the result
 191         final String stripped = trimWhitespace(str, dquot);
 192
 193         // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
 194         // validation and processing (which will reuse the work we have done)
 195         final int backslash = stripped.indexOf('\\');
 196         return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
 197     }
 198
 199     /*
 200      * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer and ANTLR
 201      *       account the for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
 202      *       we decided to let ANTLR do the work.
 203      */
 204     abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
 205
 206     abstract void checkUnquoted(String str, StatementSourceReference ref);
 207
 208     /*
 209      * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
 210      */
 211     private String unescape(final StatementSourceReference ref, final String str, final int backslash) {
 212         checkDoubleQuoted(str, ref, backslash);
 213         StringBuilder sb = new StringBuilder(str.length());
 214         unescapeBackslash(sb, str, backslash);
 215         return sb.toString();
 216     }
 217
 218     @VisibleForTesting
 219     static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
 220         String substring = str;
 221         int backslashIndex = backslash;
 222         while (true) {
 223             int nextIndex = backslashIndex + 1;
 224             if (backslashIndex != -1 && nextIndex < substring.length()) {
 225                 replaceBackslash(sb, substring, nextIndex);
 226                 substring = substring.substring(nextIndex + 1);
 227                 if (substring.length() > 0) {
 228                     backslashIndex = substring.indexOf('\\');
 229                 } else {
 230                     break;
 231                 }
 232             } else {
 233                 sb.append(substring);
 234                 break;
 235             }
 236         }
 237     }
 238
 239     private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
 240         int backslash = nextAfterBackslash - 1;
 241         sb.append(str, 0, backslash);
 242         final char c = str.charAt(nextAfterBackslash);
 243         switch (c) {
 244             case '\\':
 245             case '"':
 246                 sb.append(c);
 247                 break;
 248             case 't':
 249                 sb.append('\t');
 250                 break;
 251             case 'n':
 252                 sb.append('\n');
 253                 break;
 254             default:
 255                 sb.append(str, backslash, nextAfterBackslash + 1);
 256         }
 257     }
 258
 259     @VisibleForTesting
 260     static String trimWhitespace(final String str, final int dquot) {
 261         final int firstBrk = str.indexOf('\n');
 262         if (firstBrk == -1) {
 263             return str;
 264         }
 265
 266         // Okay, we may need to do some trimming, set up a builder and append the first segment
 267         final int length = str.length();
 268         final StringBuilder sb = new StringBuilder(length);
 269
 270         // Append first segment, which needs only tail-trimming
 271         sb.append(str, 0, trimTrailing(str, 0, firstBrk)).append('\n');
 272
 273         // With that out of the way, setup our iteration state. The string segment we are looking at is
 274         // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
 275         // at the last segment.
 276         int start = firstBrk + 1;
 277         int brk = str.indexOf('\n', start);
 278
 279         // Loop over inner strings
 280         while (brk != -1) {
 281             trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
 282             start = brk + 1;
 283             brk = str.indexOf('\n', start);
 284         }
 285
 286         return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
 287     }
 288
 289     private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
 290             final int start, final int end) {
 291         int offset = start;
 292         int pos = 0;
 293
 294         while (pos <= dquot) {
 295             if (offset == end) {
 296                 // We ran out of data, nothing to append
 297                 return sb;
 298             }
 299
 300             final char ch = str.charAt(offset);
 301             if (ch == '\t') {
 302                 // tabs are to be treated as 8 spaces
 303                 pos += 8;
 304             } else if (WHITESPACE_MATCHER.matches(ch)) {
 305                 pos++;
 306             } else {
 307                 break;
 308             }
 309
 310             offset++;
 311         }
 312
 313         // We have expanded beyond double quotes, push equivalent spaces
 314         while (pos - 1 > dquot) {
 315             sb.append(' ');
 316             pos--;
 317         }
 318
 319         return sb.append(str, offset, end);
 320     }
 321
 322     private static int trimTrailing(final String str, final int start, final int end) {
 323         int ret = end;
 324         while (ret > start) {
 325             final int prev = ret - 1;
 326             if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {
 327                 break;
 328             }
 329             ret = prev;
 330         }
 331         return ret;
 332     }
 333 }