yang/yang-parser-rfc7950/src/main/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtils.java

   1 /*
   2  * Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
   3  *
   4  * This program and the accompanying materials are made available under the
   5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
   6  * and is available at http://www.eclipse.org/legal/epl-v10.html
   7  */
   8 package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
   9
  10 import static com.google.common.base.Verify.verify;
  11
  12 import com.google.common.annotations.VisibleForTesting;
  13 import com.google.common.base.CharMatcher;
  14 import com.google.common.base.VerifyException;
  15 import org.antlr.v4.runtime.tree.ParseTree;
  16 import org.antlr.v4.runtime.tree.TerminalNode;
  17 import org.eclipse.jdt.annotation.NonNull;
  18 import org.opendaylight.yangtools.yang.common.YangVersion;
  19 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser;
  20 import org.opendaylight.yangtools.yang.parser.antlr.YangStatementParser.ArgumentContext;
  21 import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
  22 import org.opendaylight.yangtools.yang.parser.spi.source.StatementSourceReference;
  23
  24 /**
  25  * Utilities for dealing with YANG statement argument strings, encapsulated in ANTLR grammar's ArgumentContext.
  26  */
  27 abstract class ArgumentContextUtils {
  28     /**
  29      * YANG 1.0 version of strings, which were not completely clarified in
  30      * <a href="https://tools.ietf.org/html/rfc6020#section-6.1.3">RFC6020</a>.
  31      */
  32     private static final class RFC6020 extends ArgumentContextUtils {
  33         private static final @NonNull RFC6020 INSTANCE = new RFC6020();
  34
  35         @Override
  36         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  37             // No-op
  38         }
  39
  40         @Override
  41         void checkUnquoted(final String str, final StatementSourceReference ref) {
  42             // No-op
  43         }
  44     }
  45
  46     /**
  47      * YANG 1.1 version of strings, which were clarified in
  48      * <a href="https://tools.ietf.org/html/rfc7950#section-6.1.3">RFC7950</a>.
  49      */
  50     // NOTE: the differences clarified lead to a proper ability to delegate this to ANTLR lexer, but that does not
  51     //       understand versions and needs to work with both.
  52     private static final class RFC7950 extends ArgumentContextUtils {
  53         private static final CharMatcher ANYQUOTE_MATCHER = CharMatcher.anyOf("'\"");
  54         private static final @NonNull RFC7950 INSTANCE = new RFC7950();
  55
  56         @Override
  57         void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
  58             if (backslash < str.length() - 1) {
  59                 int index = backslash;
  60                 while (index != -1) {
  61                     switch (str.charAt(index + 1)) {
  62                         case 'n':
  63                         case 't':
  64                         case '\\':
  65                         case '\"':
  66                             index = str.indexOf('\\', index + 2);
  67                             break;
  68                         default:
  69                             throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
  70                                 + "quoted string the backslash must be followed by one of the following character "
  71                                 + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
  72                     }
  73                 }
  74             }
  75         }
  76
  77         @Override
  78         void checkUnquoted(final String str, final StatementSourceReference ref) {
  79             SourceException.throwIf(ANYQUOTE_MATCHER.matchesAnyOf(str), ref,
  80                 "YANG 1.1: unquoted string (%s) contains illegal characters", str);
  81         }
  82     }
  83
    // Matcher used to recognize whitespace when trimming the individual lines of a double-quoted string
    private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();

    // Private constructor: the only instances are the singletons of the nested version-specific subclasses
    private ArgumentContextUtils() {
        // Hidden on purpose
    }
  89
  90     static @NonNull ArgumentContextUtils forVersion(final YangVersion version) {
  91         switch (version) {
  92             case VERSION_1:
  93                 return RFC6020.INSTANCE;
  94             case VERSION_1_1:
  95                 return RFC7950.INSTANCE;
  96             default:
  97                 throw new IllegalStateException("Unhandled version " + version);
  98         }
  99     }
 100
    // TODO: teach the only caller about versions, or provide common-enough idioms for its use case
    /**
     * Returns the shared YANG 1.0 (RFC6020) instance without requiring the caller to supply a version.
     *
     * @return the RFC6020 string utilities
     */
    static @NonNull ArgumentContextUtils rfc6020() {
        return RFC6020.INSTANCE;
    }
 105
 106     /*
 107      * NOTE: this method we do not use convenience methods provided by generated parser code, but instead are making
 108      *       based on the grammar assumptions. While this is more verbose, it cuts out a number of unnecessary code,
 109      *       such as intermediate List allocation et al.
 110      */
 111     final @NonNull String stringFromStringContext(final ArgumentContext context, final StatementSourceReference ref) {
 112         // Get first child, which we fully expect to exist and be a lexer token
 113         final ParseTree firstChild = context.getChild(0);
 114         verify(firstChild instanceof TerminalNode, "Unexpected shape of %s", context);
 115         final TerminalNode firstNode = (TerminalNode) firstChild;
 116         final int firstType = firstNode.getSymbol().getType();
 117         switch (firstType) {
 118             case YangStatementParser.IDENTIFIER:
 119                 // Simple case, there is a simple string, which cannot contain anything that we would need to process.
 120                 return firstNode.getText();
 121             case YangStatementParser.PLUS:
 122                 return "+";
 123             case YangStatementParser.STRING:
 124                 // Complex case, defer to a separate method
 125                 return concatStrings(context, ref);
 126             default:
 127                 throw new VerifyException("Unexpected first symbol in " + context);
 128         }
 129     }
 130
 131     private String concatStrings(final ArgumentContext context, final StatementSourceReference ref) {
 132         /*
 133          * We have multiple fragments. Just search the tree. This code is equivalent to
 134          *
 135          *    context.STRING().forEach(stringNode -> appendString(sb, stringNode, ref))
 136          *
 137          * except we minimize allocations which that would do.
 138          */
 139         final StringBuilder sb = new StringBuilder();
 140         for (ParseTree child : context.children) {
 141             verify(child instanceof TerminalNode, "Unexpected fragment component %s", child);
 142             final TerminalNode childNode = (TerminalNode) child;
 143             switch (childNode.getSymbol().getType()) {
 144                 case YangStatementParser.SEP:
 145                     // Ignore whitespace
 146                     break;
 147                 case YangStatementParser.PLUS:
 148                     // Operator, which we are handling by concat
 149                     break;
 150                 case YangStatementParser.STRING:
 151                     // a lexer string, could be pretty much anything
 152                     // TODO: appendString() is a dispatch based on quotes, which we should be able to defer to lexer for
 153                     //       a dedicated type. That would expand the switch table here, but since we have it anyway, it
 154                     //       would be nice to have the quoting distinction already taken care of. The performance
 155                     //       difference will need to be benchmarked, though.
 156                     appendString(sb, childNode, ref);
 157                     break;
 158                 default:
 159                     throw new VerifyException("Unexpected symbol in " + childNode);
 160             }
 161         }
 162         return sb.toString();
 163     }
 164
 165     private void appendString(final StringBuilder sb, final TerminalNode stringNode,
 166             final StatementSourceReference ref) {
 167         final String str = stringNode.getText();
 168         final char firstChar = str.charAt(0);
 169         final char lastChar = str.charAt(str.length() - 1);
 170         if (firstChar == '"' && lastChar == '"') {
 171             sb.append(normalizeDoubleQuoted(str.substring(1, str.length() - 1),
 172                 stringNode.getSymbol().getCharPositionInLine(), ref));
 173         } else if (firstChar == '\'' && lastChar == '\'') {
 174             /*
 175              * According to RFC6020 a single quote character cannot occur in a single-quoted string, even when preceded
 176              * by a backslash.
 177              */
 178             sb.append(str, 1, str.length() - 1);
 179         } else {
 180             checkUnquoted(str, ref);
 181             sb.append(str);
 182         }
 183     }
 184
 185     private String normalizeDoubleQuoted(final String str, final int dquot, final StatementSourceReference ref) {
 186         // Whitespace normalization happens irrespective of further handling and has no effect on the result
 187         final String stripped = trimWhitespace(str, dquot);
 188
 189         // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
 190         // validation and processing (which will reuse the work we have done)
 191         final int backslash = stripped.indexOf('\\');
 192         return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
 193     }
 194
 195     /*
 196      * NOTE: Enforcement and transformation logic done by these methods should logically reside in the lexer and ANTLR
 197      *       account the for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
 198      *       we decided to let ANTLR do the work.
 199      */
 200     abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
 201
 202     abstract void checkUnquoted(String str, StatementSourceReference ref);
 203
 204     /*
 205      * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
 206      */
 207     private String unescape(final StatementSourceReference ref, final String str, final int backslash) {
 208         checkDoubleQuoted(str, ref, backslash);
 209         StringBuilder sb = new StringBuilder(str.length());
 210         unescapeBackslash(sb, str, backslash);
 211         return sb.toString();
 212     }
 213
 214     @VisibleForTesting
 215     static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
 216         String substring = str;
 217         int backslashIndex = backslash;
 218         while (true) {
 219             int nextIndex = backslashIndex + 1;
 220             if (backslashIndex != -1 && nextIndex < substring.length()) {
 221                 replaceBackslash(sb, substring, nextIndex);
 222                 substring = substring.substring(nextIndex + 1);
 223                 if (substring.length() > 0) {
 224                     backslashIndex = substring.indexOf('\\');
 225                 } else {
 226                     break;
 227                 }
 228             } else {
 229                 sb.append(substring);
 230                 break;
 231             }
 232         }
 233     }
 234
 235     private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
 236         int backslash = nextAfterBackslash - 1;
 237         sb.append(str, 0, backslash);
 238         final char c = str.charAt(nextAfterBackslash);
 239         switch (c) {
 240             case '\\':
 241             case '"':
 242                 sb.append(c);
 243                 break;
 244             case 't':
 245                 sb.append('\t');
 246                 break;
 247             case 'n':
 248                 sb.append('\n');
 249                 break;
 250             default:
 251                 sb.append(str, backslash, nextAfterBackslash + 1);
 252         }
 253     }
 254
 255     @VisibleForTesting
 256     static String trimWhitespace(final String str, final int dquot) {
 257         final int firstBrk = str.indexOf('\n');
 258         if (firstBrk == -1) {
 259             return str;
 260         }
 261
 262         // Okay, we may need to do some trimming, set up a builder and append the first segment
 263         final int length = str.length();
 264         final StringBuilder sb = new StringBuilder(length);
 265
 266         // Append first segment, which needs only tail-trimming
 267         sb.append(str, 0, trimTrailing(str, 0, firstBrk)).append('\n');
 268
 269         // With that out of the way, setup our iteration state. The string segment we are looking at is
 270         // str.substring(start, end), which is guaranteed not to include any line breaks, i.e. end <= brk unless we are
 271         // at the last segment.
 272         int start = firstBrk + 1;
 273         int brk = str.indexOf('\n', start);
 274
 275         // Loop over inner strings
 276         while (brk != -1) {
 277             trimLeadingAndAppend(sb, dquot, str, start, trimTrailing(str, start, brk)).append('\n');
 278             start = brk + 1;
 279             brk = str.indexOf('\n', start);
 280         }
 281
 282         return trimLeadingAndAppend(sb, dquot, str, start, length).toString();
 283     }
 284
 285     private static StringBuilder trimLeadingAndAppend(final StringBuilder sb, final int dquot, final String str,
 286             final int start, final int end) {
 287         int offset = start;
 288         int pos = 0;
 289
 290         while (pos <= dquot) {
 291             if (offset == end) {
 292                 // We ran out of data, nothing to append
 293                 return sb;
 294             }
 295
 296             final char ch = str.charAt(offset);
 297             if (ch == '\t') {
 298                 // tabs are to be treated as 8 spaces
 299                 pos += 8;
 300             } else if (WHITESPACE_MATCHER.matches(ch)) {
 301                 pos++;
 302             } else {
 303                 break;
 304             }
 305
 306             offset++;
 307         }
 308
 309         // We have expanded beyond double quotes, push equivalent spaces
 310         while (pos - 1 > dquot) {
 311             sb.append(' ');
 312             pos--;
 313         }
 314
 315         return sb.append(str, offset, end);
 316     }
 317
 318     private static int trimTrailing(final String str, final int start, final int end) {
 319         int ret = end;
 320         while (ret > start) {
 321             final int prev = ret - 1;
 322             if (!WHITESPACE_MATCHER.matches(str.charAt(prev))) {
 323                 break;
 324             }
 325             ret = prev;
 326         }
 327         return ret;
 328     }
 329 }