Refactor string unescaping 13/89013/1
authorvladyslav.marchenko <Vladyslav.Marchenko@pantheon.tech>
Fri, 6 Mar 2020 13:29:22 +0000 (15:29 +0200)
committerRobert Varga <robert.varga@pantheon.tech>
Thu, 9 Apr 2020 20:15:00 +0000 (22:15 +0200)
Add explicit tests and rework unescaping of double-quoted strings
so that we do not use arcane patterns.

JIRA: YANGTOOLS-1079
Change-Id: I3b8bd13f260f13796492d19aea35c4c3f27760fc
Signed-off-by: vladyslav.marchenko <Vladyslav.Marchenko@pantheon.tech>
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
(cherry picked from commit f8501e012a6585adc523f51d064dfa61ec9ea24e)

yang/yang-parser-rfc7950/src/main/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtils.java
yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtilsTest.java [new file with mode: 0644]
yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/AugmentContextUtilsTest.java [deleted file]
yang/yang-parser-rfc7950/src/test/resources/unescape/string-test.yang [new file with mode: 0644]

index 4919aadeab2b43af871b5bd862a844d78095e995..530875cbcbbd0d2e08fbc2e64374744e9ca794f6 100644 (file)
@@ -12,7 +12,6 @@ import static com.google.common.base.Verify.verify;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.CharMatcher;
 import com.google.common.base.VerifyException;
-import java.util.regex.Pattern;
 import org.antlr.v4.runtime.tree.ParseTree;
 import org.antlr.v4.runtime.tree.TerminalNode;
 import org.eclipse.jdt.annotation.NonNull;
@@ -34,7 +33,7 @@ abstract class ArgumentContextUtils {
         private static final @NonNull RFC6020 INSTANCE = new RFC6020();
 
         @Override
-        void checkDoubleQuoted(final String str, final StatementSourceReference ref) {
+        void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
             // No-op
         }
 
@@ -55,24 +54,21 @@ abstract class ArgumentContextUtils {
         private static final @NonNull RFC7950 INSTANCE = new RFC7950();
 
         @Override
-        void checkDoubleQuoted(final String str, final StatementSourceReference ref) {
-            // FIXME: YANGTOOLS-1079: we should forward backslash to this method, so that it does not start from the
-            //                        start from the start of the string. Furthermore this logic should operate on spans
-            //                        of characters -- i.e. the check for backslash should be a search instead -- as
-            //                        String knows how to do that and can do it more efficiently than this loop.
-            for (int i = 0; i < str.length() - 1; i++) {
-                if (str.charAt(i) == '\\') {
-                    switch (str.charAt(i + 1)) {
+        void checkDoubleQuoted(final String str, final StatementSourceReference ref, final int backslash) {
+            if (backslash < str.length() - 1) {
+                int index = backslash;
+                while (index != -1) {
+                    switch (str.charAt(index + 1)) {
                         case 'n':
                         case 't':
                         case '\\':
                         case '\"':
-                            i++;
+                            index = str.indexOf('\\', index + 2);
                             break;
                         default:
                             throw new SourceException(ref, "YANG 1.1: illegal double quoted string (%s). In double "
-                                    + "quoted string the backslash must be followed by one of the following character "
-                                    + "[n,t,\",\\], but was '%s'.", str, str.charAt(i + 1));
+                                + "quoted string the backslash must be followed by one of the following character "
+                                + "[n,t,\",\\], but was '%s'.", str, str.charAt(index + 1));
                     }
                 }
             }
@@ -86,10 +82,6 @@ abstract class ArgumentContextUtils {
     }
 
     private static final CharMatcher WHITESPACE_MATCHER = CharMatcher.whitespace();
-    private static final Pattern ESCAPED_DQUOT = Pattern.compile("\\\"", Pattern.LITERAL);
-    private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\", Pattern.LITERAL);
-    private static final Pattern ESCAPED_LF = Pattern.compile("\\n", Pattern.LITERAL);
-    private static final Pattern ESCAPED_TAB = Pattern.compile("\\t", Pattern.LITERAL);
 
     private ArgumentContextUtils() {
         // Hidden on purpose
@@ -155,11 +147,10 @@ abstract class ArgumentContextUtils {
                     break;
                 case YangStatementParser.STRING:
                     // a lexer string, could be pretty much anything
-                    // FIXME: YANGTOOLS-1079: appendString() is a dispatch based on quotes, which we should be able to
-                    //                        defer to lexer for a dedicated type. That would expand the switch table
-                    //                        here, but since we have it anyway, it would be nice to have the quoting
-                    //                        distinction already taken care of. The performance difference will need to
-                    //                        be benchmarked, though.
+                    // TODO: appendString() is a dispatch based on quotes, which we should be able to defer to lexer for
+                    //       a dedicated type. That would expand the switch table here, but since we have it anyway, it
+                    //       would be nice to have the quoting distinction already taken care of. The performance
+                    //       difference will need to be benchmarked, though.
                     appendString(sb, childNode, ref);
                     break;
                 default:
@@ -196,7 +187,7 @@ abstract class ArgumentContextUtils {
         // Now we need to perform some amount of unescaping. This serves as a pre-check before we dispatch
         // validation and processing (which will reuse the work we have done)
         final int backslash = stripped.indexOf('\\');
-        return backslash == -1 ? stripped : unescape(stripped, backslash, ref);
+        return backslash == -1 ? stripped : unescape(ref, stripped, backslash);
     }
 
     /*
@@ -204,26 +195,59 @@ abstract class ArgumentContextUtils {
      *       account the for it with lexer modes. We do not want to force a re-lexing phase in the parser just because
      *       we decided to let ANTLR do the work.
      */
-    // FIXME: YANGTOOLS-1079: Re-evaluate above comment once our integration surface with lexer has been decided
-    abstract void checkDoubleQuoted(String str, StatementSourceReference ref);
+    abstract void checkDoubleQuoted(String str, StatementSourceReference ref, int backslash);
 
     abstract void checkUnquoted(String str, StatementSourceReference ref);
 
     /*
      * Unescape escaped double quotes, tabs, new line and backslash in the inner string and trim the result.
      */
-    private String unescape(final String str, final int backslash, final StatementSourceReference ref) {
-        checkDoubleQuoted(str, ref);
-
-        // FIXME: YANGTOOLS-1079: given we the leading backslash, it would be more efficient to walk the string and
-        //                        unescape in one go
-        return ESCAPED_TAB.matcher(
-                    ESCAPED_LF.matcher(
-                        ESCAPED_BACKSLASH.matcher(
-                            ESCAPED_DQUOT.matcher(str).replaceAll("\\\""))
-                        .replaceAll("\\\\"))
-                    .replaceAll("\\\n"))
-               .replaceAll("\\\t");
+    private String unescape(final StatementSourceReference ref, final String str, final int backslash) {
+        checkDoubleQuoted(str, ref, backslash);
+        StringBuilder sb = new StringBuilder(str.length());
+        unescapeBackslash(sb, str, backslash);
+        return sb.toString();
+    }
+
+    @VisibleForTesting
+    static void unescapeBackslash(final StringBuilder sb, final String str, final int backslash) {
+        String substring = str;
+        int backslashIndex = backslash;
+        while (true) {
+            int nextIndex = backslashIndex + 1;
+            if (backslashIndex != -1 && nextIndex < substring.length()) {
+                replaceBackslash(sb, substring, nextIndex);
+                substring = substring.substring(nextIndex + 1);
+                if (substring.length() > 0) {
+                    backslashIndex = substring.indexOf('\\');
+                } else {
+                    break;
+                }
+            } else {
+                sb.append(substring);
+                break;
+            }
+        }
+    }
+
+    private static void replaceBackslash(final StringBuilder sb, final String str, final int nextAfterBackslash) {
+        int backslash = nextAfterBackslash - 1;
+        sb.append(str, 0, backslash);
+        final char c = str.charAt(nextAfterBackslash);
+        switch (c) {
+            case '\\':
+            case '"':
+                sb.append(c);
+                break;
+            case 't':
+                sb.append('\t');
+                break;
+            case 'n':
+                sb.append('\n');
+                break;
+            default:
+                sb.append(str, backslash, nextAfterBackslash + 1);
+        }
     }
 
     @VisibleForTesting
diff --git a/yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtilsTest.java b/yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/ArgumentContextUtilsTest.java
new file mode 100644 (file)
index 0000000..fba1589
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Pantheon Technologies, s.r.o. and others.  All rights reserved.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v1.0 which accompanies this distribution,
+ * and is available at http://www.eclipse.org/legal/epl-v10.html
+ */
+package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.opendaylight.yangtools.yang.parser.rfc7950.repo.ArgumentContextUtils.trimWhitespace;
+import static org.opendaylight.yangtools.yang.parser.rfc7950.repo.ArgumentContextUtils.unescapeBackslash;
+
+import java.io.File;
+import java.util.Optional;
+import org.junit.Test;
+import org.opendaylight.yangtools.yang.model.api.Module;
+import org.opendaylight.yangtools.yang.model.api.SchemaContext;
+import org.opendaylight.yangtools.yang.stmt.StmtTestUtils;
+
+public class ArgumentContextUtilsTest {
+    @Test
+    public void testTrimWhitespace() {
+        assertEquals("\n", trimWhitespace("\n", 0));
+        assertEquals("\n", trimWhitespace("\n", 5));
+        assertEquals("\n\n\n\n", trimWhitespace("\n\n\n\n", 0));
+        assertEquals("\n\n\n\n", trimWhitespace("\n\n\n\n", 5));
+        assertEquals("abc\n\n", trimWhitespace("abc \n  \n", 0));
+        assertEquals("abc\n\n", trimWhitespace("abc \n  \n", 1));
+        assertEquals("abc\n  ", trimWhitespace("abc\n   ", 0));
+        assertEquals("abc\n", trimWhitespace("abc\n   ", 2));
+        assertEquals("abc\n\n", trimWhitespace("abc\n   \n", 2));
+        assertEquals("abc\n        ", trimWhitespace("abc\n\t ", 0));
+        assertEquals("abc\n      ", trimWhitespace("abc\n\t ", 2));
+        assertEquals("abc\n    ", trimWhitespace("abc\n\t ", 4));
+        assertEquals("abc\n    ", trimWhitespace("abc\n \t", 4));
+        assertEquals("abc\n   a\n    a\n", trimWhitespace("abc\n\ta\n\t a\n", 4));
+        assertEquals("abc\n\n    a\n", trimWhitespace("abc\n\t\n\t a\n", 4));
+        assertEquals("   \ta\n", trimWhitespace("   \ta\n", 3));
+        assertEquals("   \ta\n", trimWhitespace("   \ta\n  ", 3));
+        assertEquals("   \ta\n", trimWhitespace("   \ta\n   ", 3));
+        assertEquals("   \ta\n", trimWhitespace("   \ta\n    ", 3));
+        assertEquals("   \ta\n ", trimWhitespace("   \ta\n     ", 3));
+    }
+
+    @Test
+    public void testUnescapeNew() {
+        //      a\b -----> a\b  (invalid for 7950)
+        assertEquals("\\abc", unescape("\\abc", 0));
+        assertEquals("abc\\", unescape("abc\\", 3));
+        assertEquals("abc\\def", unescape("abc\\def", 3));
+        //      a\\b -----> a\b
+        assertEquals("\\abc", unescape("\\\\abc", 0));
+        assertEquals("abc\\", unescape("abc\\\\", 3));
+        assertEquals("abc\\def", unescape("abc\\\\def", 3));
+        //      a\\\b -----> a\\b (invalid for 7950)
+        assertEquals("\\\\abc", unescape("\\\\\\abc", 0));
+        assertEquals("abc\\\\", unescape("abc\\\\\\", 3));
+        assertEquals("abc\\\\def", unescape("abc\\\\\\def", 3));
+        //      a"b -----> a"b (not passible)
+        assertEquals("\"abc", unescape("\"abc", 0));
+        assertEquals("abc\"", unescape("abc\"", 3));
+        assertEquals("abc\"def", unescape("abc\"def", 3));
+        //      a\"b -----> a"b
+        assertEquals("\"abc", unescape("\\\"abc", 0));
+        assertEquals("abc\"", unescape("abc\\\"", 3));
+        assertEquals("abc\"def", unescape("abc\\\"def", 3));
+        //      a\\"b -----> a\"b (not passible)
+        assertEquals("\\\"abc", unescape("\\\\\"abc", 0));
+        assertEquals("abc\\\"", unescape("abc\\\\\"", 3));
+        assertEquals("abc\\\"def", unescape("abc\\\\\"def", 3));
+        //      a\\\"b -----> a\"b
+        assertEquals("\\\"abc", unescape("\\\\\\\"abc", 0));
+        assertEquals("abc\\\"", unescape("abc\\\\\\\"", 3));
+        assertEquals("abc\\\"def", unescape("abc\\\\\\\"def", 3));
+        //      a\tb -----> a   b
+        assertEquals("\tabc", unescape("\\tabc", 0));
+        assertEquals("abc\t", unescape("abc\\t", 3));
+        assertEquals("abc\tdef", unescape("abc\\tdef", 3));
+        //      a\\tb -----> a\tb
+        assertEquals("\\tabc", unescape("\\\\tabc", 0));
+        assertEquals("abc\\t", unescape("abc\\\\t", 3));
+        assertEquals("abc\\tdef", unescape("abc\\\\tdef", 3));
+        //      a\\\tb -----> a\    b
+        assertEquals("\\\tabc", unescape("\\\\\\tabc", 0));
+        assertEquals("abc\\\t", unescape("abc\\\\\\t", 3));
+        assertEquals("abc\\\tdef", unescape("abc\\\\\\tdef", 3));
+        //      a\nb -----> a
+        //                  b
+        assertEquals("\nabc", unescape("\\nabc", 0));
+        assertEquals("abc\n", unescape("abc\\n", 3));
+        assertEquals("abc\ndef", unescape("abc\\ndef", 3));
+        //      a\\nb -----> a\nb
+        assertEquals("\\nabc", unescape("\\\\nabc", 0));
+        assertEquals("abc\\n", unescape("abc\\\\n", 3));
+        assertEquals("abc\\ndef", unescape("abc\\\\ndef", 3));
+        //      a\\\nb -----> a\
+        //                    b
+        assertEquals("\\\nabc", unescape("\\\\\\nabc", 0));
+        assertEquals("abc\\\n", unescape("abc\\\\\\n", 3));
+        assertEquals("abc\\\ndef", unescape("abc\\\\\\ndef", 3));
+
+        assertEquals("\\\nabc abc\\n\nabc abc\t", unescape("\\\\\\nabc abc\\\\n\\nabc abc\\t", 0));
+    }
+
+    @Test
+    public void stringTestUnescape() throws Exception {
+        final SchemaContext schemaContext = StmtTestUtils.parseYangSources(new File(getClass()
+                .getResource("/unescape/string-test.yang").toURI()));
+        assertNotNull(schemaContext);
+        assertEquals(1, schemaContext.getModules().size());
+        final Module module = schemaContext.getModules().iterator().next();
+        assertEquals(Optional.of("  Unescaping examples: \\,\n,\t  \"string enclosed in double quotes\" end\n"
+                + "abc \\\\\\ \\t \\\nnn"), module.getDescription());
+    }
+
+    private static String unescape(final String str, final int backslash) {
+        StringBuilder sb = new StringBuilder(str.length());
+        unescapeBackslash(sb, str, backslash);
+        return sb.toString();
+    }
+}
diff --git a/yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/AugmentContextUtilsTest.java b/yang/yang-parser-rfc7950/src/test/java/org/opendaylight/yangtools/yang/parser/rfc7950/repo/AugmentContextUtilsTest.java
deleted file mode 100644 (file)
index 381e57d..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Pantheon Technologies, s.r.o. and others.  All rights reserved.
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License v1.0 which accompanies this distribution,
- * and is available at http://www.eclipse.org/legal/epl-v10.html
- */
-package org.opendaylight.yangtools.yang.parser.rfc7950.repo;
-
-import static org.junit.Assert.assertEquals;
-import static org.opendaylight.yangtools.yang.parser.rfc7950.repo.ArgumentContextUtils.trimWhitespace;
-
-import org.junit.Test;
-
-public class AugmentContextUtilsTest {
-
-    @Test
-    public void testTrimWhitespace() {
-        assertEquals("\n", trimWhitespace("\n", 0));
-        assertEquals("\n", trimWhitespace("\n", 5));
-        assertEquals("\n\n\n\n", trimWhitespace("\n\n\n\n", 0));
-        assertEquals("\n\n\n\n", trimWhitespace("\n\n\n\n", 5));
-        assertEquals("abc\n\n", trimWhitespace("abc \n  \n", 0));
-        assertEquals("abc\n\n", trimWhitespace("abc \n  \n", 1));
-        assertEquals("abc\n  ", trimWhitespace("abc\n   ", 0));
-        assertEquals("abc\n", trimWhitespace("abc\n   ", 2));
-        assertEquals("abc\n\n", trimWhitespace("abc\n   \n", 2));
-        assertEquals("abc\n        ", trimWhitespace("abc\n\t ", 0));
-        assertEquals("abc\n      ", trimWhitespace("abc\n\t ", 2));
-        assertEquals("abc\n    ", trimWhitespace("abc\n\t ", 4));
-        assertEquals("abc\n    ", trimWhitespace("abc\n \t", 4));
-        assertEquals("abc\n   a\n    a\n", trimWhitespace("abc\n\ta\n\t a\n", 4));
-        assertEquals("abc\n\n    a\n", trimWhitespace("abc\n\t\n\t a\n", 4));
-        assertEquals("   \ta\n", trimWhitespace("   \ta\n", 3));
-        assertEquals("   \ta\n", trimWhitespace("   \ta\n  ", 3));
-        assertEquals("   \ta\n", trimWhitespace("   \ta\n   ", 3));
-        assertEquals("   \ta\n", trimWhitespace("   \ta\n    ", 3));
-        assertEquals("   \ta\n ", trimWhitespace("   \ta\n     ", 3));
-    }
-}
diff --git a/yang/yang-parser-rfc7950/src/test/resources/unescape/string-test.yang b/yang/yang-parser-rfc7950/src/test/resources/unescape/string-test.yang
new file mode 100644 (file)
index 0000000..2b9e211
--- /dev/null
@@ -0,0 +1,8 @@
+module string-test {
+    namespace "test";
+    prefix test;
+
+    description "  Unescaping examples:" +
+                " \\,\n,\t  \"string enclosed in double quotes\" end
+                abc \\\\\\ \\t \\\nnn";
+}