From c566040bf3c64880eb52d2779f031e27301f1e51 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Fri, 6 Jan 2023 16:30:27 +0100 Subject: [PATCH 1/1] Fix String value parsing/serialization When encoding user-supplied values from user we need to be mindful of their actual content: we cannot just use single quotes (') everywhere, as the value itself can contain a single quote, in which case the resulting string would have unbalanced quotes and nominally unparseable. Adjust both sides of the serialization picture to account for this and use YANG section 6.1.3 for escaping rules. JIRA: YANGTOOLS-1473 Change-Id: I0df9beaf720a78682b6bde606a1359e7f1a09df8 Signed-off-by: Ruslan Kashapov Signed-off-by: Robert Varga (cherry picked from commit f31f6f001ec2263715ebfd5f28d214b56e9a0344) --- .../yang/data/codec/gson/YT1473Test.java | 1 - .../yang/data/codec/xml/YT1473Test.java | 1 - ...AbstractStringInstanceIdentifierCodec.java | 51 ++++++++++- ...XpathStringParsingPathArgumentBuilder.java | 91 ++++++++++++++++--- 4 files changed, 127 insertions(+), 17 deletions(-) diff --git a/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java b/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java index 02adaa5ba5..48edc81796 100644 --- a/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java +++ b/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java @@ -80,7 +80,6 @@ class YT1473Test { } @Test - @Disabled("YT-1473: string escaping needs to work") void testSerializeEscaped() throws Exception { // Escaping is needed, use double quotes and escape assertSerdes("/bar:str[.=\"str'\\\"\"]", buildYangInstanceIdentifier(BAR_STR, "str'\"")); diff --git a/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java b/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java index faa1f7c053..ca94824ef8 100644 --- a/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java +++ b/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java @@ -80,7 +80,6 @@ public class YT1473Test { } @Test - @Disabled("YT-1473: string escaping needs to work") public void testSerializeEscaped() throws Exception { // Escaping is needed, use double quotes and escape assertSerdes("/bar:str[.=\"str'\\\"\"]", buildYangInstanceIdentifier(BAR_STR, "str'\"")); diff --git a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java index d69171d272..1ed4172da3 100644 --- a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java +++ b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java @@ -11,6 +11,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static java.util.Objects.requireNonNull; import com.google.common.annotations.Beta; +import com.google.common.escape.Escaper; +import com.google.common.escape.Escapers; import javax.xml.XMLConstants; import org.eclipse.jdt.annotation.NonNull; import org.eclipse.jdt.annotation.Nullable; @@ -30,6 +32,14 @@ import org.opendaylight.yangtools.yang.model.util.LeafrefResolver; @Beta public abstract class AbstractStringInstanceIdentifierCodec extends AbstractNamespaceCodec implements InstanceIdentifierCodec { + // Escaper as per https://www.rfc-editor.org/rfc/rfc7950#section-6.1.3 + private static final Escaper DQUOT_ESCAPER = Escapers.builder() + .addEscape('\n', "\\n") + .addEscape('\t', "\\t") + .addEscape('"', "\\\"") + .addEscape('\\', "\\\\") + .build(); + @Override protected final String serializeImpl(final YangInstanceIdentifier data) { final StringBuilder sb = new StringBuilder(); @@ -58,16 +68,51 @@ public abstract class AbstractStringInstanceIdentifierCodec extends AbstractName if (arg instanceof NodeIdentifierWithPredicates) { for (var entry : ((NodeIdentifierWithPredicates) arg).entrySet()) { - appendQName(sb.append('['), entry.getKey(), lastModule); - sb.append("='").append(String.valueOf(entry.getValue())).append("']"); + final var keyName = entry.getKey(); + appendQName(sb.append('['), keyName, lastModule).append('='); + appendValue(sb, keyName.getModule(), entry.getValue()).append(']'); } } else if (arg instanceof NodeWithValue) { - sb.append("[.='").append(((NodeWithValue) arg).getValue()).append("']"); + appendValue(sb.append("[.="), lastModule, ((NodeWithValue) arg).getValue()).append(']'); } } return sb.toString(); } + private static StringBuilder appendValue(final StringBuilder sb, final QNameModule currentModule, + final Object value) { + final var str = String.valueOf(value); + + // We have two specifications here: Section 6.1.3 of both RFC6020 and RFC7950: + // + // RFC6020 Section 6.1.3: + // If a string contains any space or tab characters, a semicolon (";"), + // braces ("{" or "}"), or comment sequences ("//", "/*", or "*/"), then + // it MUST be enclosed within double or single quotes. + // + // RFC7950 Section 6.1.3: + // An unquoted string is any sequence of characters that does not + // contain any space, tab, carriage return, or line feed characters, a + // single or double quote character, a semicolon (";"), braces ("{" or + // "}"), or comment sequences ("//", "/*", or "*/"). + // + // Plus the common part: + // A single-quoted string (enclosed within ' ') preserves each character + // within the quotes. A single quote character cannot occur in a + // single-quoted string, even when preceded by a backslash. + // + // Unquoted strings are not interesting, as we are embedding the value in a string, not a YANG document, hence + // we have to use quotes. Single-quoted case is simpler, as it does not involve any escaping. The only case + // where we cannot use it is when the value itself has a single-quote in itself -- then we call back to + // double-quoting. + + return str.indexOf('\'') == -1 + // No escaping needed, use single quotes + ? sb.append('\'').append(str).append('\'') + // Escaping needed: use double quotes + : sb.append('"').append(DQUOT_ESCAPER.escape(str)).append('"'); + } + /** * Returns DataSchemaContextTree associated with SchemaContext for which * serialization / deserialization occurs. diff --git a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java index 11b54ddc1e..9b2697e40d 100644 --- a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java +++ b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java @@ -52,14 +52,15 @@ final class XpathStringParsingPathArgumentBuilder implements Mutable { private static final CharMatcher IDENTIFIER = IDENTIFIER_FIRST_CHAR.or(CharMatcher.inRange('0', '9')) .or(CharMatcher.anyOf(".-")).precomputed(); - private static final CharMatcher QUOTE = CharMatcher.anyOf("'\""); - private static final char SLASH = '/'; + private static final char BACKSLASH = '\\'; private static final char COLON = ':'; private static final char DOT = '.'; private static final char EQUALS = '='; private static final char PRECONDITION_START = '['; private static final char PRECONDITION_END = ']'; + private static final char SQUOT = '\''; + private static final char DQUOT = '"'; private final List product = new ArrayList<>(); private final AbstractStringInstanceIdentifierCodec codec; @@ -231,27 +232,93 @@ final class XpathStringParsingPathArgumentBuilder implements Mutable { */ private void checkValid(final boolean condition, final String errorMsg, final Object... attributes) { if (!condition) { - throw new IllegalArgumentException(String.format( - "Could not parse Instance Identifier '%s'. Offset: %s : Reason: %s", data, offset, - String.format(errorMsg, attributes))); + throw iae(errorMsg, attributes); } } + private @NonNull IllegalArgumentException iae(final String errorMsg, final Object... attributes) { + return new IllegalArgumentException( + String.format("Could not parse Instance Identifier '%s'. Offset: %s : Reason: %s", data, offset, + String.format(errorMsg, attributes))); + } + /** * Returns following value of quoted literal (without quotes) and sets offset after literal. * * @return String literal */ private String nextQuotedValue() { - final char quoteChar = currentChar(); - checkValid(QUOTE.matches(quoteChar), "Value must be qoute escaped with ''' or '\"'."); + switch (currentChar()) { + case SQUOT: + return nextSingleQuotedValue(); + case DQUOT: + return nextDoubleQuotedValue(); + default: + throw iae("Value must be quote escaped with ''' or '\"'."); + } + } + + // Simple: just look for the matching single quote and return substring + private String nextSingleQuotedValue() { + skipCurrentChar(); + final int start = offset; + final int end = data.indexOf(SQUOT, start); + checkValid(end != -1, "Closing single quote not found"); + offset = end; skipCurrentChar(); - final int valueStart = offset; - final int endQoute = data.indexOf(quoteChar, offset); - final String value = data.substring(valueStart, endQoute); - offset = endQoute; + return data.substring(start, end); + } + + // Complicated: we need to potentially un-escape + private String nextDoubleQuotedValue() { skipCurrentChar(); - return value; + + final int maxIndex = data.length() - 1; + final var sb = new StringBuilder(); + while (true) { + final int nextStart = offset; + + // Find next double quotes + final int nextEnd = data.indexOf(DQUOT, nextStart); + checkValid(nextEnd != -1, "Closing double quote not found"); + offset = nextEnd; + + // Find next backslash + final int nextBackslash = data.indexOf(BACKSLASH, nextStart); + if (nextBackslash == -1 || nextBackslash > nextEnd) { + // No backslash between nextStart and nextEnd -- just copy characters and terminate + offset = nextEnd; + skipCurrentChar(); + return sb.append(data, nextStart, nextEnd).toString(); + } + + // Validate escape completeness and append buffer + checkValid(nextBackslash != maxIndex, "Incomplete escape"); + sb.append(data, nextStart, nextBackslash); + + // Adjust offset before potentially referencing it and + offset = nextBackslash; + sb.append(unescape(data.charAt(nextBackslash + 1))); + + // Rinse and repeat + offset = nextBackslash + 2; + } + } + + // As per https://www.rfc-editor.org/rfc/rfc7950#section-6.1.3 + private char unescape(final char escape) { + switch (escape) { + case 'n': + return '\n'; + case 't': + return '\t'; + case DQUOT: + return DQUOT; + case BACKSLASH: + return BACKSLASH; + default: + throw iae("Unrecognized escape"); + } } /** -- 2.36.6