From: Robert Varga Date: Fri, 6 Jan 2023 15:30:27 +0000 (+0100) Subject: Fix String value parsing/serialization X-Git-Tag: v11.0.0~233 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?a=commitdiff_plain;h=f31f6f001ec2263715ebfd5f28d214b56e9a0344;hp=3882dd90f1af909696a4e1ebad57aae1282cf956;p=yangtools.git Fix String value parsing/serialization When encoding user-supplied values from user we need to be mindful of their actual content: we cannot just use single quotes (') everywhere, as the value itself can contain a single quote, in which case the resulting string would have unbalanced quotes and nominally unparseable. Adjust both sides of the serialization picture to account for this and use YANG section 6.1.3 for escaping rules. JIRA: YANGTOOLS-1473 Change-Id: I0df9beaf720a78682b6bde606a1359e7f1a09df8 Signed-off-by: Ruslan Kashapov Signed-off-by: Robert Varga --- diff --git a/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java b/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java index 02adaa5ba5..48edc81796 100644 --- a/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java +++ b/codec/yang-data-codec-gson/src/test/java/org/opendaylight/yangtools/yang/data/codec/gson/YT1473Test.java @@ -80,7 +80,6 @@ class YT1473Test { } @Test - @Disabled("YT-1473: string escaping needs to work") void testSerializeEscaped() throws Exception { // Escaping is needed, use double quotes and escape assertSerdes("/bar:str[.=\"str'\\\"\"]", buildYangInstanceIdentifier(BAR_STR, "str'\"")); diff --git a/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java b/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java index 41874d6020..7bef13a35e 100644 --- a/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java +++ b/codec/yang-data-codec-xml/src/test/java/org/opendaylight/yangtools/yang/data/codec/xml/YT1473Test.java @@ -80,7 +80,6 @@ class YT1473Test { } @Test - @Disabled("YT-1473: string escaping needs to work") void testSerializeEscaped() throws Exception { // Escaping is needed, use double quotes and escape assertSerdes("/bar:str[.=\"str'\\\"\"]", buildYangInstanceIdentifier(BAR_STR, "str'\"")); diff --git a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java index d343c1b808..8ba7d0aef8 100644 --- a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java +++ b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/AbstractStringInstanceIdentifierCodec.java @@ -11,6 +11,8 @@ import static com.google.common.base.Preconditions.checkArgument; import static java.util.Objects.requireNonNull; import com.google.common.annotations.Beta; +import com.google.common.escape.Escaper; +import com.google.common.escape.Escapers; import javax.xml.XMLConstants; import org.eclipse.jdt.annotation.NonNull; import org.eclipse.jdt.annotation.Nullable; @@ -30,6 +32,14 @@ import org.opendaylight.yangtools.yang.model.util.LeafrefResolver; @Beta public abstract class AbstractStringInstanceIdentifierCodec extends AbstractNamespaceCodec implements InstanceIdentifierCodec { + // Escaper as per https://www.rfc-editor.org/rfc/rfc7950#section-6.1.3 + private static final Escaper DQUOT_ESCAPER = Escapers.builder() + .addEscape('\n', "\\n") + .addEscape('\t', "\\t") + .addEscape('"', "\\\"") + .addEscape('\\', "\\\\") + .build(); + @Override protected final String serializeImpl(final YangInstanceIdentifier data) { final StringBuilder sb = new StringBuilder(); @@ -58,16 +68,51 @@ public abstract class AbstractStringInstanceIdentifierCodec extends AbstractName if (arg instanceof NodeIdentifierWithPredicates nip) { for (var entry : nip.entrySet()) { - appendQName(sb.append('['), entry.getKey(), lastModule); - sb.append("='").append(String.valueOf(entry.getValue())).append("']"); + final var keyName = entry.getKey(); + appendQName(sb.append('['), keyName, lastModule).append('='); + appendValue(sb, keyName.getModule(), entry.getValue()).append(']'); } } else if (arg instanceof NodeWithValue val) { - sb.append("[.='").append(val.getValue()).append("']"); + appendValue(sb.append("[.="), lastModule, val.getValue()).append(']'); } } return sb.toString(); } + private static StringBuilder appendValue(final StringBuilder sb, final QNameModule currentModule, + final Object value) { + final var str = String.valueOf(value); + + // We have two specifications here: Section 6.1.3 of both RFC6020 and RFC7950: + // + // RFC6020 Section 6.1.3: + // If a string contains any space or tab characters, a semicolon (";"), + // braces ("{" or "}"), or comment sequences ("//", "/*", or "*/"), then + // it MUST be enclosed within double or single quotes. + // + // RFC7950 Section 6.1.3: + // An unquoted string is any sequence of characters that does not + // contain any space, tab, carriage return, or line feed characters, a + // single or double quote character, a semicolon (";"), braces ("{" or + // "}"), or comment sequences ("//", "/*", or "*/"). + // + // Plus the common part: + // A single-quoted string (enclosed within ' ') preserves each character + // within the quotes. A single quote character cannot occur in a + // single-quoted string, even when preceded by a backslash. + // + // Unquoted strings are not interesting, as we are embedding the value in a string, not a YANG document, hence + // we have to use quotes. Single-quoted case is simpler, as it does not involve any escaping. The only case + // where we cannot use it is when the value itself has a single-quote in itself -- then we call back to + // double-quoting. + + return str.indexOf('\'') == -1 + // No escaping needed, use single quotes + ? sb.append('\'').append(str).append('\'') + // Escaping needed: use double quotes + : sb.append('"').append(DQUOT_ESCAPER.escape(str)).append('"'); + } + /** * Returns DataSchemaContextTree associated with SchemaContext for which * serialization / deserialization occurs. diff --git a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java index 11b54ddc1e..84000eea48 100644 --- a/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java +++ b/data/yang-data-util/src/main/java/org/opendaylight/yangtools/yang/data/util/XpathStringParsingPathArgumentBuilder.java @@ -52,14 +52,15 @@ final class XpathStringParsingPathArgumentBuilder implements Mutable { private static final CharMatcher IDENTIFIER = IDENTIFIER_FIRST_CHAR.or(CharMatcher.inRange('0', '9')) .or(CharMatcher.anyOf(".-")).precomputed(); - private static final CharMatcher QUOTE = CharMatcher.anyOf("'\""); - private static final char SLASH = '/'; + private static final char BACKSLASH = '\\'; private static final char COLON = ':'; private static final char DOT = '.'; private static final char EQUALS = '='; private static final char PRECONDITION_START = '['; private static final char PRECONDITION_END = ']'; + private static final char SQUOT = '\''; + private static final char DQUOT = '"'; private final List product = new ArrayList<>(); private final AbstractStringInstanceIdentifierCodec codec; @@ -231,27 +232,84 @@ final class XpathStringParsingPathArgumentBuilder implements Mutable { */ private void checkValid(final boolean condition, final String errorMsg, final Object... attributes) { if (!condition) { - throw new IllegalArgumentException(String.format( - "Could not parse Instance Identifier '%s'. Offset: %s : Reason: %s", data, offset, - String.format(errorMsg, attributes))); + throw iae(errorMsg, attributes); } } + private @NonNull IllegalArgumentException iae(final String errorMsg, final Object... attributes) { + return new IllegalArgumentException("Could not parse Instance Identifier '%s'. Offset: %s : Reason: %s" + .formatted(data, offset, errorMsg.formatted(attributes))); + } + /** * Returns following value of quoted literal (without quotes) and sets offset after literal. * * @return String literal */ private String nextQuotedValue() { - final char quoteChar = currentChar(); - checkValid(QUOTE.matches(quoteChar), "Value must be qoute escaped with ''' or '\"'."); + return switch (currentChar()) { + case SQUOT -> nextSingleQuotedValue(); + case DQUOT -> nextDoubleQuotedValue(); + default -> throw iae("Value must be quote escaped with ''' or '\"'."); + }; + } + + // Simple: just look for the matching single quote and return substring + private String nextSingleQuotedValue() { skipCurrentChar(); - final int valueStart = offset; - final int endQoute = data.indexOf(quoteChar, offset); - final String value = data.substring(valueStart, endQoute); - offset = endQoute; + final int start = offset; + final int end = data.indexOf(SQUOT, start); + checkValid(end != -1, "Closing single quote not found"); + offset = end; skipCurrentChar(); - return value; + return data.substring(start, end); + } + + // Complicated: we need to potentially un-escape + private String nextDoubleQuotedValue() { + skipCurrentChar(); + + final int maxIndex = data.length() - 1; + final var sb = new StringBuilder(); + while (true) { + final int nextStart = offset; + + // Find next double quotes + final int nextEnd = data.indexOf(DQUOT, nextStart); + checkValid(nextEnd != -1, "Closing double quote not found"); + offset = nextEnd; + + // Find next backslash + final int nextBackslash = data.indexOf(BACKSLASH, nextStart); + if (nextBackslash == -1 || nextBackslash > nextEnd) { + // No backslash between nextStart and nextEnd -- just copy characters and terminate + offset = nextEnd; + skipCurrentChar(); + return sb.append(data, nextStart, nextEnd).toString(); + } + + // Validate escape completeness and append buffer + checkValid(nextBackslash != maxIndex, "Incomplete escape"); + sb.append(data, nextStart, nextBackslash); + + // Adjust offset before potentially referencing it and + offset = nextBackslash; + sb.append(unescape(data.charAt(nextBackslash + 1))); + + // Rinse and repeat + offset = nextBackslash + 2; + } + } + + // As per https://www.rfc-editor.org/rfc/rfc7950#section-6.1.3 + private char unescape(final char escape) { + return switch (escape) { + case 'n' -> '\n'; + case 't' -> '\t'; + case DQUOT -> DQUOT; + case BACKSLASH -> BACKSLASH; + default -> throw iae("Unrecognized escape"); + }; } /**