From 95bd6c9c8254844616a99bf40f73b4d2c5726686 Mon Sep 17 00:00:00 2001 From: Robert Varga Date: Fri, 20 Sep 2019 10:07:10 +0200 Subject: [PATCH] Add Magnesium encoding tokens Changes to how uint8/16/32/64 types are mapped results in the need to transfer the new value types. These represent 4 brand new tokens, which need to be handled. One option would be to fork NeonSR2 and modify it, somehow dealing with value lookups (i.e. Uint8 is not valid in SR2 stream, but tokens are shared across Lithium/NeonSR). Another option is to create a streaming format from scratch, coming up with a completely new family of tokens and encoding rules. This is attractive, as both current formats have three deficiencies: 1) they encode MapEntry key values twice, once in the leaf itself and once in the entry's NodeIdentifierWithPredicates 2) they are '0-happy', i.e. they do not recognize that integer codes and sizes are typically much smaller than the 4-byte range and thus much of the stream is just small ints encoded as 4 bytes 3) while they are simple and straighforward, they also end up wasting bits -- the four token families (codes, value types, node types, path argument types) each have at most 15 distinct values, and hence the bytes used to transmit them have 4 empty bits (or more) Hence we take the other option and design a streaming format with brand new tokens, which are structured to carry as much more information per byte. This is done by combining 'type' information with forward coding hints, for example: - a Boolean leaf value would previously require 2 bytes: (byte) ValueTypes.BOOL_TYPE (byte) true/falseIn the new tokens whereas new tokens express this in one byte: (byte) LeafValue.BOOLEAN_{FALSE,TRUE} - a byte[5] value would require 10 bytes to encode: (byte) ValueTypes.BINARY_TYPE (int) length (5 bytes) bytes whereas new tokens express this in 6 bytes: (byte) ValueTypes.BINARY_0 + 5 (works up to 127) (5 bytes) bytes A similar approach is taken when encoding NormalizedNode types. Here we recognize there are only 14 node types (currently) and hence we can provide up to 4 additional bits for information about how the node's identifier is encoded. For PathArguments, we also use separate coding, where the format is flexible based on the 4 types on identifiers there are, each having slightly different format. Most notable here is the integration of QName reference coding (for normal PathArguments) and integrated set size coding (for AugmentationIdentifiers). This this patch adds just the token definitions, along with basic documentation and version declaration. JIRA: CONTROLLER-1919 Change-Id: I9f6f58a7cb77d9d98c46e13b8dc6955c8e3c0737 Signed-off-by: Robert Varga --- .../node/utils/stream/MagnesiumNode.java | 155 ++++++++++ .../utils/stream/MagnesiumPathArgument.java | 121 ++++++++ .../node/utils/stream/MagnesiumValue.java | 291 ++++++++++++++++++ 3 files changed, 567 insertions(+) create mode 100644 opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumNode.java create mode 100644 opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumPathArgument.java create mode 100644 opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumValue.java diff --git a/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumNode.java b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumNode.java new file mode 100644 index 0000000000..30a35abc31 --- /dev/null +++ b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumNode.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019 PANTHEON.tech, s.r.o. and others. All rights reserved. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ +package org.opendaylight.controller.cluster.datastore.node.utils.stream; + +/** + * Magnesium encoding Node types. Encoded as a single byte, split as follows: + *
+ *   7 6 5 4 3 2 1 0
+ *  +-+-+-+-+-+-+-+-+
+ *  | P | A |  Type |
+ *  +-+-+-+-+-+-+-+-+
+ * 
+ * The fields being: + * + */ +// TODO: restructure this into some concrete examples +//- a leaf referencing a previously-encoded NodeIdentifier would take +//6 bytes: +// (byte) NodeTypes.LEAF_NODE +// (byte) TokenTypes.IS_QNAME_CODE +// (int) code value +//where as new tokens can do that in as few as 2 bytes: +// (byte) NodeType.(NODE_LEAF | ADDR_LOOKUP_1B) +// (byte) code value +//with worst-case being 5 bytes: +// (byte) NodeType.(NODE_LEAF | ADDR_LOOKUP_4B) +// (int) code value +//- a map entry node referencing previously-encoded QNames and a single +//predicate would take a base of 15 bytes (not counting value object): +// (byte) NodeTypes.MAP_ENTRY_NODE +// (byte) TokenTypes.IS_QNAME_CODE +// (int) code value +// (int) size of predicates +// (byte) TokenTypes.IS_QNAME_CODE +// (int) code value +//whereas new tokens can do that in as few as 3 bytes: +// (byte) NodeType.(NODE_MAP_ENTRY | ADDR_LOOKUP_1B | PREDICATE_ONE) +// (byte) code value +// (byte) code value +//this ability is maintained for up to 255 predicates with: +// (byte) NodeType.(NODE_MAP_ENTRY | ADDR_LOOKUP_1B | PREDICATE_1B) +// (byte) code value +// (byte) size of predicates +// (byte) code value [0-255] +//- a leaf representing a key inside a map entry has the ability to skip +//value encoding by being as simple as: +// (byte) NodeTYpe.(NODE_LEAF | ADDR_LOOKUP_1B | PREDICATE_ONE) +// (byte) code value +// +final class MagnesiumNode { + /** + * End of node marker. Does not support addressing modes. + */ + static final byte NODE_END = 0x00; // N/A + /** + * A leaf node. Encoding can specify {@link #PREDICATE_ONE}, which indicates the value is skipped as the encoder + * has emitted a parent MapNode, whose identifier contains the value. + */ + static final byte NODE_LEAF = 0x01; + static final byte NODE_CONTAINER = 0x02; + static final byte NODE_LIST = 0x03; + static final byte NODE_MAP = 0x04; + static final byte NODE_MAP_ORDERED = 0x05; + static final byte NODE_LEAFSET = 0x06; + static final byte NODE_LEAFSET_ORDERED = 0x07; + static final byte NODE_CHOICE = 0x08; + static final byte NODE_AUGMENTATION = 0x09; + static final byte NODE_ANYXML = 0x0A; + static final byte NODE_LIST_ENTRY = 0x0B; + static final byte NODE_LEAFSET_ENTRY = 0x0C; + static final byte NODE_MAP_ENTRY = 0x0D; + + // TODO: either implement or remove this coding. While Lithium has emit code, it lacks the code do read such nodes, + // which most probably means we do not need to bother ... + static final byte NODE_ANYXML_MODELED = 0x0E; + // 0x0F reserved for anydata + static final byte TYPE_MASK = 0x0F; + + + /** + * Inherit identifier from parent. This addressing mode is applicable in: + * + */ + static final byte ADDR_PARENT = 0x00; + /** + * Define a new QName-based identifier constant. For {@link #NODE_AUGMENTATION} this is a set of QNames. Assign + * a new linear key to this constant. + */ + static final byte ADDR_DEFINE = 0x10; + /** + * Reference a previously {@link #ADDR_DEFINE}d identifier constant. This node byte is followed by an unsigned + * byte, which holds the linear key previously defined (i.e. 0-255). + */ + static final byte ADDR_LOOKUP_1B = 0x20; + /** + * Reference a previously {@link #ADDR_DEFINE}d identifier constant. This node byte is followed by a signed int, + * which holds the linear key previously defined. + */ + static final byte ADDR_LOOKUP_4B = 0x30; + static final byte ADDR_MASK = ADDR_LOOKUP_4B; + + /** + * Predicate encoding: no predicates are present in a {@link #NODE_MAP_ENTRY}. + */ + static final byte PREDICATE_ZERO = 0x00; + + /** + * Predicate encoding: a single predicate is present in a {@link #NODE_MAP_ENTRY}. In case of {@link #NODE_LEAF} + * encoded as part of a {@link #NODE_MAP_ENTRY} this bit indicates the value is not encoded and + * should be looked up from the map entry's predicates. + * + *

+ * The predicate is encoded as a {@link #ADDR_DEFINE} or {@link #ADDR_LOOKUP_1B}/{@link #ADDR_LOOKUP_4B}, + * followed by an encoded {@link MagnesiumValue}. + */ + static final byte PREDICATE_ONE = 0x40; + + /** + * Predicate encoding: 0-255 predicates are present, as specified by the following {@code unsigned byte}. This + * encoding is expected to be exceedingly rare. This should not be used to encode 0 or 1 predicate, those cases + * should be encoded as: + *

+ */ + static final byte PREDICATE_1B = (byte) 0x80; + + /** + * Predicate encoding 0 - {@link Integer#MAX_VALUE} predicates are present, as specified by the following + * {@code int}. This should not be used where 0-255 predicates are present. + */ + static final byte PREDICATE_4B = (byte) (PREDICATE_ONE | PREDICATE_1B); + static final byte PREDICATE_MASK = PREDICATE_4B; + + private MagnesiumNode() { + + } +} \ No newline at end of file diff --git a/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumPathArgument.java b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumPathArgument.java new file mode 100644 index 0000000000..e1022e0630 --- /dev/null +++ b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumPathArgument.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 PANTHEON.tech, s.r.o. and others. All rights reserved. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ +package org.opendaylight.controller.cluster.datastore.node.utils.stream; + +/** + * Path Argument types used in Magnesium encoding. These are encoded as a single byte, three bits of which are reserved + * for PathArgument type itself: + *
+ *   7 6 5 4 3 2 1 0
+ *  +-+-+-+-+-+-+-+-+
+ *  |         | Type|
+ *  +-+-+-+-+-+-+-+-+
+ * 
+ * There are five type defined: + * + */ +final class MagnesiumPathArgument { + // 3 bits reserved for type... + static final byte AUGMENTATION_IDENTIFIER = 0x00; + static final byte NODE_IDENTIFIER = 0x01; + static final byte NODE_IDENTIFIER_WITH_PREDICATES = 0x02; + static final byte NODE_WITH_VALUE = 0x03; + static final byte MOUNTPOINT_IDENTIFIER = 0x04; + + // ... leaving three values currently unused + // 0x05 reserved + // 0x06 reserved + // 0x07 reserved + + static final byte TYPE_MASK = 0x07; + + // In case of AUGMENTATION_IDENTIFIER, top 5 bits are used to encode the number of path arguments, except last three + // values. This means that up to AugmentationIdentifiers with up to 28 components have this length encoded inline, + // otherwise we encode them in following 1 (unsigned), 2 (unsigned) or 4 (signed) bytes + static final byte AID_COUNT_1B = (byte) 0xE8; + static final byte AID_COUNT_2B = (byte) 0xF0; + static final byte AID_COUNT_4B = (byte) 0xF8; + static final byte AID_COUNT_MASK = AID_COUNT_4B; + static final byte AID_COUNT_SHIFT = 3; + + // For normal path path arguments we can either define a QName reference or follow a 1-4 byte reference. + static final byte QNAME_DEF = 0x00; + static final byte QNAME_REF_1B = 0x08; // Unsigned + static final byte QNAME_REF_2B = 0x10; // Unsigned + static final byte QNAME_REF_4B = 0x18; // Signed + static final byte QNAME_MASK = QNAME_REF_4B; + + // For NodeIdentifierWithPredicates we also carry the number of subsequent path arguments. The case of 0-4 arguments + // is indicated directly, otherwise there is 1-4 bytes carrying the reference. + static final byte SIZE_0 = 0x00; + static final byte SIZE_1 = 0x20; + static final byte SIZE_2 = 0x40; + static final byte SIZE_3 = 0x60; + static final byte SIZE_4 = (byte) 0x80; + static final byte SIZE_1B = (byte) 0xA0; + static final byte SIZE_2B = (byte) 0xC0; + static final byte SIZE_4B = (byte) 0xE0; + static final byte SIZE_MASK = SIZE_4B; + static final byte SIZE_SHIFT = 5; + + private MagnesiumPathArgument() { + + } +} diff --git a/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumValue.java b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumValue.java new file mode 100644 index 0000000000..403578d47f --- /dev/null +++ b/opendaylight/md-sal/sal-clustering-commons/src/main/java/org/opendaylight/controller/cluster/datastore/node/utils/stream/MagnesiumValue.java @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2019 PANTHEON.tech, s.r.o. and others. All rights reserved. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ +package org.opendaylight.controller.cluster.datastore.node.utils.stream; + +import java.io.DataOutput; +import java.math.BigDecimal; +import java.math.BigInteger; +import org.opendaylight.yangtools.yang.common.Empty; +import org.opendaylight.yangtools.yang.common.Uint16; +import org.opendaylight.yangtools.yang.common.Uint32; +import org.opendaylight.yangtools.yang.common.Uint64; +import org.opendaylight.yangtools.yang.common.Uint8; +import org.opendaylight.yangtools.yang.data.api.YangInstanceIdentifier; + +/** + * Magnesium encoding value types. Serialized as a single byte. + */ +/* + * Note these constants are organized by their absolute value, which is slightly counter-intuitive when trying to make + * sense of what is going on. + * + * TODO: create some sort of facility which would provide symbolic names for debugging and documentation purposes. + */ +final class MagnesiumValue { + /** + * {@link Boolean#FALSE} value. + */ + static final byte BOOLEAN_FALSE = 0x00; + /** + * {@link Boolean#TRUE} value. + */ + static final byte BOOLEAN_TRUE = 0x01; + /** + * An {@link Empty} value. + */ + static final byte EMPTY = 0x02; + /** + * A Byte, followed by a byte holding the value. + */ + static final byte INT8 = 0x03; + /** + * A Short, followed by a {@code short} holding the value. + */ + static final byte INT16 = 0x04; + /** + * An Integer, followed by an {@code int} holding the value. + */ + static final byte INT32 = 0x05; + /** + * A Long, followed by an {@code long} holding the value. + */ + static final byte INT64 = 0x06; + /** + * A Uint8, followed by an {@code unsigned byte} holding the value. + */ + static final byte UINT8 = 0x07; + /** + * A Uint16, followed by a {@code unsigned short} holding the value. + */ + static final byte UINT16 = 0x08; + /** + * A Uint32, followed by an {@code unsigned int} holding the value. + */ + static final byte UINT32 = 0x09; + /** + * A Uint64, followed by an {@code unsigned long} holding the value. + */ + static final byte UINT64 = 0x0A; + /** + * A {@link String}, encoded through {@link DataOutput#writeUTF(String)}. Note this is generally true of any + * string with less then 16384 characters. + */ + static final byte STRING_UTF = 0x0B; + /** + * A {@link String}, encoded as an {@code unsigned short} followed by that many UTF8-encoded bytes. + */ + static final byte STRING_2B = 0x0C; + /** + * A {@link String}, encoded as an {@code int >= 0} followed by that many UTF8-encoded bytes. + */ + static final byte STRING_4B = 0x0D; + /** + * A {@link String}, encoded as an {@code int >= 0} followed by that many UTF16 characters, i.e. as produced by + * {@link DataOutput#writeChars(String)}. + */ + static final byte STRING_CHARS = 0x0E; + /** + * Utility 'reference coding' codepoint with {@code unsigned byte} offset. This is not a value type, but is used in + * context of various schema-related encodings like constant strings, QNameModule and similar. + */ + static final byte STRING_REF_1B = 0x0F; + /** + * Utility 'reference coding' codepoint with {@code unsigned short} offset. This is not a value type, but is used in + * context of various schema-related encodings like constant strings, QNameModule and similar. + */ + static final byte STRING_REF_2B = 0x10; + /** + * Utility 'reference coding' codepoint with {@code int} offset. This is not a value type, but is used in context of + * various schema-related encodings like constant strings, QNameModule and similar. + */ + static final byte STRING_REF_4B = 0x11; + /** + * A {@code byte[])}, encoded as a single {@code unsigned byte} followed by 128-383 bytes. Note that smaller + * arrays are encoded via {@link #BINARY_0} - {@link #BINARY_127} range. + */ + static final byte BINARY_1B = 0x12; + /** + * A {@code byte[])}, encoded as a single {@code unsigned short} followed by 384-65919 bytes. See also + * {@link #BINARY_1B}. + */ + static final byte BINARY_2B = 0x13; + /** + * A {@code byte[])}, encoded as a single {@code int} followed by that many bytes bytes. See also + * {@link #BINARY_2B}. + */ + static final byte BINARY_4B = 0x14; + /** + * A {@link YangInstanceIdentifier}, encoded as a single {@code int}, followed by that many components. See + * also {@link #YIID_0}, which offers optimized encoding for up to 31 components. Components are encoded using + * {@link MagnesiumPathArgument} coding. + */ + static final byte YIID = 0x15; + /** + * A QName literal. Encoded as QNameModule + String. This literal is expected to be memoized on receiver side, which + * assigns the next linear integer identifier. The sender will memoize it too and further references to this QName + * will be made via {@link #QNAME_REF_1B}, {@link #QNAME_REF_2B} or {@link #QNAME_REF_4B}. + * + *

+ * Note that QNameModule (and String in this context) encoding works similarly -- it can only occur as part of a + * QName (coming from here or {@link MagnesiumPathArgument}) and is subject to the same memoization. + * + *

+ * For example, given two QNames {@code foo = QName.create("foo", "abc")} and + * {@code bar = QName.create("foo", "def")}, if they are written in order {@code foo, bar, foo}, then the following + * events are emitted: + *

+     *   QNAME                (define QName, assign shorthand Q0)
+     *   STRING_UTF   "foo"   ("foo", assign shorthand S0, implies define QNameModule, assign shorthand M0)
+     *   STRING_EMPTY         (foo's non-existent revision)
+     *   STRING_UTF   "abc"   ("abc", assign shorthand S1)
+     *   QNAME                (define QName, assign shorthand Q1)
+     *   MODREF_1B    (byte)0 (reference M0)
+     *   STRING_UTF   "def"   ("def", assign shorthand S2)
+     *   QNAME_REF_1B (byte)0 (reference Q0)
+     * 
+ */ + // Design note: STRING_EMPTY is required to *NOT* establish a shortcut, as that is less efficient (and hence does + // not make sense from the sender, the receiver or the serialization protocol itself. + static final byte QNAME = 0x16; + /** + * Reference a QName previously defined via {@link #QNAME}. Reference number is encoded as {@code unsigned byte}. + */ + static final byte QNAME_REF_1B = 0x17; + /** + * Reference a QName previously defined via {@link #QNAME}. Reference number is encoded as {@code unsigned short}. + */ + static final byte QNAME_REF_2B = 0x18; + /** + * Reference a QName previously defined via {@link #QNAME}. Reference number is encoded as {@code int}. + */ + static final byte QNAME_REF_4B = 0x19; + /** + * Reference a previously defined QNameModule. Reference number is encoded as {@code unsigned byte}. + */ + static final byte MODREF_1B = 0x1A; + /** + * Reference a previously defined QNameModule. Reference number is encoded as {@code unsigned short}. + */ + static final byte MODREF_2B = 0x1B; + /** + * Reference a previously defined QNameModule. Reference number is encoded as {@code int}. + */ + static final byte MODREF_4B = 0x1C; + + /** + * A {@link BigDecimal}, encoded through {@link DataOutput#writeUTF(String)}. + */ + // This is legacy compatibility. At some point we will remove support for writing these. + static final byte BIGDECIMAL = 0x1D; + /** + * A {@link BigInteger}, encoded through {@link DataOutput#writeUTF(String)}. + */ + // This is legacy compatibility. At some point we will remove support for writing these. + static final byte BIGINTEGER = 0x1E; + + // 0x1F reserved + + /** + * Byte value {@code 0}. + */ + static final byte INT8_0 = 0x20; + /** + * Short value {@code 0}. + */ + static final byte INT16_0 = 0x21; + /** + * Integer value {@code 0}. + */ + static final byte INT32_0 = 0x22; + /** + * Long value {@code 0}. + */ + static final byte INT64_0 = 0x23; + /** + * {@link Uint8#ZERO} value. + */ + static final byte UINT8_0 = 0x24; + /** + * {@link Uint16#ZERO} value. + */ + static final byte UINT16_0 = 0x25; + /** + * {@link Uint32#ZERO} value. + */ + static final byte UINT32_0 = 0x26; + /** + * {@link Uint64#ZERO} value. + */ + static final byte UINT64_0 = 0x27; + /** + * Empty String value ({@code ""}). + */ + static final byte STRING_EMPTY = 0x28; + /** + * {@link #INT32} with a 2-byte operand. + */ + static final byte INT32_2B = 0x29; + /** + * {@link #UINT32} with a 2-byte operand. + */ + static final byte UINT32_2B = 0x2A; + /** + * {@link #INT64} with a 4-byte operand. + */ + static final byte INT64_4B = 0x2B; + /** + * {@link #UINT64} with a 4-byte operand. + */ + static final byte UINT64_4B = 0x2C; + + // 0x2D - 0x39 reserved + + /** + * Empty bits value. This code point starts the range, where the number of bits can be extracted as + * {@code code & 0x1F)}. Last three values of this range are used to encode more than 28 entries. + */ + static final byte BITS_0 = 0x40; + /** + * A bits value of up to 255 entries. Number of values is encoded as the following {@code unsigned byte}. + */ + static final byte BITS_1B = 0x5D; + /** + * A bits value of up to 65535 entries. Number of values is encoded as the following {@code unsigned short}. + */ + static final byte BITS_2B = 0x5E; + /** + * A bits value. Number of values is encoded as the following {@code int}. + */ + static final byte BITS_4B = 0x5F; + + /** + * {@link YangInstanceIdentifier} with zero components. This code point starts the range ending with + * {@link #YIID_31}, where the number of components can be extracted as {@code code & 0x1F}. Identifiers with + * more than 31 components are encoded using {@link #YIID}. + */ + static final byte YIID_0 = 0x60; + /** + * {@link YangInstanceIdentifier} with 31 components. See {@link #YIID_0}. + */ + static final byte YIID_31 = 0x7F; + + /** + * A {@code byte[]} with 0 bytes. This code point starts the range ending with {@link #BINARY_127}, where + * the number of bytes can be extracted as {@code code & 0x7F}. Arrays longer than 127 bytes are encoded using + * {@link #BINARY_1B}, {@link #BINARY_2B} and {@link #BINARY_4B} as needed. + */ + static final byte BINARY_0 = (byte) 0x80; + /** + * A {@code byte[]} with 127 bytes. See {@link #BINARY_0}. + */ + static final byte BINARY_127 = (byte) 0xFF; + + private MagnesiumValue() { + + } +} -- 2.36.6