Since the codes are assigned in a linear fashion, we can use
a simple ArrayList to track assigned codes, preventing the need
for boxing integers during population and lookups. Lookups also
end up requiring an array offset lookup rather than needing
to go through multiple indirections.
While this can in theory lead to a large array being allocated,
the set of strings we keep in the table is limited by the
SchemaContext and thus will typically be capped at a couple of
thousand.
The cost of growing the lookup should not be significantly
higher, even if ArrayList grows slower:
- 10, 16, 25, 38, 58, 88
versus HashMap (with .75 load factor)
- 12, 24, 48, 96, 192, 384
because each ArrayList resize operation is significantly faster.
Change-Id: I00c7a2e0985dbf901b6857a16424bf157e5af39d
Signed-off-by: Robert Varga <robert.varga@pantheon.tech>
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
+import java.util.ArrayList;
import java.util.HashSet;
import java.util.HashSet;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
private final DataInput input;
private final DataInput input;
- private final Map<Integer, String> codedStringMap = new HashMap<>();
+ private final List<String> codedStringMap = new ArrayList<>();
private QName lastLeafSetQName;
private QName lastLeafSetQName;
return null;
case TokenTypes.IS_CODE_VALUE:
final int code = input.readInt();
return null;
case TokenTypes.IS_CODE_VALUE:
final int code = input.readInt();
- final String lookup = codedStringMap.get(code);
- if (lookup == null) {
- throw new IOException("String code " + code + " was not found");
+ try {
+ return codedStringMap.get(code);
+ } catch (IndexOutOfBoundsException e) {
+ throw new IOException("String code " + code + " was not found", e);
case TokenTypes.IS_STRING_VALUE:
final String value = input.readUTF().intern();
case TokenTypes.IS_STRING_VALUE:
final String value = input.readUTF().intern();
- codedStringMap.put(codedStringMap.size(), value);
+ codedStringMap.add(value);
return value;
default:
throw new IOException("Unhandled string value type " + valueType);
return value;
default:
throw new IOException("Unhandled string value type " + valueType);