Bug 6180 - Parser: Backslash double-quote in double-quoted string not recognized
[yangtools.git] / yang / yang-parser-impl / src / main / java / org / opendaylight / yangtools / yang / parser / stmt / rfc6020 / Utils.java
index 3123445b478574bee0f9ac12a10eed0f48218abf..442c7fdcfa9207ba4a36af78ed017def56546b9c 100644 (file)
@@ -8,12 +8,15 @@
 package org.opendaylight.yangtools.yang.parser.stmt.rfc6020;
 
 import static org.opendaylight.yangtools.yang.parser.spi.meta.StmtContextUtils.firstAttributeOf;
+
 import com.google.common.base.CharMatcher;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMap.Builder;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -23,7 +26,9 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
 import javax.annotation.Nullable;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathExpressionException;
@@ -37,6 +42,7 @@ import org.opendaylight.yangtools.yang.model.api.Deviation;
 import org.opendaylight.yangtools.yang.model.api.Deviation.Deviate;
 import org.opendaylight.yangtools.yang.model.api.ModuleIdentifier;
 import org.opendaylight.yangtools.yang.model.api.RevisionAwareXPath;
+import org.opendaylight.yangtools.yang.model.api.Rfc6020Mapping;
 import org.opendaylight.yangtools.yang.model.api.Status;
 import org.opendaylight.yangtools.yang.model.api.stmt.BelongsToStatement;
 import org.opendaylight.yangtools.yang.model.api.stmt.ModuleStatement;
@@ -61,9 +67,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public final class Utils {
+    private static final int UNICODE_SCRIPT_FIX_COUNTER = 30;
     private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
-    private static final CharMatcher DOUBLE_QUOTE_MATCHER = CharMatcher.is('"');
-    private static final CharMatcher SINGLE_QUOTE_MATCHER = CharMatcher.is('\'');
     private static final CharMatcher LEFT_PARENTHESIS_MATCHER = CharMatcher.is('(');
     private static final CharMatcher RIGHT_PARENTHESIS_MATCHER = CharMatcher.is(')');
     private static final CharMatcher AMPERSAND_MATCHER = CharMatcher.is('&');
@@ -71,11 +76,229 @@ public final class Utils {
     private static final Splitter SLASH_SPLITTER = Splitter.on('/').omitEmptyStrings().trimResults();
     private static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings().trimResults();
     private static final Pattern PATH_ABS = Pattern.compile("/[^/].*");
+    private static final Pattern BETWEEN_CURLY_BRACES_PATTERN = Pattern.compile("\\{(.+?)\\}");
+    /**
+     * Unicode block names looked up when a pattern fails to compile with "Unknown character script name".
+     * A block referenced as {@code \p{IsName}} has to be spelled {@code \p{InName}} for java.util.regex,
+     * hence a name found here triggers the {@code Is} to {@code In} rewrite in fixUnknownScripts().
+     */
+    private static final Set<String> JAVA_UNICODE_BLOCKS = ImmutableSet.<String>builder()
+            .add("AegeanNumbers")
+            .add("AlchemicalSymbols")
+            .add("AlphabeticPresentationForms")
+            .add("AncientGreekMusicalNotation")
+            .add("AncientGreekNumbers")
+            .add("AncientSymbols")
+            .add("Arabic")
+            .add("ArabicPresentationForms-A")
+            .add("ArabicPresentationForms-B")
+            .add("ArabicSupplement")
+            .add("Armenian")
+            .add("Arrows")
+            .add("Avestan")
+            .add("Balinese")
+            .add("Bamum")
+            .add("BamumSupplement")
+            .add("BasicLatin")
+            .add("Batak")
+            .add("Bengali")
+            .add("BlockElements")
+            .add("Bopomofo")
+            .add("BopomofoExtended")
+            .add("BoxDrawing")
+            .add("Brahmi")
+            .add("BraillePatterns")
+            .add("Buginese")
+            .add("Buhid")
+            .add("ByzantineMusicalSymbols")
+            .add("Carian")
+            .add("Cham")
+            .add("Cherokee")
+            .add("CJKCompatibility")
+            .add("CJKCompatibilityForms")
+            .add("CJKCompatibilityIdeographs")
+            .add("CJKCompatibilityIdeographsSupplement")
+            .add("CJKRadicalsSupplement")
+            .add("CJKStrokes")
+            .add("CJKSymbolsandPunctuation")
+            .add("CJKUnifiedIdeographs")
+            .add("CJKUnifiedIdeographsExtensionA")
+            .add("CJKUnifiedIdeographsExtensionB")
+            .add("CJKUnifiedIdeographsExtensionC")
+            .add("CJKUnifiedIdeographsExtensionD")
+            .add("CombiningDiacriticalMarks")
+            .add("CombiningDiacriticalMarksSupplement")
+            .add("CombiningHalfMarks")
+            .add("CombiningDiacriticalMarksforSymbols")
+            .add("CommonIndicNumberForms")
+            .add("ControlPictures")
+            .add("Coptic")
+            .add("CountingRodNumerals")
+            .add("Cuneiform")
+            .add("CuneiformNumbersandPunctuation")
+            .add("CurrencySymbols")
+            .add("CypriotSyllabary")
+            .add("Cyrillic")
+            .add("CyrillicExtended-A")
+            .add("CyrillicExtended-B")
+            .add("CyrillicSupplementary")
+            .add("Deseret")
+            .add("Devanagari")
+            .add("DevanagariExtended")
+            .add("Dingbats")
+            .add("DominoTiles")
+            .add("EgyptianHieroglyphs")
+            .add("Emoticons")
+            .add("EnclosedAlphanumericSupplement")
+            .add("EnclosedAlphanumerics")
+            .add("EnclosedCJKLettersandMonths")
+            .add("EnclosedIdeographicSupplement")
+            .add("Ethiopic")
+            .add("EthiopicExtended")
+            .add("EthiopicExtended-A")
+            .add("EthiopicSupplement")
+            .add("GeneralPunctuation")
+            .add("GeometricShapes")
+            .add("Georgian")
+            .add("GeorgianSupplement")
+            .add("Glagolitic")
+            .add("Gothic")
+            .add("GreekandCoptic")
+            .add("GreekExtended")
+            .add("Gujarati")
+            .add("Gurmukhi")
+            .add("HalfwidthandFullwidthForms")
+            .add("HangulCompatibilityJamo")
+            .add("HangulJamo")
+            .add("HangulJamoExtended-A")
+            .add("HangulJamoExtended-B")
+            .add("HangulSyllables")
+            .add("Hanunoo")
+            .add("Hebrew")
+            .add("HighPrivateUseSurrogates")
+            .add("HighSurrogates")
+            .add("Hiragana")
+            .add("IdeographicDescriptionCharacters")
+            .add("ImperialAramaic")
+            .add("InscriptionalPahlavi")
+            .add("InscriptionalParthian")
+            .add("IPAExtensions")
+            .add("Javanese")
+            .add("Kaithi")
+            .add("KanaSupplement")
+            .add("Kanbun")
+            .add("Kangxi Radicals")
+            // XSD spells this block without a space; the spaced form is kept for backward compatibility
+            .add("KangxiRadicals")
+            .add("Kannada")
+            .add("Katakana")
+            .add("KatakanaPhoneticExtensions")
+            .add("KayahLi")
+            .add("Kharoshthi")
+            .add("Khmer")
+            .add("KhmerSymbols")
+            .add("Lao")
+            .add("Latin-1Supplement")
+            .add("LatinExtended-A")
+            .add("LatinExtendedAdditional")
+            .add("LatinExtended-B")
+            .add("LatinExtended-C")
+            .add("LatinExtended-D")
+            .add("Lepcha")
+            .add("LetterlikeSymbols")
+            .add("Limbu")
+            .add("LinearBIdeograms")
+            .add("LinearBSyllabary")
+            .add("Lisu")
+            .add("LowSurrogates")
+            .add("Lycian")
+            .add("Lydian")
+            .add("MahjongTiles")
+            .add("Malayalam")
+            .add("Mandaic")
+            .add("MathematicalAlphanumericSymbols")
+            .add("MathematicalOperators")
+            .add("MeeteiMayek")
+            .add("MiscellaneousMathematicalSymbols-A")
+            .add("MiscellaneousMathematicalSymbols-B")
+            .add("MiscellaneousSymbols")
+            .add("MiscellaneousSymbolsandArrows")
+            .add("MiscellaneousSymbolsAndPictographs")
+            .add("MiscellaneousTechnical")
+            .add("ModifierToneLetters")
+            .add("Mongolian")
+            .add("MusicalSymbols")
+            .add("Myanmar")
+            .add("MyanmarExtended-A")
+            .add("NewTaiLue")
+            .add("NKo")
+            .add("NumberForms")
+            .add("Ogham")
+            .add("OlChiki")
+            .add("OldItalic")
+            .add("OldPersian")
+            .add("OldSouthArabian")
+            .add("OldTurkic")
+            .add("OpticalCharacterRecognition")
+            .add("Oriya")
+            .add("Osmanya")
+            .add("Phags-pa")
+            .add("PhaistosDisc")
+            .add("Phoenician")
+            .add("PhoneticExtensions")
+            .add("PhoneticExtensionsSupplement")
+            .add("PlayingCards")
+            .add("PrivateUseArea")
+            .add("Rejang")
+            .add("RumiNumeralSymbols")
+            .add("Runic")
+            .add("Samaritan")
+            .add("Saurashtra")
+            .add("Shavian")
+            .add("Sinhala")
+            .add("SmallFormVariants")
+            .add("SpacingModifierLetters")
+            .add("Specials")
+            .add("Sundanese")
+            .add("SuperscriptsandSubscripts")
+            .add("SupplementalArrows-A")
+            .add("SupplementalArrows-B")
+            .add("SupplementalMathematicalOperators")
+            .add("SupplementalPunctuation")
+            .add("SupplementaryPrivateUseArea-A")
+            .add("SupplementaryPrivateUseArea-B")
+            .add("SylotiNagri")
+            .add("Syriac")
+            .add("Tagalog")
+            .add("Tagbanwa")
+            .add("Tags")
+            .add("TaiLe")
+            .add("TaiTham")
+            .add("TaiViet")
+            .add("TaiXuanJingSymbols")
+            .add("Tamil")
+            .add("Telugu")
+            .add("Thaana")
+            .add("Thai")
+            .add("Tibetan")
+            .add("Tifinagh")
+            .add("TransportAndMapSymbols")
+            .add("Ugaritic")
+            .add("UnifiedCanadianAboriginalSyllabics")
+            .add("UnifiedCanadianAboriginalSyllabicsExtended")
+            .add("Vai")
+            .add("VariationSelectors")
+            .add("VariationSelectorsSupplement")
+            .add("VedicExtensions")
+            .add("VerticalForms")
+            .add("YiRadicals")
+            .add("YiSyllables")
+            .add("YijingHexagramSymbols").build();
 
     private static final Map<String, Deviate> KEYWORD_TO_DEVIATE_MAP;
     static {
-        Builder<String, Deviate> keywordToDeviateMapBuilder = ImmutableMap.builder();
-        for (Deviate deviate : Deviation.Deviate.values()) {
+        final Builder<String, Deviate> keywordToDeviateMapBuilder = ImmutableMap.builder();
+        for (final Deviate deviate : Deviation.Deviate.values()) {
             keywordToDeviateMapBuilder.put(deviate.getKeyword(), deviate);
         }
         KEYWORD_TO_DEVIATE_MAP = keywordToDeviateMapBuilder.build();
@@ -102,7 +318,7 @@ public final class Utils {
 
     public static Collection<SchemaNodeIdentifier.Relative> transformKeysStringToKeyNodes(final StmtContext<?, ?, ?> ctx,
             final String value) {
-        List<String> keyTokens = SPACE_SPLITTER.splitToList(value);
+        final List<String> keyTokens = SPACE_SPLITTER.splitToList(value);
 
         // to detect if key contains duplicates
         if ((new HashSet<>(keyTokens)).size() < keyTokens.size()) {
@@ -110,11 +326,11 @@ public final class Utils {
             throw new SourceException(ctx.getStatementSourceReference(), "Duplicate value in list key: %s", value);
         }
 
-        Set<SchemaNodeIdentifier.Relative> keyNodes = new HashSet<>();
+        final Set<SchemaNodeIdentifier.Relative> keyNodes = new HashSet<>();
 
-        for (String keyToken : keyTokens) {
+        for (final String keyToken : keyTokens) {
 
-            SchemaNodeIdentifier.Relative keyNode = (Relative) SchemaNodeIdentifier.Relative.create(false,
+            final SchemaNodeIdentifier.Relative keyNode = (Relative) SchemaNodeIdentifier.Relative.create(false,
                     Utils.qNameFromArgument(ctx, keyToken));
             keyNodes.add(keyNode);
         }
@@ -134,7 +350,7 @@ public final class Utils {
         try {
             // TODO: we could capture the result and expose its 'evaluate' method
             xPath.compile(trimmed);
-        } catch (XPathExpressionException e) {
+        } catch (final XPathExpressionException e) {
             LOG.warn("Argument \"{}\" is not valid XPath string at \"{}\"", path, ctx.getStatementSourceReference(), e);
         }
 
@@ -142,11 +358,11 @@ public final class Utils {
     }
 
     public static QName trimPrefix(final QName identifier) {
-        String prefixedLocalName = identifier.getLocalName();
-        String[] namesParts = prefixedLocalName.split(":");
+        final String prefixedLocalName = identifier.getLocalName();
+        final String[] namesParts = prefixedLocalName.split(":");
 
         if (namesParts.length == 2) {
-            String localName = namesParts[1];
+            final String localName = namesParts[1];
             return QName.create(identifier.getModule(), localName);
         }
 
@@ -170,12 +386,12 @@ public final class Utils {
         if (stmtDef.get(identifier) != null) {
             return stmtDef.get(identifier).getStatementName();
         } else {
-            String prefixedLocalName = identifier.getLocalName();
-            String[] namesParts = prefixedLocalName.split(":");
+            final String prefixedLocalName = identifier.getLocalName();
+            final String[] namesParts = prefixedLocalName.split(":");
 
             if (namesParts.length == 2) {
-                String prefix = namesParts[0];
-                String localName = namesParts[1];
+                final String prefix = namesParts[0];
+                final String localName = namesParts[1];
                 if (prefixes != null && prefixes.get(prefix) != null
                         && stmtDef.get(QName.create(prefixes.get(prefix), localName)) != null) {
                     return QName.create(prefixes.get(prefix), localName);
@@ -188,11 +404,11 @@ public final class Utils {
     static SchemaNodeIdentifier nodeIdentifierFromPath(final StmtContext<?, ?, ?> ctx, final String path) {
         // FIXME: is the path trimming really necessary??
         final List<QName> qNames = new ArrayList<>();
-        for (String nodeName : SLASH_SPLITTER.split(trimSingleLastSlashFromXPath(path))) {
+        for (final String nodeName : SLASH_SPLITTER.split(trimSingleLastSlashFromXPath(path))) {
             try {
                 final QName qName = Utils.qNameFromArgument(ctx, nodeName);
                 qNames.add(qName);
-            } catch (Exception e) {
+            } catch (final Exception e) {
                 throw new IllegalArgumentException(
                     String.format("Failed to parse node '%s' in path '%s'", nodeName, path), e);
             }
@@ -202,24 +418,80 @@ public final class Utils {
     }
 
+    /**
+     * Appends the content of a double-quoted YANG string to the supplied builder, resolving the escape
+     * sequences {@code \n}, {@code \t}, {@code \"} and {@code \\} in a single left-to-right pass. Any other
+     * backslash is copied verbatim.
+     *
+     * @param sb builder receiving the unescaped text
+     * @param escaped string content without the enclosing double quotes
+     */
+    private static void appendUnescaped(final StringBuilder sb, final String escaped) {
+        final int length = escaped.length();
+        for (int i = 0; i < length; i++) {
+            final char current = escaped.charAt(i);
+            if (current != '\\' || i + 1 == length) {
+                sb.append(current);
+                continue;
+            }
+
+            switch (escaped.charAt(i + 1)) {
+            case 'n':
+                sb.append('\n');
+                break;
+            case 't':
+                sb.append('\t');
+                break;
+            case '"':
+                sb.append('"');
+                break;
+            case '\\':
+                sb.append('\\');
+                break;
+            default:
+                // Not an escape sequence: keep the backslash, the next character is handled on its own
+                sb.append(current);
+                continue;
+            }
+
+            // Skip the character consumed by the escape sequence
+            i++;
+        }
+    }
+
+    /**
+     * Concatenates the STRING tokens of a statement argument (or its IDENTIFIER token when there are none)
+     * into the argument value. Enclosing quotes are removed; escape sequences are resolved in double-quoted
+     * strings only.
+     *
+     * @param context parsed argument context
+     * @return argument value with quoting removed
+     */
     public static String stringFromStringContext(final YangStatementParser.ArgumentContext context) {
-        StringBuilder sb = new StringBuilder();
+        final StringBuilder sb = new StringBuilder();
         List<TerminalNode> strings = context.STRING();
         if (strings.isEmpty()) {
             strings = Arrays.asList(context.IDENTIFIER());
         }
-        for (TerminalNode stringNode : strings) {
+        for (final TerminalNode stringNode : strings) {
             final String str = stringNode.getText();
-            char firstChar = str.charAt(0);
-            final CharMatcher quoteMatcher;
-            if (SINGLE_QUOTE_MATCHER.matches(firstChar)) {
-                quoteMatcher = SINGLE_QUOTE_MATCHER;
-            } else if (DOUBLE_QUOTE_MATCHER.matches(firstChar)) {
-                quoteMatcher = DOUBLE_QUOTE_MATCHER;
+            final int length = str.length();
+            final char firstChar = str.charAt(0);
+            final char lastChar = str.charAt(length - 1);
+            if (length >= 2 && firstChar == '"' && lastChar == '"') {
+                /*
+                 * Unescape each escape sequence exactly once; chained replace() calls would re-interpret the
+                 * output of an earlier replacement (an escaped backslash followed by 'n' became a new line).
+                 */
+                appendUnescaped(sb, str.substring(1, length - 1));
+            } else if (length >= 2 && firstChar == '\'' && lastChar == '\'') {
+                /*
+                 * According to RFC6020 a single quote character cannot occur in
+                 * a single-quoted string, even when preceded by a backslash.
+                 */
+                sb.append(str.substring(1, length - 1));
             } else {
                 sb.append(str);
-                continue;
             }
-            sb.append(quoteMatcher.removeFrom(str.substring(1, str.length() - 1)));
         }
         return sb.toString();
     }
@@ -233,7 +457,7 @@ public final class Utils {
         QNameModule qNameModule = null;
         String localName = null;
 
-        String[] namesParts = value.split(":");
+        final String[] namesParts = value.split(":");
         switch (namesParts.length) {
         case 1:
             localName = namesParts[0];
@@ -277,7 +501,7 @@ public final class Utils {
         final QNameModule qNameModule = ctx.getFromNamespace(ModuleIdentifierToModuleQName.class, modId);
 
         if (qNameModule == null && StmtContextUtils.producesDeclared(ctx.getRoot(), SubmoduleStatement.class)) {
-            String moduleName = ctx.getRoot().getFromNamespace(BelongsToPrefixToModuleName.class, prefix);
+            final String moduleName = ctx.getRoot().getFromNamespace(BelongsToPrefixToModuleName.class, prefix);
             return ctx.getFromNamespace(ModuleNameToModuleQName.class, moduleName);
         }
         return qNameModule;
@@ -341,7 +565,7 @@ public final class Utils {
 
     public static Date getLatestRevision(final Iterable<? extends StmtContext<?, ?, ?>> subStmts) {
         Date revision = null;
-        for (StmtContext<?, ?, ?> subStmt : subStmts) {
+        for (final StmtContext<?, ?, ?> subStmt : subStmts) {
             if (subStmt.getPublicDefinition().getDeclaredRepresentationClass().isAssignableFrom(RevisionStatement
                     .class)) {
                 if (revision == null && subStmt.getStatementArgument() != null) {
@@ -377,4 +601,99 @@ public final class Utils {
 
         return string;
     }
+
+    /**
+     * Tries to turn a regular expression into one accepted by {@link Pattern#compile(String)} by rewriting
+     * Unicode block references which Java reports as unknown character scripts. At most
+     * {@link #UNICODE_SCRIPT_FIX_COUNTER} compilation attempts are made.
+     *
+     * @param rawPattern regular expression to check
+     * @return the first variant of the pattern that compiles, or the last attempted variant if it could not
+     *         be fixed
+     */
+    public static String fixUnicodeScriptPattern(String rawPattern) {
+        for (int i = 0; i < UNICODE_SCRIPT_FIX_COUNTER; i++) {
+            try {
+                Pattern.compile(rawPattern);
+                return rawPattern;
+            } catch (final PatternSyntaxException ex) {
+                LOG.debug("Invalid regex pattern syntax in: {}", rawPattern, ex);
+                if (!ex.getMessage().contains("Unknown character script name")) {
+                    return rawPattern;
+                }
+
+                final String fixedPattern = fixUnknownScripts(ex.getMessage(), rawPattern);
+                if (fixedPattern.equals(rawPattern)) {
+                    // Nothing was rewritten, so further attempts would fail in exactly the same way
+                    break;
+                }
+                rawPattern = fixedPattern;
+            }
+        }
+
+        LOG.warn("Regex pattern could not be fixed: {}", rawPattern);
+        return rawPattern;
+    }
+
+    /**
+     * Rewrites the {@code Is} prefix of the block name quoted in the exception message to {@code In}.
+     *
+     * @param exMessage message of the {@link PatternSyntaxException} raised for the pattern
+     * @param rawPattern pattern which failed to compile
+     * @return rewritten pattern, or {@code rawPattern} if there is nothing to rewrite
+     */
+    private static String fixUnknownScripts(final String exMessage, final String rawPattern) {
+        final Matcher matcher = BETWEEN_CURLY_BRACES_PATTERN.matcher(exMessage);
+        if (!matcher.find()) {
+            return rawPattern;
+        }
+
+        final String capturedGroup = matcher.group(1);
+        if (!JAVA_UNICODE_BLOCKS.contains(capturedGroup)) {
+            return rawPattern;
+        }
+
+        // The block may be referenced without the Is prefix (e.g. via script=), in which case there is
+        // nothing to rewrite; an unguarded -1 index would make StringBuilder.replace() throw.
+        final int idx = rawPattern.indexOf("{Is" + capturedGroup + '}');
+        if (idx < 0) {
+            return rawPattern;
+        }
+        return new StringBuilder(rawPattern).replace(idx + 1, idx + 3, "In").toString();
+    }
+
+    /**
+     * Checks whether two QNames are bound to the same module.
+     *
+     * @param targetStmtQName QName of the target statement
+     * @param sourceStmtQName QName of the source statement
+     * @return true if {@code getModule()} of both QNames is equal
+     */
+    public static boolean belongsToTheSameModule(final QName targetStmtQName, final QName sourceStmtQName) {
+        return targetStmtQName.getModule().equals(sourceStmtQName.getModule());
+    }
+
+    /**
+     * Checks whether a statement context is a container with a presence substatement.
+     *
+     * @param targetCtx statement context to examine
+     * @return true if the statement is defined as {@link Rfc6020Mapping#CONTAINER} and at least one of its
+     *         declared or effective substatements is defined as {@link Rfc6020Mapping#PRESENCE}
+     */
+    public static boolean isPresenceContainer(final StatementContextBase<?, ?, ?> targetCtx) {
+        if (!targetCtx.getPublicDefinition().equals(Rfc6020Mapping.CONTAINER)) {
+            return false;
+        }
+
+        final List<StatementContextBase<?, ?, ?>> targetSubStatements = new ImmutableList.Builder<StatementContextBase<?, ?, ?>>()
+                .addAll(targetCtx.declaredSubstatements()).addAll(targetCtx.effectiveSubstatements()).build();
+        for (final StatementContextBase<?, ?, ?> subStatement : targetSubStatements) {
+            if (subStatement.getPublicDefinition().equals(Rfc6020Mapping.PRESENCE)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
 }