Bug 4079: Unable to compile pattern defined in module
[yangtools.git] / yang / yang-parser-impl / src / main / java / org / opendaylight / yangtools / yang / parser / stmt / rfc6020 / Utils.java
index c1aa7affee4e5bc0b93ee65af23ab9f688cbe4df..71ae3700a38b3e075a6bbb384b8adffe934de40c 100644 (file)
@@ -12,6 +12,9 @@ import com.google.common.base.CharMatcher;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMap.Builder;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -19,9 +22,11 @@ import java.util.Collection;
 import java.util.Date;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Objects;
+import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
 import javax.annotation.Nullable;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathExpressionException;
@@ -31,8 +36,8 @@ import org.opendaylight.yangtools.antlrv4.code.gen.YangStatementParser;
 import org.opendaylight.yangtools.yang.common.QName;
 import org.opendaylight.yangtools.yang.common.QNameModule;
 import org.opendaylight.yangtools.yang.common.SimpleDateFormatUtil;
-import org.opendaylight.yangtools.yang.common.YangConstants;
 import org.opendaylight.yangtools.yang.model.api.Deviation;
+import org.opendaylight.yangtools.yang.model.api.Deviation.Deviate;
 import org.opendaylight.yangtools.yang.model.api.ModuleIdentifier;
 import org.opendaylight.yangtools.yang.model.api.RevisionAwareXPath;
 import org.opendaylight.yangtools.yang.model.api.Status;
@@ -43,6 +48,7 @@ import org.opendaylight.yangtools.yang.model.api.stmt.SchemaNodeIdentifier;
 import org.opendaylight.yangtools.yang.model.api.stmt.SchemaNodeIdentifier.Relative;
 import org.opendaylight.yangtools.yang.model.api.stmt.SubmoduleStatement;
 import org.opendaylight.yangtools.yang.model.util.RevisionAwareXPathImpl;
+import org.opendaylight.yangtools.yang.parser.spi.meta.QNameCacheNamespace;
 import org.opendaylight.yangtools.yang.parser.spi.meta.StmtContext;
 import org.opendaylight.yangtools.yang.parser.spi.meta.StmtContextUtils;
 import org.opendaylight.yangtools.yang.parser.spi.source.BelongsToPrefixToModuleName;
@@ -52,18 +58,243 @@ import org.opendaylight.yangtools.yang.parser.spi.source.ModuleIdentifierToModul
 import org.opendaylight.yangtools.yang.parser.spi.source.ModuleNameToModuleQName;
 import org.opendaylight.yangtools.yang.parser.spi.source.PrefixToModule;
 import org.opendaylight.yangtools.yang.parser.spi.source.QNameToStatementDefinition;
-import org.opendaylight.yangtools.yang.parser.stmt.reactor.RootStatementContext;
+import org.opendaylight.yangtools.yang.parser.spi.source.SourceException;
 import org.opendaylight.yangtools.yang.parser.stmt.reactor.StatementContextBase;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public final class Utils {
+    private static final int UNICODE_SCRIPT_FIX_COUNTER = 30;
     private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
     private static final CharMatcher DOUBLE_QUOTE_MATCHER = CharMatcher.is('"');
     private static final CharMatcher SINGLE_QUOTE_MATCHER = CharMatcher.is('\'');
+    private static final CharMatcher LEFT_PARENTHESIS_MATCHER = CharMatcher.is('(');
+    private static final CharMatcher RIGHT_PARENTHESIS_MATCHER = CharMatcher.is(')');
+    private static final CharMatcher AMPERSAND_MATCHER = CharMatcher.is('&');
+    private static final CharMatcher QUESTION_MARK_MATCHER = CharMatcher.is('?');
     private static final Splitter SLASH_SPLITTER = Splitter.on('/').omitEmptyStrings().trimResults();
     private static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings().trimResults();
     private static final Pattern PATH_ABS = Pattern.compile("/[^/].*");
+    private static final Pattern BETWEEN_CURLY_BRACES_PATTERN = Pattern.compile("\\{(.+?)\\}");
+    private static final Set<String> JAVA_UNICODE_BLOCKS = ImmutableSet.<String>builder()
+            .add("AegeanNumbers")
+            .add("AlchemicalSymbols")
+            .add("AlphabeticPresentationForms")
+            .add("AncientGreekMusicalNotation")
+            .add("AncientGreekNumbers")
+            .add("AncientSymbols")
+            .add("Arabic")
+            .add("ArabicPresentationForms-A")
+            .add("ArabicPresentationForms-B")
+            .add("ArabicSupplement")
+            .add("Armenian")
+            .add("Arrows")
+            .add("Avestan")
+            .add("Balinese")
+            .add("Bamum")
+            .add("BamumSupplement")
+            .add("BasicLatin")
+            .add("Batak")
+            .add("Bengali")
+            .add("BlockElements")
+            .add("Bopomofo")
+            .add("BopomofoExtended")
+            .add("BoxDrawing")
+            .add("Brahmi")
+            .add("BraillePatterns")
+            .add("Buginese")
+            .add("Buhid")
+            .add("ByzantineMusicalSymbols")
+            .add("Carian")
+            .add("Cham")
+            .add("Cherokee")
+            .add("CJKCompatibility")
+            .add("CJKCompatibilityForms")
+            .add("CJKCompatibilityIdeographs")
+            .add("CJKCompatibilityIdeographsSupplement")
+            .add("CJKRadicalsSupplement")
+            .add("CJKStrokes")
+            .add("CJKSymbolsandPunctuation")
+            .add("CJKUnifiedIdeographs")
+            .add("CJKUnifiedIdeographsExtensionA")
+            .add("CJKUnifiedIdeographsExtensionB")
+            .add("CJKUnifiedIdeographsExtensionC")
+            .add("CJKUnifiedIdeographsExtensionD")
+            .add("CombiningDiacriticalMarks")
+            .add("CombiningDiacriticalMarksSupplement")
+            .add("CombiningHalfMarks")
+            .add("CombiningDiacriticalMarksforSymbols")
+            .add("CommonIndicNumberForms")
+            .add("ControlPictures")
+            .add("Coptic")
+            .add("CountingRodNumerals")
+            .add("Cuneiform")
+            .add("CuneiformNumbersandPunctuation")
+            .add("CurrencySymbols")
+            .add("CypriotSyllabary")
+            .add("Cyrillic")
+            .add("CyrillicExtended-A")
+            .add("CyrillicExtended-B")
+            .add("CyrillicSupplementary")
+            .add("Deseret")
+            .add("Devanagari")
+            .add("DevanagariExtended")
+            .add("Dingbats")
+            .add("DominoTiles")
+            .add("EgyptianHieroglyphs")
+            .add("Emoticons")
+            .add("EnclosedAlphanumericSupplement")
+            .add("EnclosedAlphanumerics")
+            .add("EnclosedCJKLettersandMonths")
+            .add("EnclosedIdeographicSupplement")
+            .add("Ethiopic")
+            .add("EthiopicExtended")
+            .add("EthiopicExtended-A")
+            .add("EthiopicSupplement")
+            .add("GeneralPunctuation")
+            .add("GeometricShapes")
+            .add("Georgian")
+            .add("GeorgianSupplement")
+            .add("Glagolitic")
+            .add("Gothic")
+            .add("GreekandCoptic")
+            .add("GreekExtended")
+            .add("Gujarati")
+            .add("Gurmukhi")
+            .add("HalfwidthandFullwidthForms")
+            .add("HangulCompatibilityJamo")
+            .add("HangulJamo")
+            .add("HangulJamoExtended-A")
+            .add("HangulJamoExtended-B")
+            .add("HangulSyllables")
+            .add("Hanunoo")
+            .add("Hebrew")
+            .add("HighPrivateUseSurrogates")
+            .add("HighSurrogates")
+            .add("Hiragana")
+            .add("IdeographicDescriptionCharacters")
+            .add("ImperialAramaic")
+            .add("InscriptionalPahlavi")
+            .add("InscriptionalParthian")
+            .add("IPAExtensions")
+            .add("Javanese")
+            .add("Kaithi")
+            .add("KanaSupplement")
+            .add("Kanbun")
+            .add("Kangxi Radicals")
+            .add("Kannada")
+            .add("Katakana")
+            .add("KatakanaPhoneticExtensions")
+            .add("KayahLi")
+            .add("Kharoshthi")
+            .add("Khmer")
+            .add("KhmerSymbols")
+            .add("Lao")
+            .add("Latin-1Supplement")
+            .add("LatinExtended-A")
+            .add("LatinExtendedAdditional")
+            .add("LatinExtended-B")
+            .add("LatinExtended-C")
+            .add("LatinExtended-D")
+            .add("Lepcha")
+            .add("LetterlikeSymbols")
+            .add("Limbu")
+            .add("LinearBIdeograms")
+            .add("LinearBSyllabary")
+            .add("Lisu")
+            .add("LowSurrogates")
+            .add("Lycian")
+            .add("Lydian")
+            .add("MahjongTiles")
+            .add("Malayalam")
+            .add("Mandaic")
+            .add("MathematicalAlphanumericSymbols")
+            .add("MathematicalOperators")
+            .add("MeeteiMayek")
+            .add("MiscellaneousMathematicalSymbols-A")
+            .add("MiscellaneousMathematicalSymbols-B")
+            .add("MiscellaneousSymbols")
+            .add("MiscellaneousSymbolsandArrows")
+            .add("MiscellaneousSymbolsAndPictographs")
+            .add("MiscellaneousTechnical")
+            .add("ModifierToneLetters")
+            .add("Mongolian")
+            .add("MusicalSymbols")
+            .add("Myanmar")
+            .add("MyanmarExtended-A")
+            .add("NewTaiLue")
+            .add("NKo")
+            .add("NumberForms")
+            .add("Ogham")
+            .add("OlChiki")
+            .add("OldItalic")
+            .add("OldPersian")
+            .add("OldSouthArabian")
+            .add("OldTurkic")
+            .add("OpticalCharacterRecognition")
+            .add("Oriya")
+            .add("Osmanya")
+            .add("Phags-pa")
+            .add("PhaistosDisc")
+            .add("Phoenician")
+            .add("PhoneticExtensions")
+            .add("PhoneticExtensionsSupplement")
+            .add("PlayingCards")
+            .add("PrivateUseArea")
+            .add("Rejang")
+            .add("RumiNumeralSymbols")
+            .add("Runic")
+            .add("Samaritan")
+            .add("Saurashtra")
+            .add("Shavian")
+            .add("Sinhala")
+            .add("SmallFormVariants")
+            .add("SpacingModifierLetters")
+            .add("Specials")
+            .add("Sundanese")
+            .add("SuperscriptsandSubscripts")
+            .add("SupplementalArrows-A")
+            .add("SupplementalArrows-B")
+            .add("SupplementalMathematicalOperators")
+            .add("SupplementalPunctuation")
+            .add("SupplementaryPrivateUseArea-A")
+            .add("SupplementaryPrivateUseArea-B")
+            .add("SylotiNagri")
+            .add("Syriac")
+            .add("Tagalog")
+            .add("Tagbanwa")
+            .add("Tags")
+            .add("TaiLe")
+            .add("TaiTham")
+            .add("TaiViet")
+            .add("TaiXuanJingSymbols")
+            .add("Tamil")
+            .add("Telugu")
+            .add("Thaana")
+            .add("Thai")
+            .add("Tibetan")
+            .add("Tifinagh")
+            .add("TransportAndMapSymbols")
+            .add("Ugaritic")
+            .add("UnifiedCanadianAboriginalSyllabics")
+            .add("UnifiedCanadianAboriginalSyllabicsExtended")
+            .add("Vai")
+            .add("VariationSelectors")
+            .add("VariationSelectorsSupplement")
+            .add("VedicExtensions")
+            .add("VerticalForms")
+            .add("YiRadicals")
+            .add("YiSyllables")
+            .add("YijingHexagramSymbols").build();
+
+    private static final Map<String, Deviate> KEYWORD_TO_DEVIATE_MAP;
+    static {
+        Builder<String, Deviate> keywordToDeviateMapBuilder = ImmutableMap.builder();
+        for (Deviate deviate : Deviation.Deviate.values()) {
+            keywordToDeviateMapBuilder.put(deviate.getKeyword(), deviate);
+        }
+        KEYWORD_TO_DEVIATE_MAP = keywordToDeviateMapBuilder.build();
+    }
 
     private static final ThreadLocal<XPathFactory> XPATH_FACTORY = new ThreadLocal<XPathFactory>() {
         @Override
@@ -91,7 +322,7 @@ public final class Utils {
         // to detect if key contains duplicates
         if ((new HashSet<>(keyTokens)).size() < keyTokens.size()) {
             // FIXME: report all duplicate keys
-            throw new IllegalArgumentException();
+            throw new SourceException(ctx.getStatementSourceReference(), "Duplicate value in list key: %s", value);
         }
 
         Set<SchemaNodeIdentifier.Relative> keyNodes = new HashSet<>();
@@ -137,18 +368,22 @@ public final class Utils {
         return identifier;
     }
 
-    public static String getPrefixFromArgument(final String prefixedLocalName) {
-        String[] namesParts = prefixedLocalName.split(":");
-        if (namesParts.length == 2) {
-            return namesParts[0];
-        }
-        return null;
-    }
-
-    public static boolean isValidStatementDefinition(final PrefixToModule prefixes, final QNameToStatementDefinition stmtDef,
-            final QName identifier) {
+    /**
+     *
+     * Based on identifier read from source and collections of relevant prefixes and statement definitions mappings
+     * provided for actual phase, method resolves and returns valid QName for declared statement to be written.
+     * This applies to any declared statement, including unknown statements.
+     *
+     * @param prefixes - collection of all relevant prefix mappings supplied for actual parsing phase
+     * @param stmtDef - collection of all relevant statement definition mappings provided for actual parsing phase
+     * @param identifier - statement to parse from source
+     * @return valid QName for declared statement to be written
+     *
+     */
+    public static QName getValidStatementDefinition(final PrefixToModule prefixes, final QNameToStatementDefinition
+            stmtDef, final QName identifier) {
         if (stmtDef.get(identifier) != null) {
-            return true;
+            return stmtDef.get(identifier).getStatementName();
         } else {
             String prefixedLocalName = identifier.getLocalName();
             String[] namesParts = prefixedLocalName.split(":");
@@ -157,16 +392,12 @@ public final class Utils {
                 String prefix = namesParts[0];
                 String localName = namesParts[1];
                 if (prefixes != null && prefixes.get(prefix) != null
-                        && stmtDef.get(QName.create(YangConstants.RFC6020_YIN_MODULE, localName)) != null) {
-                    return true;
-                } else {
-                    if (stmtDef.get(QName.create(YangConstants.RFC6020_YIN_MODULE, localName)) != null) {
-                        return true;
-                    }
+                        && stmtDef.get(QName.create(prefixes.get(prefix), localName)) != null) {
+                    return QName.create(prefixes.get(prefix), localName);
                 }
             }
         }
-        return false;
+        return null;
     }
 
     static SchemaNodeIdentifier nodeIdentifierFromPath(final StmtContext<?, ?, ?> ctx, final String path) {
@@ -242,17 +473,18 @@ public final class Utils {
             break;
         }
 
-        Preconditions.checkArgument(qNameModule != null, "Error in module '%s': can not resolve QNameModule for '%s'.",
-                ctx.getRoot().rawStatementArgument(), value);
+        Preconditions.checkArgument(qNameModule != null,
+                "Error in module '%s': can not resolve QNameModule for '%s'. Statement source at %s",
+                ctx.getRoot().rawStatementArgument(), value, ctx.getStatementSourceReference());
         final QNameModule resultQNameModule;
         if (qNameModule.getRevision() == null) {
-            resultQNameModule = QNameModule.cachedReference(
-                QNameModule.create(qNameModule.getNamespace(), SimpleDateFormatUtil.DEFAULT_DATE_REV));
+            resultQNameModule = QNameModule.create(qNameModule.getNamespace(), SimpleDateFormatUtil.DEFAULT_DATE_REV)
+                .intern();
         } else {
             resultQNameModule = qNameModule;
         }
 
-        return QName.create(resultQNameModule, localName);
+        return ctx.getFromNamespace(QNameCacheNamespace.class, QName.create(resultQNameModule, localName));
     }
 
     public static QNameModule getModuleQNameByPrefix(final StmtContext<?, ?, ?> ctx, final String prefix) {
@@ -288,8 +520,7 @@ public final class Utils {
             return qNameModule;
         }
 
-        return QNameModule.cachedReference(
-            QNameModule.create(qNameModule.getNamespace(), SimpleDateFormatUtil.DEFAULT_DATE_REV));
+        return QNameModule.create(qNameModule.getNamespace(), SimpleDateFormatUtil.DEFAULT_DATE_REV).intern();
     }
 
     @Nullable
@@ -303,44 +534,24 @@ public final class Utils {
                 .isAssignableFrom(UnknownStatementImpl.class);
     }
 
-    public static Deviation.Deviate parseDeviateFromString(final String deviate) {
-
-        // Yang constants should be lowercase so we have throw if value does not
-        // suit this
-        String deviateUpper = deviate.toUpperCase();
-        Preconditions.checkArgument(!Objects.equals(deviate, deviateUpper),
-            "String %s is not valid deviate argument", deviate);
-
-        // but Java enum is uppercase so we cannot use lowercase here
-        try {
-            return Deviation.Deviate.valueOf(deviateUpper);
-        } catch (IllegalArgumentException e) {
-            throw new IllegalArgumentException(String.format("String %s is not valid deviate argument", deviate), e);
-        }
+    public static Deviation.Deviate parseDeviateFromString(final StmtContext<?, ?, ?> ctx, final String deviateKeyword) {
+        return Preconditions.checkNotNull(KEYWORD_TO_DEVIATE_MAP.get(deviateKeyword),
+                "String '%s' is not valid deviate argument. Statement source at %s", deviateKeyword,
+                ctx.getStatementSourceReference());
     }
 
     public static Status parseStatus(final String value) {
-
-        Status status = null;
         switch (value) {
         case "current":
-            status = Status.CURRENT;
-            break;
+            return Status.CURRENT;
         case "deprecated":
-            status = Status.DEPRECATED;
-            break;
+            return Status.DEPRECATED;
         case "obsolete":
-            status = Status.OBSOLETE;
-            break;
+            return Status.OBSOLETE;
         default:
-            LOG.warn("Invalid 'status' statement: " + value);
+            LOG.warn("Invalid 'status' statement: {}", value);
+            return null;
         }
-
-        return status;
-    }
-
-    public static Date getLatestRevision(final RootStatementContext<?, ?, ?> root) {
-        return getLatestRevision(root.declaredSubstatements());
     }
 
     public static Date getLatestRevision(final Iterable<? extends StmtContext<?, ?, ?>> subStmts) {
@@ -361,7 +572,56 @@ public final class Utils {
 
     public static boolean isModuleIdentifierWithoutSpecifiedRevision(final Object o) {
         return (o instanceof ModuleIdentifier)
-                && (((ModuleIdentifier) o).getRevision() == SimpleDateFormatUtil.DEFAULT_DATE_IMP ||
-                        ((ModuleIdentifier) o).getRevision() == SimpleDateFormatUtil.DEFAULT_BELONGS_TO_DATE);
+                && (((ModuleIdentifier) o).getRevision() == SimpleDateFormatUtil.DEFAULT_DATE_IMP || ((ModuleIdentifier) o)
+                        .getRevision() == SimpleDateFormatUtil.DEFAULT_BELONGS_TO_DATE);
+    }
+
+    /**
+     * Replaces illegal characters of QName by the name of the character (e.g.
+     * '?' is replaced by "QuestionMark" etc.).
+     *
+     * @param string
+     *            input String
+     * @return result String
+     */
+    public static String replaceIllegalCharsForQName(String string) {
+        string = LEFT_PARENTHESIS_MATCHER.replaceFrom(string, "LeftParenthesis");
+        string = RIGHT_PARENTHESIS_MATCHER.replaceFrom(string, "RightParenthesis");
+        string = AMPERSAND_MATCHER.replaceFrom(string, "Ampersand");
+        string = QUESTION_MARK_MATCHER.replaceFrom(string, "QuestionMark");
+
+        return string;
+    }
+
+    public static String fixUnicodeScriptPattern(String rawPattern) {
+        for (int i = 0; i < UNICODE_SCRIPT_FIX_COUNTER; i++) {
+            try {
+                Pattern.compile(rawPattern);
+                return rawPattern;
+            } catch(PatternSyntaxException ex) {
+                LOG.debug("Invalid regex pattern syntax in: {}", rawPattern, ex);
+                if (ex.getMessage().contains("Unknown character script name")) {
+                    rawPattern = fixUnknownScripts(ex.getMessage(), rawPattern);
+                } else {
+                    return rawPattern;
+                }
+            }
+        }
+
+        LOG.warn("Regex pattern could not be fixed: {}", rawPattern);
+        return rawPattern;
+    }
+
+    private static String fixUnknownScripts(final String exMessage, final String rawPattern) {
+        StringBuilder result = new StringBuilder(rawPattern);
+        Matcher matcher = BETWEEN_CURLY_BRACES_PATTERN.matcher(exMessage);
+        if (matcher.find()) {
+            String capturedGroup = matcher.group(1);
+            if (JAVA_UNICODE_BLOCKS.contains(capturedGroup)) {
+                int idx = rawPattern.indexOf("Is" + capturedGroup);
+                result = result.replace(idx, idx + 2, "In");
+            }
+        }
+        return result.toString();
     }
 }