import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMap.Builder;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
import javax.annotation.Nullable;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import org.slf4j.LoggerFactory;
public final class Utils {
+ private static final int UNICODE_SCRIPT_FIX_COUNTER = 30;
private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
private static final CharMatcher DOUBLE_QUOTE_MATCHER = CharMatcher.is('"');
private static final CharMatcher SINGLE_QUOTE_MATCHER = CharMatcher.is('\'');
private static final Splitter SLASH_SPLITTER = Splitter.on('/').omitEmptyStrings().trimResults();
private static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings().trimResults();
private static final Pattern PATH_ABS = Pattern.compile("/[^/].*");
+ private static final Pattern BETWEEN_CURLY_BRACES_PATTERN = Pattern.compile("\\{(.+?)\\}");
+ private static final Set<String> JAVA_UNICODE_BLOCKS = ImmutableSet.<String>builder()
+ .add("AegeanNumbers")
+ .add("AlchemicalSymbols")
+ .add("AlphabeticPresentationForms")
+ .add("AncientGreekMusicalNotation")
+ .add("AncientGreekNumbers")
+ .add("AncientSymbols")
+ .add("Arabic")
+ .add("ArabicPresentationForms-A")
+ .add("ArabicPresentationForms-B")
+ .add("ArabicSupplement")
+ .add("Armenian")
+ .add("Arrows")
+ .add("Avestan")
+ .add("Balinese")
+ .add("Bamum")
+ .add("BamumSupplement")
+ .add("BasicLatin")
+ .add("Batak")
+ .add("Bengali")
+ .add("BlockElements")
+ .add("Bopomofo")
+ .add("BopomofoExtended")
+ .add("BoxDrawing")
+ .add("Brahmi")
+ .add("BraillePatterns")
+ .add("Buginese")
+ .add("Buhid")
+ .add("ByzantineMusicalSymbols")
+ .add("Carian")
+ .add("Cham")
+ .add("Cherokee")
+ .add("CJKCompatibility")
+ .add("CJKCompatibilityForms")
+ .add("CJKCompatibilityIdeographs")
+ .add("CJKCompatibilityIdeographsSupplement")
+ .add("CJKRadicalsSupplement")
+ .add("CJKStrokes")
+ .add("CJKSymbolsandPunctuation")
+ .add("CJKUnifiedIdeographs")
+ .add("CJKUnifiedIdeographsExtensionA")
+ .add("CJKUnifiedIdeographsExtensionB")
+ .add("CJKUnifiedIdeographsExtensionC")
+ .add("CJKUnifiedIdeographsExtensionD")
+ .add("CombiningDiacriticalMarks")
+ .add("CombiningDiacriticalMarksSupplement")
+ .add("CombiningHalfMarks")
+ .add("CombiningDiacriticalMarksforSymbols")
+ .add("CommonIndicNumberForms")
+ .add("ControlPictures")
+ .add("Coptic")
+ .add("CountingRodNumerals")
+ .add("Cuneiform")
+ .add("CuneiformNumbersandPunctuation")
+ .add("CurrencySymbols")
+ .add("CypriotSyllabary")
+ .add("Cyrillic")
+ .add("CyrillicExtended-A")
+ .add("CyrillicExtended-B")
+ .add("CyrillicSupplementary")
+ .add("Deseret")
+ .add("Devanagari")
+ .add("DevanagariExtended")
+ .add("Dingbats")
+ .add("DominoTiles")
+ .add("EgyptianHieroglyphs")
+ .add("Emoticons")
+ .add("EnclosedAlphanumericSupplement")
+ .add("EnclosedAlphanumerics")
+ .add("EnclosedCJKLettersandMonths")
+ .add("EnclosedIdeographicSupplement")
+ .add("Ethiopic")
+ .add("EthiopicExtended")
+ .add("EthiopicExtended-A")
+ .add("EthiopicSupplement")
+ .add("GeneralPunctuation")
+ .add("GeometricShapes")
+ .add("Georgian")
+ .add("GeorgianSupplement")
+ .add("Glagolitic")
+ .add("Gothic")
+ .add("GreekandCoptic")
+ .add("GreekExtended")
+ .add("Gujarati")
+ .add("Gurmukhi")
+ .add("HalfwidthandFullwidthForms")
+ .add("HangulCompatibilityJamo")
+ .add("HangulJamo")
+ .add("HangulJamoExtended-A")
+ .add("HangulJamoExtended-B")
+ .add("HangulSyllables")
+ .add("Hanunoo")
+ .add("Hebrew")
+ .add("HighPrivateUseSurrogates")
+ .add("HighSurrogates")
+ .add("Hiragana")
+ .add("IdeographicDescriptionCharacters")
+ .add("ImperialAramaic")
+ .add("InscriptionalPahlavi")
+ .add("InscriptionalParthian")
+ .add("IPAExtensions")
+ .add("Javanese")
+ .add("Kaithi")
+ .add("KanaSupplement")
+ .add("Kanbun")
+ .add("Kangxi Radicals")
+ .add("Kannada")
+ .add("Katakana")
+ .add("KatakanaPhoneticExtensions")
+ .add("KayahLi")
+ .add("Kharoshthi")
+ .add("Khmer")
+ .add("KhmerSymbols")
+ .add("Lao")
+ .add("Latin-1Supplement")
+ .add("LatinExtended-A")
+ .add("LatinExtendedAdditional")
+ .add("LatinExtended-B")
+ .add("LatinExtended-C")
+ .add("LatinExtended-D")
+ .add("Lepcha")
+ .add("LetterlikeSymbols")
+ .add("Limbu")
+ .add("LinearBIdeograms")
+ .add("LinearBSyllabary")
+ .add("Lisu")
+ .add("LowSurrogates")
+ .add("Lycian")
+ .add("Lydian")
+ .add("MahjongTiles")
+ .add("Malayalam")
+ .add("Mandaic")
+ .add("MathematicalAlphanumericSymbols")
+ .add("MathematicalOperators")
+ .add("MeeteiMayek")
+ .add("MiscellaneousMathematicalSymbols-A")
+ .add("MiscellaneousMathematicalSymbols-B")
+ .add("MiscellaneousSymbols")
+ .add("MiscellaneousSymbolsandArrows")
+ .add("MiscellaneousSymbolsAndPictographs")
+ .add("MiscellaneousTechnical")
+ .add("ModifierToneLetters")
+ .add("Mongolian")
+ .add("MusicalSymbols")
+ .add("Myanmar")
+ .add("MyanmarExtended-A")
+ .add("NewTaiLue")
+ .add("NKo")
+ .add("NumberForms")
+ .add("Ogham")
+ .add("OlChiki")
+ .add("OldItalic")
+ .add("OldPersian")
+ .add("OldSouthArabian")
+ .add("OldTurkic")
+ .add("OpticalCharacterRecognition")
+ .add("Oriya")
+ .add("Osmanya")
+ .add("Phags-pa")
+ .add("PhaistosDisc")
+ .add("Phoenician")
+ .add("PhoneticExtensions")
+ .add("PhoneticExtensionsSupplement")
+ .add("PlayingCards")
+ .add("PrivateUseArea")
+ .add("Rejang")
+ .add("RumiNumeralSymbols")
+ .add("Runic")
+ .add("Samaritan")
+ .add("Saurashtra")
+ .add("Shavian")
+ .add("Sinhala")
+ .add("SmallFormVariants")
+ .add("SpacingModifierLetters")
+ .add("Specials")
+ .add("Sundanese")
+ .add("SuperscriptsandSubscripts")
+ .add("SupplementalArrows-A")
+ .add("SupplementalArrows-B")
+ .add("SupplementalMathematicalOperators")
+ .add("SupplementalPunctuation")
+ .add("SupplementaryPrivateUseArea-A")
+ .add("SupplementaryPrivateUseArea-B")
+ .add("SylotiNagri")
+ .add("Syriac")
+ .add("Tagalog")
+ .add("Tagbanwa")
+ .add("Tags")
+ .add("TaiLe")
+ .add("TaiTham")
+ .add("TaiViet")
+ .add("TaiXuanJingSymbols")
+ .add("Tamil")
+ .add("Telugu")
+ .add("Thaana")
+ .add("Thai")
+ .add("Tibetan")
+ .add("Tifinagh")
+ .add("TransportAndMapSymbols")
+ .add("Ugaritic")
+ .add("UnifiedCanadianAboriginalSyllabics")
+ .add("UnifiedCanadianAboriginalSyllabicsExtended")
+ .add("Vai")
+ .add("VariationSelectors")
+ .add("VariationSelectorsSupplement")
+ .add("VedicExtensions")
+ .add("VerticalForms")
+ .add("YiRadicals")
+ .add("YiSyllables")
+ .add("YijingHexagramSymbols").build();
private static final Map<String, Deviate> KEYWORD_TO_DEVIATE_MAP;
static {
return string;
}
+
+ public static String fixUnicodeScriptPattern(String rawPattern) {
+ for (int i = 0; i < UNICODE_SCRIPT_FIX_COUNTER; i++) {
+ try {
+ Pattern.compile(rawPattern);
+ return rawPattern;
+ } catch(PatternSyntaxException ex) {
+ LOG.debug("Invalid regex pattern syntax in: {}", rawPattern, ex);
+ if (ex.getMessage().contains("Unknown character script name")) {
+ rawPattern = fixUnknownScripts(ex.getMessage(), rawPattern);
+ } else {
+ return rawPattern;
+ }
+ }
+ }
+
+ LOG.warn("Regex pattern could not be fixed: {}", rawPattern);
+ return rawPattern;
+ }
+
+ private static String fixUnknownScripts(final String exMessage, final String rawPattern) {
+ StringBuilder result = new StringBuilder(rawPattern);
+ Matcher matcher = BETWEEN_CURLY_BRACES_PATTERN.matcher(exMessage);
+ if (matcher.find()) {
+ String capturedGroup = matcher.group(1);
+ if (JAVA_UNICODE_BLOCKS.contains(capturedGroup)) {
+ int idx = rawPattern.indexOf("Is" + capturedGroup);
+ result = result.replace(idx, idx + 2, "In");
+ }
+ }
+ return result.toString();
+ }
}
--- /dev/null
+/*
+ * Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v1.0 which accompanies this distribution,
+ * and is available at http://www.eclipse.org/legal/epl-v10.html
+ */
+
+package org.opendaylight.yangtools.yang.stmt.retest;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.lang.reflect.InvocationTargetException;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+import org.junit.Test;
+import org.opendaylight.yangtools.yang.parser.stmt.rfc6020.Utils;
+
+public class Bug4079Test {
+
+ @Test
+ public void testValidPatternFix() {
+ String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsArrows})*+");
+ assertEquals("(\\p{InArrows})*+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsDingbats})++");
+ assertEquals("(\\p{InDingbats})++", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsSpecials})?+");
+ assertEquals("(\\p{InSpecials})?+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBatak}){4}+");
+ assertEquals("(\\p{IsBatak}){4}+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}){4,6}+");
+ assertEquals("(\\p{IsLatin}){4,6}+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsTibetan}){4,}+");
+ assertEquals("(\\p{IsTibetan}){4,}+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsAlphabetic}){4}?");
+ assertEquals("(\\p{IsAlphabetic}){4}?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLowercase}){4,6}?");
+ assertEquals("(\\p{IsLowercase}){4,6}?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsUppercase}){4,}?");
+ assertEquals("(\\p{IsUppercase}){4,}?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{IsLatin-1Supplement})*");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})*", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})+");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{InLatin-1Supplement})?");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{InBasicLatin}|\\p{IsLatin-1Supplement}){4}");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}){4}", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}|\\p{IsArmenian}){2,4}");
+ assertEquals("(\\p{IsLatin}|\\p{IsArmenian}){2,4}", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}|\\p{IsBasicLatin}){2,}");
+ assertEquals("(\\p{IsLatin}|\\p{InBasicLatin}){2,}", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{IsLatin})*?");
+ assertEquals("(\\p{InBasicLatin}|\\p{IsLatin})*?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern(
+ "(\\p{IsBasicLatin}|\\p{IsLatin-1Supplement}|\\p{IsArrows})+?");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}|\\p{InArrows})+?", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern(
+ "(\\p{InBasicLatin}|\\p{IsLatin-1Supplement}|\\p{IsLatin})??");
+ assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}|\\p{IsLatin})??", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\p{IsBasicLatin})*+");
+ assertEquals("(\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\p{IsBasicLatin})*+");
+ assertEquals("(\\\\\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+
+ fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\\\\\p{IsBasicLatin})*+");
+ assertEquals("(\\\\\\\\\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern);
+ assertNotNull(Pattern.compile(fixedUnicodeScriptPattern));
+ }
+
+ @Test(expected = PatternSyntaxException.class)
+ public void testInvalidPattern() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\p{IsBasicLatin})*+");
+ assertEquals("(\\\\p{IsBasicLatin})*+", fixedUnicodeScriptPattern);
+ // should throw exception
+ Pattern.compile(fixedUnicodeScriptPattern);
+ }
+
+ @Test(expected = PatternSyntaxException.class)
+ public void testInvalidPattern2() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsSpecials}|\\\\\\\\p{IsBasicLatin})*+");
+ assertEquals("(\\p{InSpecials}|\\\\\\\\p{IsBasicLatin})*+", fixedUnicodeScriptPattern);
+ // should throw exception
+ Pattern.compile(fixedUnicodeScriptPattern);
+ }
+
+ @Test(expected = PatternSyntaxException.class)
+ public void testInvalidPattern3() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\\\p{IsBasicLatin}|\\p{IsTags})*+");
+ assertEquals("(\\\\\\\\\\\\p{IsBasicLatin}|\\p{IsTags})*+", fixedUnicodeScriptPattern);
+ // should throw exception
+ Pattern.compile(fixedUnicodeScriptPattern);
+ }
+}