From: Igor Foltin Date: Wed, 10 Feb 2016 11:09:12 +0000 (+0100) Subject: Bug 4079: Unable to compile pattern defined in module X-Git-Tag: release/beryllium~4 X-Git-Url: https://git.opendaylight.org/gerrit/gitweb?p=yangtools.git;a=commitdiff_plain;h=80bcbaac427850f34bcaadf375e94134af42d01f Bug 4079: Unable to compile pattern defined in module when using Unicode blocks The original patch for this bug fixed the problem only in the old yang parser. This patch fixes the issue in the new yang parser. Change-Id: I07c7ad8d00ab0f8f39888afd1468054e74574cb4 Signed-off-by: Igor Foltin (cherry picked from commit ee7500aa1604b3c654511d9f41de8281957101a8) --- diff --git a/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/PatternStatementImpl.java b/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/PatternStatementImpl.java index c5cf296150..56f562410c 100644 --- a/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/PatternStatementImpl.java +++ b/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/PatternStatementImpl.java @@ -52,7 +52,7 @@ public class PatternStatementImpl extends AbstractDeclaredStatement ctx, final String value) { - final String pattern = "^" + value + '$'; + final String pattern = "^" + Utils.fixUnicodeScriptPattern(value) + '$'; try { Pattern.compile(pattern); diff --git a/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/Utils.java b/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/Utils.java index 3123445b47..71ae3700a3 100644 --- a/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/Utils.java +++ b/yang/yang-parser-impl/src/main/java/org/opendaylight/yangtools/yang/parser/stmt/rfc6020/Utils.java @@ -14,6 +14,7 @@ import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap.Builder; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import java.util.ArrayList; import java.util.Arrays; @@ -23,7 +24,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import javax.annotation.Nullable; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpressionException; @@ -61,6 +64,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; public final class Utils { + private static final int UNICODE_SCRIPT_FIX_COUNTER = 30; private static final Logger LOG = LoggerFactory.getLogger(Utils.class); private static final CharMatcher DOUBLE_QUOTE_MATCHER = CharMatcher.is('"'); private static final CharMatcher SINGLE_QUOTE_MATCHER = CharMatcher.is('\''); @@ -71,6 +75,217 @@ public final class Utils { private static final Splitter SLASH_SPLITTER = Splitter.on('/').omitEmptyStrings().trimResults(); private static final Splitter SPACE_SPLITTER = Splitter.on(' ').omitEmptyStrings().trimResults(); private static final Pattern PATH_ABS = Pattern.compile("/[^/].*"); + private static final Pattern BETWEEN_CURLY_BRACES_PATTERN = Pattern.compile("\\{(.+?)\\}"); + private static final Set JAVA_UNICODE_BLOCKS = ImmutableSet.builder() + .add("AegeanNumbers") + .add("AlchemicalSymbols") + .add("AlphabeticPresentationForms") + .add("AncientGreekMusicalNotation") + .add("AncientGreekNumbers") + .add("AncientSymbols") + .add("Arabic") + .add("ArabicPresentationForms-A") + .add("ArabicPresentationForms-B") + .add("ArabicSupplement") + .add("Armenian") + .add("Arrows") + .add("Avestan") + .add("Balinese") + .add("Bamum") + .add("BamumSupplement") + .add("BasicLatin") + .add("Batak") + .add("Bengali") + .add("BlockElements") + .add("Bopomofo") + .add("BopomofoExtended") + .add("BoxDrawing") + .add("Brahmi") + .add("BraillePatterns") + .add("Buginese") + .add("Buhid") + .add("ByzantineMusicalSymbols") + .add("Carian") + .add("Cham") + .add("Cherokee") + .add("CJKCompatibility") + .add("CJKCompatibilityForms") + .add("CJKCompatibilityIdeographs") + .add("CJKCompatibilityIdeographsSupplement") + .add("CJKRadicalsSupplement") + .add("CJKStrokes") + .add("CJKSymbolsandPunctuation") + .add("CJKUnifiedIdeographs") + .add("CJKUnifiedIdeographsExtensionA") + .add("CJKUnifiedIdeographsExtensionB") + .add("CJKUnifiedIdeographsExtensionC") + .add("CJKUnifiedIdeographsExtensionD") + .add("CombiningDiacriticalMarks") + .add("CombiningDiacriticalMarksSupplement") + .add("CombiningHalfMarks") + .add("CombiningDiacriticalMarksforSymbols") + .add("CommonIndicNumberForms") + .add("ControlPictures") + .add("Coptic") + .add("CountingRodNumerals") + .add("Cuneiform") + .add("CuneiformNumbersandPunctuation") + .add("CurrencySymbols") + .add("CypriotSyllabary") + .add("Cyrillic") + .add("CyrillicExtended-A") + .add("CyrillicExtended-B") + .add("CyrillicSupplementary") + .add("Deseret") + .add("Devanagari") + .add("DevanagariExtended") + .add("Dingbats") + .add("DominoTiles") + .add("EgyptianHieroglyphs") + .add("Emoticons") + .add("EnclosedAlphanumericSupplement") + .add("EnclosedAlphanumerics") + .add("EnclosedCJKLettersandMonths") + .add("EnclosedIdeographicSupplement") + .add("Ethiopic") + .add("EthiopicExtended") + .add("EthiopicExtended-A") + .add("EthiopicSupplement") + .add("GeneralPunctuation") + .add("GeometricShapes") + .add("Georgian") + .add("GeorgianSupplement") + .add("Glagolitic") + .add("Gothic") + .add("GreekandCoptic") + .add("GreekExtended") + .add("Gujarati") + .add("Gurmukhi") + .add("HalfwidthandFullwidthForms") + .add("HangulCompatibilityJamo") + .add("HangulJamo") + .add("HangulJamoExtended-A") + .add("HangulJamoExtended-B") + .add("HangulSyllables") + .add("Hanunoo") + .add("Hebrew") + .add("HighPrivateUseSurrogates") + .add("HighSurrogates") + .add("Hiragana") + .add("IdeographicDescriptionCharacters") + .add("ImperialAramaic") + .add("InscriptionalPahlavi") + .add("InscriptionalParthian") + .add("IPAExtensions") + .add("Javanese") + .add("Kaithi") + .add("KanaSupplement") + .add("Kanbun") + .add("Kangxi Radicals") + .add("Kannada") + .add("Katakana") + .add("KatakanaPhoneticExtensions") + .add("KayahLi") + .add("Kharoshthi") + .add("Khmer") + .add("KhmerSymbols") + .add("Lao") + .add("Latin-1Supplement") + .add("LatinExtended-A") + .add("LatinExtendedAdditional") + .add("LatinExtended-B") + .add("LatinExtended-C") + .add("LatinExtended-D") + .add("Lepcha") + .add("LetterlikeSymbols") + .add("Limbu") + .add("LinearBIdeograms") + .add("LinearBSyllabary") + .add("Lisu") + .add("LowSurrogates") + .add("Lycian") + .add("Lydian") + .add("MahjongTiles") + .add("Malayalam") + .add("Mandaic") + .add("MathematicalAlphanumericSymbols") + .add("MathematicalOperators") + .add("MeeteiMayek") + .add("MiscellaneousMathematicalSymbols-A") + .add("MiscellaneousMathematicalSymbols-B") + .add("MiscellaneousSymbols") + .add("MiscellaneousSymbolsandArrows") + .add("MiscellaneousSymbolsAndPictographs") + .add("MiscellaneousTechnical") + .add("ModifierToneLetters") + .add("Mongolian") + .add("MusicalSymbols") + .add("Myanmar") + .add("MyanmarExtended-A") + .add("NewTaiLue") + .add("NKo") + .add("NumberForms") + .add("Ogham") + .add("OlChiki") + .add("OldItalic") + .add("OldPersian") + .add("OldSouthArabian") + .add("OldTurkic") + .add("OpticalCharacterRecognition") + .add("Oriya") + .add("Osmanya") + .add("Phags-pa") + .add("PhaistosDisc") + .add("Phoenician") + .add("PhoneticExtensions") + .add("PhoneticExtensionsSupplement") + .add("PlayingCards") + .add("PrivateUseArea") + .add("Rejang") + .add("RumiNumeralSymbols") + .add("Runic") + .add("Samaritan") + .add("Saurashtra") + .add("Shavian") + .add("Sinhala") + .add("SmallFormVariants") + .add("SpacingModifierLetters") + .add("Specials") + .add("Sundanese") + .add("SuperscriptsandSubscripts") + .add("SupplementalArrows-A") + .add("SupplementalArrows-B") + .add("SupplementalMathematicalOperators") + .add("SupplementalPunctuation") + .add("SupplementaryPrivateUseArea-A") + .add("SupplementaryPrivateUseArea-B") + .add("SylotiNagri") + .add("Syriac") + .add("Tagalog") + .add("Tagbanwa") + .add("Tags") + .add("TaiLe") + .add("TaiTham") + .add("TaiViet") + .add("TaiXuanJingSymbols") + .add("Tamil") + .add("Telugu") + .add("Thaana") + .add("Thai") + .add("Tibetan") + .add("Tifinagh") + .add("TransportAndMapSymbols") + .add("Ugaritic") + .add("UnifiedCanadianAboriginalSyllabics") + .add("UnifiedCanadianAboriginalSyllabicsExtended") + .add("Vai") + .add("VariationSelectors") + .add("VariationSelectorsSupplement") + .add("VedicExtensions") + .add("VerticalForms") + .add("YiRadicals") + .add("YiSyllables") + .add("YijingHexagramSymbols").build(); private static final Map KEYWORD_TO_DEVIATE_MAP; static { @@ -377,4 +592,36 @@ public final class Utils { return string; } + + public static String fixUnicodeScriptPattern(String rawPattern) { + for (int i = 0; i < UNICODE_SCRIPT_FIX_COUNTER; i++) { + try { + Pattern.compile(rawPattern); + return rawPattern; + } catch(PatternSyntaxException ex) { + LOG.debug("Invalid regex pattern syntax in: {}", rawPattern, ex); + if (ex.getMessage().contains("Unknown character script name")) { + rawPattern = fixUnknownScripts(ex.getMessage(), rawPattern); + } else { + return rawPattern; + } + } + } + + LOG.warn("Regex pattern could not be fixed: {}", rawPattern); + return rawPattern; + } + + private static String fixUnknownScripts(final String exMessage, final String rawPattern) { + StringBuilder result = new StringBuilder(rawPattern); + Matcher matcher = BETWEEN_CURLY_BRACES_PATTERN.matcher(exMessage); + if (matcher.find()) { + String capturedGroup = matcher.group(1); + if (JAVA_UNICODE_BLOCKS.contains(capturedGroup)) { + int idx = rawPattern.indexOf("Is" + capturedGroup); + result = result.replace(idx, idx + 2, "In"); + } + } + return result.toString(); + } } diff --git a/yang/yang-parser-impl/src/test/java/org/opendaylight/yangtools/yang/stmt/retest/Bug4079Test.java b/yang/yang-parser-impl/src/test/java/org/opendaylight/yangtools/yang/stmt/retest/Bug4079Test.java new file mode 100644 index 0000000000..7793a61f32 --- /dev/null +++ b/yang/yang-parser-impl/src/test/java/org/opendaylight/yangtools/yang/stmt/retest/Bug4079Test.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v1.0 which accompanies this distribution, + * and is available at http://www.eclipse.org/legal/epl-v10.html + */ + +package org.opendaylight.yangtools.yang.stmt.retest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.lang.reflect.InvocationTargetException; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; +import org.junit.Test; +import org.opendaylight.yangtools.yang.parser.stmt.rfc6020.Utils; + +public class Bug4079Test { + + @Test + public void testValidPatternFix() { + String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsArrows})*+"); + assertEquals("(\\p{InArrows})*+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsDingbats})++"); + assertEquals("(\\p{InDingbats})++", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsSpecials})?+"); + assertEquals("(\\p{InSpecials})?+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBatak}){4}+"); + assertEquals("(\\p{IsBatak}){4}+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}){4,6}+"); + assertEquals("(\\p{IsLatin}){4,6}+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsTibetan}){4,}+"); + assertEquals("(\\p{IsTibetan}){4,}+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsAlphabetic}){4}?"); + assertEquals("(\\p{IsAlphabetic}){4}?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLowercase}){4,6}?"); + assertEquals("(\\p{IsLowercase}){4,6}?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsUppercase}){4,}?"); + assertEquals("(\\p{IsUppercase}){4,}?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{IsLatin-1Supplement})*"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})*", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})+"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{InLatin-1Supplement})?"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement})?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{InBasicLatin}|\\p{IsLatin-1Supplement}){4}"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}){4}", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}|\\p{IsArmenian}){2,4}"); + assertEquals("(\\p{IsLatin}|\\p{IsArmenian}){2,4}", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsLatin}|\\p{IsBasicLatin}){2,}"); + assertEquals("(\\p{IsLatin}|\\p{InBasicLatin}){2,}", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsBasicLatin}|\\p{IsLatin})*?"); + assertEquals("(\\p{InBasicLatin}|\\p{IsLatin})*?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern( + "(\\p{IsBasicLatin}|\\p{IsLatin-1Supplement}|\\p{IsArrows})+?"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}|\\p{InArrows})+?", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern( + "(\\p{InBasicLatin}|\\p{IsLatin-1Supplement}|\\p{IsLatin})??"); + assertEquals("(\\p{InBasicLatin}|\\p{InLatin-1Supplement}|\\p{IsLatin})??", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\p{IsBasicLatin})*+"); + assertEquals("(\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\p{IsBasicLatin})*+"); + assertEquals("(\\\\\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + + fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\\\\\p{IsBasicLatin})*+"); + assertEquals("(\\\\\\\\\\\\\\p{InBasicLatin})*+", fixedUnicodeScriptPattern); + assertNotNull(Pattern.compile(fixedUnicodeScriptPattern)); + } + + @Test(expected = PatternSyntaxException.class) + public void testInvalidPattern() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\p{IsBasicLatin})*+"); + assertEquals("(\\\\p{IsBasicLatin})*+", fixedUnicodeScriptPattern); + // should throw exception + Pattern.compile(fixedUnicodeScriptPattern); + } + + @Test(expected = PatternSyntaxException.class) + public void testInvalidPattern2() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\p{IsSpecials}|\\\\\\\\p{IsBasicLatin})*+"); + assertEquals("(\\p{InSpecials}|\\\\\\\\p{IsBasicLatin})*+", fixedUnicodeScriptPattern); + // should throw exception + Pattern.compile(fixedUnicodeScriptPattern); + } + + @Test(expected = PatternSyntaxException.class) + public void testInvalidPattern3() throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + String fixedUnicodeScriptPattern = Utils.fixUnicodeScriptPattern("(\\\\\\\\\\\\p{IsBasicLatin}|\\p{IsTags})*+"); + assertEquals("(\\\\\\\\\\\\p{IsBasicLatin}|\\p{IsTags})*+", fixedUnicodeScriptPattern); + // should throw exception + Pattern.compile(fixedUnicodeScriptPattern); + } +}