binding2/mdsal-binding2-generator-util/src/main/java/org/opendaylight/mdsal/binding/javav2/generator/util/JavaIdentifierNormalizer.java

   1 /*
   2  * Copyright (c) 2017 Cisco Systems, Inc. and others.  All rights reserved.
   3  *
   4  * This program and the accompanying materials are made available under the
   5  * terms of the Eclipse Public License v1.0 which accompanies this distribution,
   6  * and is available at http://www.eclipse.org/legal/epl-v10.html
   7  */
   8 package org.opendaylight.mdsal.binding.javav2.generator.util;
   9
import com.google.common.annotations.Beta;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.opendaylight.mdsal.binding.javav2.generator.context.ModuleContext;
import org.opendaylight.mdsal.binding.javav2.model.api.Enumeration;
import org.opendaylight.mdsal.binding.javav2.model.api.Enumeration.Pair;
import org.opendaylight.mdsal.binding.javav2.util.BindingMapping;
  23
  24 /**
  25  * This util class converts every non-java char in identifier to java char by
  26  * its unicode name (<a href=
  27  * "http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.8">JAVA SE
  28  * SPECIFICATIONS - Identifiers</a>). There are special types of mapping
  29  * non-java chars to original identifiers according to specific
  30  * {@linkplain JavaIdentifier java type}:
  31  * <ul>
  32  * <li>class, enum, interface</li>
  33  * <li>
  34  * <ul>
  35  * <li>without special separator</li>
  36  * <li>the first character of identifier, any other first character of
  37  * identifier part mapped by non-Java char name from unicode and char in
  38  * identifier behind non-java char name are converting to upper case</li>
  39  * <li>examples:</li>
  40  * <li>
  41  * <ul>
  42  * <li>example* - ExampleAsterisk</li>
  43  * <li>example*example - ExampleAsteriskExample</li>
  44  * <li>\example - ReverseSolidusExample</li>
  45  * <li>1example - DigitOneExample</li>
  46  * <li>example1 - Example1</li>
  47  * <li>int - IntReservedKeyword</li>
  48  * <li>con - ConReservedKeyword</li>
  49  * </ul>
  50  * </li>
  51  * </ul>
  52  * </li>
  53  * <li>enum value, constant</li>
  54  * <li>
  55  * <ul>
  56  * <li>used underscore as special separator</li>
  57  * <li>converted identifier to upper case</li>
  58  * <li>examples:</li>
  59  * <li>
  60  * <ul>
  61  * <li>example* - EXAMPLE_ASTERISK</li>
  62  * <li>example*example - EXAMPLE_ASTERISK_EXAMPLE</li>
  63  * <li>\example - REVERSE_SOLIDUS_EXAMPLE</li>
  64  * <li>1example - DIGIT_ONE_EXAMPLE</li>
  65  * <li>example1 - EXAMPLE1</li>
  66  * <li>int - INT_RESERVED_KEYWORD</li>
  67  * <li>con - CON_RESERVED_KEYWORD</li>
  68  * </ul>
  69  * </li>
  70  * </ul>
  71  * </li>
  72  * <li>method, variable</li>
  73  * <li>
  75  * <ul>
  76  * <li>without special separator</li>
  77  * <li>the first character of identifier is converting to lower case</li>
  78  * <li>any other first character of identifier part mapped by non-Java char name
  79  * from unicode and char in identifier behind non-java char name are converting
  80  * to upper case</li>
  81  * <li>examples:</li>
  82  * <li>
  83  * <ul>
  84  * <li>example* - exampleAsterisk</li>
  85  * <li>example*example - exampleAsteriskExample</li>
  86  * <li>\example - reverseSolidusExample</li>
  87  * <li>1example - digitOneExample</li>
  88  * <li>example1 - example1</li>
  89  * <li>int - intReservedKeyword</li>
  90  * <li>con - conReservedKeyword</li>
  91  * </ul>
  92  * </li>
  93  * </ul>
  94  * </li>
  95  * <li>package - full package name (<a href=
  96  * "https://docs.oracle.com/javase/tutorial/java/package/namingpkgs.html">
  97  * Naming a package</a>)</li>
  98  * <li>
 100  * <ul>
 101  * <li>parts of package name are separated by dots</li>
 102  * <li>parts of package name are converting to lower case</li>
 103  * <li>if parts of package name are reserved Java or Windows keywords, such as
 104  * 'int' the suggested convention is to add an underscore to keyword</li>
 105  * <li>dash is parsed as underscore according to <a href=
 106  * "https://docs.oracle.com/javase/tutorial/java/package/namingpkgs.html">
 107  * Naming a package</a></li>
 108  * <li>examples:</li>
 109  * <li>
 110  * <ul>
 111  * <li>org.example* - org.exampleasterisk</li>
 112  * <li>org.example*example - org.exampleasteriskexample</li>
 113  * <li>org.\example - org.reversesolidusexample</li>
 114  * <li>org.1example - org.digitoneexample</li>
 115  * <li>org.example1 - org.example1</li>
 116  * <li>org.int - org.int_</li>
 117  * <li>org.con - org.con_</li>
 118  * <li>org.foo-cont - org.foo_cont</li>
 119  * </ul>
 120  * </li>
 121  * </ul>
 122  * </li>
 123  * </ul>
 124  *
 125  * <p>
 126  * There is a special case for CLASS, INTERFACE, ENUM, ENUM VALUE, CONSTANT, METHOD
 127  * and VARIABLE: if the identifier contains a single dash, the converter does not
 128  * treat that dash as a non-java char. Otherwise, if the dash is the first or the
 129  * last char of the identifier, or there are several dashes in a row, these dashes
 130  * are converted as non-java chars.
 131  * Example:
 132  * <ul>
 133  * <li>class, enum, interface</li>
 134  * <li>
 135  * <ul>
 136  * <li>foo-cont - FooCont</li>
 137  * <li>foo--cont - FooHyphenMinusHyphenMinusCont</li>
 138  * <li>-foo - HyphenMinusFoo</li>
 139  * <li>foo- - FooHyphenMinus</li>
 140  * </ul>
 141  * </li>
 142  * <li>enum value, constant</li>
 143  * <li>
 144  * <ul>
 145  * <li>foo-cont - FOO_CONT</li>
 146  * <li>foo--cont - FOO_HYPHEN_MINUS_HYPHEN_MINUS_CONT</li>
 147  * <li>-foo - HYPHEN_MINUS_FOO</li>
 148  * <li>foo- - FOO_HYPHEN_MINUS</li>
 149  * </ul>
 150  * </li>
 151  * <li>method, variable</li>
 152  * <li>
 153  * <ul>
 154  * <li>foo-cont - fooCont</li>
 155  * <li>foo--cont - fooHyphenMinusHyphenMinusCont</li>
 156  * <li>-foo - hyphenMinusFoo</li>
 157  * <li>foo- - fooHyphenMinus</li>
 158  * </ul>
 159  * </li>
 160  * </ul>
 161  *
 162  * <p>
 163  * The next special case concerns normalizing a class name which already exists in
 164  * the package - but with different camel cases (foo, Foo, fOo, ...). Every subsequent
 165  * class with the same name gets its actual rank (serial number) appended, except
 166  * the first one. This works for CLASS, ENUM and INTERFACE java
 167  * identifiers. If the same ENUM VALUES exist in an ENUM (with different
 168  * camel cases), then they are handled with the same logic as CLASSES, ENUMS and
 169  * INTERFACES but according to the list of pairs of their ENUM parent. Example:
 170  *
 171  * <ul>
 172  * <li>class, enum, interface</li>
 173  * <li>
 174  * <ul>
 175  * <li>package name org.example, class (or interface or enum) Foo - normalized
 176  * to Foo
 177  * <li>package name org.example, class (or interface or enum) fOo - normalized
 178  * to Foo1
 179  * </ul>
 180  * </li>
 181  * <li>enum value</li>
 182  * <li>
 183  * <ul>
 184  * <li>
 185  *
 186  * <pre>
 187  * type enumeration {
 188  *     enum foo;
 189  *     enum Foo;
 190  * }
 191  * </pre>
 192  *
 193  * </li>
 194  * <li>YANG enum values will be mapped to 'FOO' and 'FOO_1' Java enum
 195  * values.</li>
 196  * </ul>
 197  * </li>
 198  * </ul>
 199  */
 200 @Beta
 201 public final class JavaIdentifierNormalizer {
 202
 203     public static final Set<String> SPECIAL_RESERVED_PATHS = ImmutableSet.of(
 204         "org.opendaylight.yangtools.concepts",
 205         "org.opendaylight.yangtools.yang.common",
 206         "org.opendaylight.yangtools.yang.model",
 207         "org.opendaylight.mdsal.binding.javav2.spec",
 208         "java",
 209         "com");
 210
 211     private static final char UNDERSCORE = '_';
 212     private static final char DASH = '-';
 213     private static final String RESERVED_KEYWORD = "reserved_keyword";
 214     private static final Set<String> PRIMITIVE_TYPES = ImmutableSet.of("char[]", "byte[]");
 215
 216     private static final CharMatcher DASH_MATCHER = CharMatcher.is(DASH);
 217     private static final CharMatcher DASH_OR_SPACE_MATCHER = CharMatcher.anyOf(" -");
 218     private static final CharMatcher UNDERSCORE_MATCHER = CharMatcher.is(UNDERSCORE);
 219     private static final Splitter DOT_SPLITTER = Splitter.on('.');
 220     private static final Splitter UNDERSCORE_SPLITTER = Splitter.on(UNDERSCORE);
 221
 222     private static final Pattern DOUBLE_UNDERSCORE_PATTERN = Pattern.compile("__", Pattern.LITERAL);
 223
 224     // Converted to lower case
 225     private static final Set<String> WINDOWS_RESERVED_WORDS = BindingMapping.WINDOWS_RESERVED_WORDS.stream()
 226             .map(String::toLowerCase).collect(ImmutableSet.toImmutableSet());
 227
 228     private JavaIdentifierNormalizer() {
 229         throw new UnsupportedOperationException("Util class");
 230     }
 231
 232     /**
 233      * <p>
 234      * According to <a href="https://tools.ietf.org/html/rfc7950#section-9.6.4">YANG RFC 7950</a>,
 235      * all assigned names in an enumeration MUST be unique. Created names are contained in the list
 236      * of {@link Enumeration.Pair}. This method adds actual index with underscore behind name of new
 237      * enum value only if this name already exists in one of the list of {@link Enumeration.Pair}.
 238      * Then, the name will be converted to java chars according to {@link JavaIdentifier#ENUM_VALUE}
 239      * and returned.
 240      * </p>
 241      * Example:
 242      *
 243      * <pre>
 244      * type enumeration {
 245      *     enum foo;
 246      *     enum Foo;
 247      * }
 248      * </pre>
 249      *
 250      * <p>
 251      * YANG enum values will be mapped to 'FOO' and 'FOO_1' Java enum values.
 252      *
 253      * @param name
 254      *            - name of new enum value
 255      * @param values
 256      *            - list of all actual enum values
 257      * @return converted and fixed name of new enum value
 258      */
 259     public static String normalizeEnumValueIdentifier(final String name, final List<Pair> values) {
 260         return convertIdentifierEnumValue(name, name, values, 1);
 261     }
 262
 263     /**
 264      * Normalizing full package name by non java chars and reserved keywords.
 265      *
 266      * @param fullPackageName
 267      *            - full package name
 268      * @return normalized name
 269      */
 270     public static String normalizeFullPackageName(final String fullPackageName) {
 271         final Iterator<String> it = DOT_SPLITTER.split(fullPackageName).iterator();
 272         if (!it.hasNext()) {
 273             return fullPackageName;
 274         }
 275
 276         final StringBuilder sb = new StringBuilder(fullPackageName.length());
 277         while (true) {
 278             String next = it.next();
 279             sb.append(normalizePartialPackageName(next));
 280             if (!it.hasNext()) {
 281                 return sb.toString();
 282             }
 283
 284             if (!next.isEmpty()) {
 285                 sb.append('.');
 286             }
 287         }
 288     }
 289
 290     /**
 291      * Normalizing part of package name by non java chars.
 292      *
 293      * @param packageNamePart
 294      *            - part of package name
 295      * @return normalized name
 296      */
 297     static String normalizePartialPackageName(final String packageNamePart) {
 298         // if part of package name consist from java or windows reserved word, return it with
 299         // underscore at the end and in lower case
 300         final String lowerPart = packageNamePart.toLowerCase();
 301         if (BindingMapping.JAVA_RESERVED_WORDS.contains(lowerPart) || WINDOWS_RESERVED_WORDS.contains(lowerPart)) {
 302             return lowerPart + UNDERSCORE;
 303         }
 304
 305         final String normalizedPart = DASH_MATCHER.replaceFrom(packageNamePart, UNDERSCORE);
 306
 307         final StringBuilder sb = new StringBuilder();
 308         final StringBuilder innerSb = new StringBuilder();
 309         for (int i = 0; i < normalizedPart.length(); i++) {
 310             final char c = normalizedPart.charAt(i);
 311             if (c == UNDERSCORE) {
 312                 if (innerSb.length() != 0) {
 313                     sb.append(normalizeSpecificIdentifier(innerSb.toString(), JavaIdentifier.PACKAGE));
 314                     innerSb.setLength(0);
 315                 }
 316                 sb.append(UNDERSCORE);
 317             } else {
 318                 innerSb.append(c);
 319             }
 320         }
 321         if (innerSb.length() != 0) {
 322             sb.append(normalizeSpecificIdentifier(innerSb.toString(), JavaIdentifier.PACKAGE));
 323         }
 324         // returned normalized part of package name
 325         return sb.toString();
 326     }
 327
 328     /**
 329      * Find and convert non Java chars in identifiers of generated transfer objects, initially
 330      * derived from corresponding YANG according to
 331      * <a href="http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.8"> Java
 332      * Specifications - Identifiers</a>. If there is more same class names at the same package, then
 333      * append rank (serial number) to the end of them. Works for class, enum, interface.
 334      *
 335      * @param packageName
 336      *            - package of identifier
 337      * @param className
 338      *            - name of identifier
 339      * @return - java acceptable identifier
 340      */
 341     static String normalizeClassIdentifier(final String packageName, final String className,
 342             final ModuleContext context) {
 343         if (packageName.isEmpty() && PRIMITIVE_TYPES.contains(className)) {
 344             return className;
 345         }
 346         for (final String reservedPath : SPECIAL_RESERVED_PATHS) {
 347             if (packageName.startsWith(reservedPath)) {
 348                 return className;
 349             }
 350         }
 351         final String convertedClassName = normalizeSpecificIdentifier(className, JavaIdentifier.CLASS);
 352
 353         // if packageName contains class name at the end, then the className is name of inner class
 354         final String basePackageName;
 355         final int lastDot = packageName.lastIndexOf('.');
 356         if (lastDot != -1 && Character.isUpperCase(packageName.charAt(lastDot + 1))) {
 357             // ignore class name in package name - inner class name has to be normalized according to original package
 358             // of parent class
 359             basePackageName = packageName.substring(0, lastDot);
 360         } else {
 361             basePackageName = packageName;
 362         }
 363
 364         return normalizeClassIdentifier(basePackageName, convertedClassName, convertedClassName, 1, context);
 365     }
 366
 367     /**
 368      * Checking while there doesn't exist any class name with the same name
 369      * (regardless of camel cases) in package.
 370      *
 371      * @param packageName
 372      *            - package of class name
 373      * @param origClassName
 374      *            - original class name
 375      * @param actualClassName
 376      *            - actual class name with rank (serial number)
 377      * @param rank
 378      *            - actual rank (serial number)
 379      * @return converted identifier
 380      */
 381     private static String normalizeClassIdentifier(final String packageName, final String origClassName,
 382             final String actualClassName, final int rank, final ModuleContext context) {
 383
 384         final ListMultimap<String, String> packagesMap = context.getPackagesMap();
 385
 386         synchronized (packagesMap) {
 387             if (packagesMap.containsKey(packageName)) {
 388                 for (final String existingName : packagesMap.get(packageName)) {
 389                     if (actualClassName.equalsIgnoreCase(existingName)) {
 390                         return normalizeClassIdentifier(packageName, origClassName, origClassName + rank,
 391                                 rank + 1, context);
 392                     }
 393                 }
 394             }
 395             context.putToPackagesMap(packageName, actualClassName);
 396             return actualClassName;
 397         }
 398     }
 399
 400     /**
 401      * Find and convert non Java chars in identifiers of generated transfer objects, initially
 402      * derived from corresponding YANG.
 403      *
 404      * @param identifier
 405      *            - name of identifier
 406      * @param javaIdentifier
 407      *            - java type of identifier
 408      * @return - java acceptable identifier
 409      */
 410     public static String normalizeSpecificIdentifier(final String identifier, final JavaIdentifier javaIdentifier) {
 411         // if identifier isn't PACKAGE type then check it by reserved keywords
 412         if (javaIdentifier != JavaIdentifier.PACKAGE) {
 413             final String lower = identifier.toLowerCase();
 414             if (BindingMapping.JAVA_RESERVED_WORDS.contains(lower) || WINDOWS_RESERVED_WORDS.contains(lower)) {
 415                 return fixCasesByJavaType(lower + UNDERSCORE + RESERVED_KEYWORD, javaIdentifier);
 416             }
 417         }
 418
 419         // check and convert first char in identifier if there is non-java char
 420         final StringBuilder sb = new StringBuilder();
 421         final char firstChar = identifier.charAt(0);
 422         if (!Character.isJavaIdentifierStart(firstChar)) {
 423             // converting first char of identifier
 424             sb.append(convertFirst(firstChar, existNext(identifier, 0)));
 425         } else {
 426             sb.append(firstChar);
 427         }
 428         // check and convert other chars in identifier, if there is non-java char
 429         for (int i = 1; i < identifier.length(); i++) {
 430             final char actualChar = identifier.charAt(i);
 431             // ignore single dash as non java char - if there is more dashes in a row or dash is as
 432             // the last char in identifier then parse these dashes as non java chars
 433             if (actualChar == DASH && existNext(identifier, i)) {
 434                 if (identifier.charAt(i - 1) != DASH && identifier.charAt(i + 1) != DASH) {
 435                     sb.append(UNDERSCORE);
 436                     continue;
 437                 }
 438             }
 439             if (!Character.isJavaIdentifierPart(actualChar)) {
 440                 // prepare actual string of sb for checking if underscore exist on position of the last char
 441                 sb.append(convert(actualChar, existNext(identifier, i), sb.charAt(sb.length() - 1)));
 442             } else {
 443                 sb.append(actualChar);
 444             }
 445         }
 446
 447         // apply camel case in appropriate way
 448         return fixCasesByJavaType(DOUBLE_UNDERSCORE_PATTERN.matcher(sb).replaceAll("_").toLowerCase(), javaIdentifier);
 449     }
 450
 451     /**
 452      * Fix cases of converted identifiers by Java type.
 453      *
 454      * @param convertedIdentifier
 455      *            - converted identifier
 456      * @param javaIdentifier
 457      *            - java type of identifier
 458      * @return converted identifier with right cases according to java type
 459      */
 460     private static String fixCasesByJavaType(final String convertedIdentifier, final JavaIdentifier javaIdentifier) {
 461         switch (javaIdentifier) {
 462             case CLASS:
 463             case ENUM:
 464             case INTERFACE:
 465                 return capitalize(fixCases(convertedIdentifier));
 466             case ENUM_VALUE:
 467             case CONSTANT:
 468                 return convertedIdentifier.toUpperCase();
 469             case METHOD:
 470             case VARIABLE:
 471                 return fixCases(convertedIdentifier);
 472             case PACKAGE:
 473                 return UNDERSCORE_MATCHER.removeFrom(convertedIdentifier);
 474             default:
 475                 throw new IllegalArgumentException("Unknown java type of identifier : " + javaIdentifier.toString());
 476         }
 477     }
 478
 479     /**
 480      * Delete unnecessary chars in converted identifier and apply camel case in appropriate way.
 481      *
 482      * @param convertedIdentifier
 483      *            - original converted identifier
 484      * @return resolved identifier
 485      */
 486     private static String fixCases(final String convertedIdentifier) {
 487         if (convertedIdentifier.indexOf(UNDERSCORE) == -1) {
 488             return convertedIdentifier;
 489         }
 490
 491         final StringBuilder sb = new StringBuilder(convertedIdentifier.length());
 492         final Iterator<String> it = UNDERSCORE_SPLITTER.split(convertedIdentifier).iterator();
 493         sb.append(it.next());
 494         while (it.hasNext()) {
 495             sb.append(capitalize(it.next()));
 496         }
 497         return sb.toString();
 498     }
 499
 500     /**
 501      * Check if there exist next char in identifier behind actual char position.
 502      *
 503      * @param identifier
 504      *            - original identifier
 505      * @param actual
 506      *            - actual char position
 507      * @return true if there is another char, false otherwise
 508      */
 509     private static boolean existNext(final String identifier, final int actual) {
 510         return identifier.length() > actual + 1;
 511     }
 512
 513     /**
 514      * Converting first char of identifier. This happen only if this char is
 515      * non-java char.
 516      *
 517      * @param firstChar
 518      *            - first char
 519      * @param existNext
 520      *            - existing of next char behind actual char
 521      * @return converted char
 522      */
 523     private static String convertFirst(final char firstChar, final boolean existNext) {
 524         final String name = DASH_OR_SPACE_MATCHER.replaceFrom(Character.getName(firstChar), UNDERSCORE);
 525         return existNext ? name + '_' : name;
 526     }
 527
 528     /**
 529      * Converting any char in java identifier, This happen only if this char is
 530      * non-java char.
 531      *
 532      * @param actualChar
 533      *            - actual char
 534      * @param existNext
 535      *            - existing of next char behind actual char
 536      * @param partialLastChar
 537      *            - last char of partial converted identifier
 538      * @return converted char
 539      */
 540     private static String convert(final char actualChar, final boolean existNext, final char partialLastChar) {
 541         return partialLastChar == '_' ? convertFirst(actualChar, existNext) : "_" + convertFirst(actualChar, existNext);
 542     }
 543
 544     /**
 545      * Capitalize input string.
 546      *
 547      * @param identifier
 548      *            - string to be capitalized
 549      */
 550     private static String capitalize(final String identifier) {
 551         return identifier.substring(0, 1).toUpperCase() + identifier.substring(1);
 552     }
 553
 554     private static String convertIdentifierEnumValue(final String name, final String origName, final List<Pair> values,
 555             final int rank) {
 556         String newName = name;
 557         for (final Pair pair : values) {
 558             if (name.equalsIgnoreCase(pair.getName()) || name.equalsIgnoreCase(pair.getMappedName())) {
 559                 int actualRank = rank;
 560                 final String actualName = origName + UNDERSCORE + actualRank;
 561                 newName = convertIdentifierEnumValue(actualName, origName, values, ++actualRank);
 562             }
 563         }
 564         return normalizeSpecificIdentifier(newName, JavaIdentifier.ENUM_VALUE);
 565     }
 566 }