예제 #1
0
  // TODO: add a WordNet test?
  /**
   * Extracts a secondary entity embedded in an underscore-separated title, if there is one.
   *
   * <p>The heuristics below are tried in order; only the first one whose condition matches is
   * applied:
   *
   * <ol>
   *   <li><b>Trailing parenthetical</b> - the title contains {@code "("} and ends with
   *       {@code ")"}, e.g. {@code National_Party_(South_Africa)} gives {@code South_Africa}. If
   *       the parenthetical is comma separated and its second part consists only of digits and
   *       underscores, e.g. {@code National_Party_(UK,_1976)}, only the part before the first
   *       comma is kept ({@code UK}).
   *   <li><b>Comma</b> - the text after the last comma, e.g. {@code Chicago,_Illinois} gives
   *       {@code Illinois}.
   *   <li><b>"of"</b> - the text after the last occurrence of {@code "of"}, e.g. {@code
   *       New_Progressive_Party_of_Puerto_Rico} gives {@code Puerto_Rico}. For titles starting
   *       with {@code List_of}, only the leading run of empty or capitalized tokens of that text
   *       is kept.
   * </ol>
   *
   * <p>The candidate is then normalized: underscores become spaces, the result is trimmed, and
   * spaces become underscores again. When {@code wiki} is non-null the candidate is returned only
   * if {@code wiki.getTitleIdOf} reports a non-negative id for it; when {@code wiki} is null the
   * candidate is returned without validation.
   *
   * @param title the title to analyze; {@code null} yields {@code null}
   * @return the normalized candidate entity, or {@code null} if no heuristic matched, the lookup
   *     reported a negative id, or the lookup failed
   */
  public static String getSecondaryEntity(String title) {
    if (title == null) {
      return null;
    }

    String potentialEntity = null;

    if (title.contains("(") && title.endsWith(")")) {
      // e.g. National_Party_(South_Africa)
      potentialEntity = StringUtils.substringBetween(title, "(", ")");
      String[] parts = potentialEntity.split(",");

      // e.g. National_Party_(UK,_1976): drop the trailing numeric qualifier.
      if (parts.length > 1 && StringUtils.containsOnly(parts[1], "_0123456789")) {
        potentialEntity = parts[0];
      }
    } else if (title.contains(",")) {
      // e.g. Chicago,_Illinois
      potentialEntity = StringUtils.substringAfterLast(title, ",");
    } else if (title.contains("of")) {
      // e.g. New_Progressive_Party_of_Puerto_Rico
      // NOTE(review): this matches "of" anywhere in the title, including inside a longer word;
      // confirm whether only the standalone token "_of_" should be considered.
      potentialEntity = StringUtils.substringAfterLast(title, "of");
      if (title.startsWith("List_of")) {
        // Keep only the leading run of empty or capitalized tokens after the last "of".
        String[] tokens = potentialEntity.split("_");
        StringBuilder sb = new StringBuilder();
        int capPos = 0;
        while (capPos < tokens.length
            && (StringUtils.isEmpty(tokens[capPos])
                || WordFeatures.isCapitalized(tokens[capPos]))) {
          sb.append(tokens[capPos]).append('_');
          capPos++;
        }
        potentialEntity = sb.toString();
      }
    }

    // No heuristic matched: there is nothing to normalize or to look up.
    if (potentialEntity == null) {
      return null;
    }

    // Strip leading/trailing separators while keeping underscores between words.
    potentialEntity = potentialEntity.replace('_', ' ').trim().replace(' ', '_');

    // Without a wiki instance the candidate cannot be validated; return it as is.
    if (wiki == null) {
      return potentialEntity;
    }

    try {
      if (wiki.getTitleIdOf(potentialEntity) >= 0) {
        return potentialEntity;
      }
    } catch (Exception ignored) {
      // Best effort: a failed lookup is treated the same as an unknown title.
    }

    return null;
  }
 /**
  * Checks that {@code suffix} has the form of a dot followed by 1 to 10 lowercase letters
  * {@code a-z}.
  *
  * <p>Each violated rule is reported through {@code Throw.ioe} with a descriptive message. The
  * rules are checked in this order: not null, not empty, starts with a dot, at least 2
  * characters, at most 11 characters, and only {@code a-z} after the first character.
  *
  * @param suffix the suffix to validate, e.g. {@code ".txt"}
  * @throws IOException declared for invalid suffixes; presumably raised by {@code Throw.ioe},
  *     whose implementation is not visible here
  */
 public static void checkValidSuffix(String suffix) throws IOException {
   if (suffix == null) {
     Throw.ioe(TAG, "Suffix is null.");
     return;
   }

   final int length = suffix.length();

   if (suffix.isEmpty()) {
     Throw.ioe(TAG, "Suffix is empty string.");
   }
   if (!suffix.startsWith(".")) {
     Throw.ioe(TAG, "Suffix must start with a dot: \"%s\".", suffix);
   }
   if (length < 2) {
     Throw.ioe(TAG, "Suffix \"%s\" is too short. Must be (dot)[a-z]{1,10}.", suffix);
   }
   if (length > 11) {
     Throw.ioe(TAG, "Suffix \"%s\" is too long. Must be (dot)[a-z]{1,10}.", suffix);
   }

   // Everything after the first character must be a lowercase ASCII letter.
   final String afterFirstChar = suffix.substring(1);
   if (!StringUtils.containsOnly(afterFirstChar, "abcdefghijklmnopqrstuvwxyz")) {
     Throw.ioe(
         TAG, "Suffix \"%s\" contains weird characters. Must be (dot)[a-z]{1,10}.", suffix);
   }
 }