// Add wordnet test ? public static String getSecondaryEntity(String title) { String potentialEntity = null; // e.g. National_Party_(South_Africa) if (title.contains("(") && title.endsWith(")")) { potentialEntity = StringUtils.substringBetween(title, "(", ")"); String[] parts = potentialEntity.split(","); // e.g. National_Party_(UK,_1976) if (parts.length > 1 && StringUtils.containsOnly(parts[1], "_0123456789")) { potentialEntity = parts[0]; } } // e.g. Chicago,_Illinois else if (title.contains(",")) { potentialEntity = StringUtils.substringAfterLast(title, ","); } // e.g. New_Progressive_Party_of_Puerto_Rico // Might need to be careful else if (title.contains("of")) { potentialEntity = StringUtils.substringAfterLast(title, "of"); if (title.startsWith("List_of")) { String[] tokens = potentialEntity.split("_"); int capPos = 0; StringBuilder sb = new StringBuilder(); while (capPos < tokens.length && (StringUtils.isEmpty(tokens[capPos]) || WordFeatures.isCapitalized(tokens[capPos]))) { sb.append(tokens[capPos]).append('_'); capPos++; } potentialEntity = sb.toString(); } } // Removes extra chars if (potentialEntity != null) potentialEntity = potentialEntity.replace('_', ' ').trim().replace(' ', '_'); try { if (wiki == null || wiki.getTitleIdOf(potentialEntity) >= 0) return potentialEntity; } catch (Exception e) { } return null; }
/**
 * Validates a file-name suffix, reporting each violation through
 * {@code Throw.ioe}. The accepted shape is a dot followed by 1 to 10
 * lowercase ASCII letters, i.e. {@code (dot)[a-z]{1,10}}.
 *
 * <p>Checks run in this order: null, empty, missing leading dot, too short,
 * too long, characters other than {@code a-z} after the dot.
 *
 * @param suffix the suffix to validate, e.g. {@code ".txt"}
 * @throws IOException declared for the failures reported via {@code Throw.ioe}
 */
public static void checkValidSuffix(String suffix) throws IOException {
    if (suffix == null) {
        Throw.ioe(TAG, "Suffix is null.");
        return;
    }

    final int length = suffix.length();

    if (suffix.isEmpty()) {
        Throw.ioe(TAG, "Suffix is empty string.");
    }
    if (!suffix.startsWith(".")) {
        Throw.ioe(TAG, "Suffix must start with a dot: \"%s\".", suffix);
    }
    if (length < 2) {
        Throw.ioe(TAG, "Suffix \"%s\" is too short. Must be (dot)[a-z]{1,10}.", suffix);
    }
    if (length > 11) {
        Throw.ioe(TAG, "Suffix \"%s\" is too long. Must be (dot)[a-z]{1,10}.", suffix);
    }

    // Everything after the leading dot must be lowercase ASCII letters.
    final String afterDot = suffix.substring(1);
    final boolean lettersOnly =
            StringUtils.containsOnly(afterDot, "abcdefghijklmnopqrstuvwxyz");
    if (!lettersOnly) {
        Throw.ioe(
                TAG,
                "Suffix \"%s\" contains weird characters. Must be (dot)[a-z]{1,10}.",
                suffix);
    }
}