/** * Ctor read and map all rules from the given verb ocean file, but, keep only rules with allowed * relation types, and, for each verb pair, keep only the highest scoring rule. The rules are then * mapped. * * @param scoreThreshold rules with thresholds not higher than this will be screened * @param verbOceanRelationsFile e.g. Data\RESOURCES\VerbOcean\verbocean.unrefined.2004-05-20.txt * @param allowedRelationTypes only rules with these relations will be returned. others will be * screened. If they contain any of the {@link #FORBIDDEN_RELATION_TYPES}, a * LexicalResourceException is thrown. Cannot be null, can be empty. * @throws LexicalResourceException */ public VerbOceanLexicalResource( double scoreThreshold, File verbOceanRelationsFile, Set<RelationType> allowedRelationTypes) throws LexicalResourceException { if (scoreThreshold <= 0) throw new LexicalResourceException( "the score threshold must be positive. I got " + scoreThreshold); if (verbOceanRelationsFile == null) throw new LexicalResourceException("got null relations file"); if (!verbOceanRelationsFile.exists()) throw new LexicalResourceException(verbOceanRelationsFile + " doesn't exist"); if (allowedRelationTypes == null) throw new LexicalResourceException("allowedRelationTypes is null"); for (RelationType forbiddenRelationType : FORBIDDEN_RELATION_TYPES) if (allowedRelationTypes.contains(forbiddenRelationType)) throw new LexicalResourceException( "The given allowed relation types set " + allowedRelationTypes + " contains a forbidden relation type " + forbiddenRelationType); try { VERB = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.VERB); } catch (UnsupportedPosTagStringException e) { throw new LexicalResourceException("Internal error", e); } PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>> mapRulesByUnorderedPair = new PairMap<String, LexicalRule<? extends VerbOceanRuleInfo>>(); Set<Pair<String>> verbPairs = new LinkedHashSet<Pair<String>>(); // read and map all rules, but, keep only rules with allowed relation types, and, for each verb // pair, keep only the highest scoring rule try { BufferedReader reader = new BufferedReader(new FileReader(verbOceanRelationsFile)); String line; while ((line = reader.readLine()) != null) { if (line.length() != 0 && line.charAt(0) != '#') // skip empty and commented lines { String[] parts = line.split(" "); RelationType relationType = RelationType.parse(parts[1]); double score = Double.parseDouble(parts[4]); if (allowedRelationTypes.contains(relationType) && score > scoreThreshold) // screen out unallowed relation types and low scores { String leftVerb = parts[0]; String rightVerb = parts[2]; Pair<String> verbPair = new Pair<String>(leftVerb, rightVerb); LexicalRule<? extends VerbOceanRuleInfo> comparedRule = mapRulesByUnorderedPair.getValueOf(verbPair); if (comparedRule == null || score > comparedRule .getConfidence()) // if there is a better rule for the same verb pair, skip // this rule mapRulesByUnorderedPair.put( verbPair, makeRule(leftVerb, rightVerb, score, relationType)); if (comparedRule == null) verbPairs.add(verbPair); } } } reader.close(); } catch (FileNotFoundException e) { throw new LexicalResourceException("file not found: " + verbOceanRelationsFile, e); } catch (IOException e) { throw new LexicalResourceException("IO error reading: " + verbOceanRelationsFile, e); } // fill up the one sided rule maps fillTheRuleMaps(mapRulesByUnorderedPair, verbPairs); }