/** * An object representing the information extracted from an article from one or more heuristics. * * @author Sam Sarjant */ public class MinedInformation implements Mergeable<MinedInformation>, Serializable { private static final long serialVersionUID = 1L; /** The bitwise int for all info types being true. */ public static final int ALL_TYPES = (1 << InformationType.values().length) - 1; /** The non-parentage assertions found for this concept */ private Collection<PartialAssertion> assertions_ = new HashSet<>(); /** The assertions that are non-refutable and the selected disambiguated assertions. */ private Collection<DefiniteAssertion> concreteAssertions_ = new ArrayList<>(); /** The assertions to make during disjointness disambiguation. */ private Collection<DefiniteAssertion> concreteParentageAssertions_ = new ArrayList<>(); /** If there are any assertions that COULD be resolved as parentage assertions. */ private boolean hasParentageAssertions_ = false; private transient boolean isModified_ = false; /** The bitwise representation of the mined information. */ private int minedTypes_ = -1; /** The self ref to the mappable article. */ private WikipediaMappedConcept selfRef_; /** The article being processed. */ protected int articleID_; /** The type of infobox this article contains (if any). */ protected List<String> infoboxType_ = null; /** The extracted standing for articles. */ protected WeightedStanding standing_ = new WeightedStanding(); /** * Constructor for a new MinedInformation * * @param cycTerm The term this information represents. * @param article The article to extract information from. */ public MinedInformation(int article) { articleID_ = article; selfRef_ = MiningHeuristic.createSelfRefConcept(articleID_); } /** * Adds an assertion to this information. * * @param singleAssertion The assertion to add. */ public void addAssertion(MinedAssertion singleAssertion) { if (singleAssertion == null) return; if (singleAssertion instanceof DefiniteAssertion) { // A definite assertion - add to concretes. concreteAssertions_.add((DefiniteAssertion) singleAssertion); if (singleAssertion.isHierarchical()) { concreteParentageAssertions_.add((DefiniteAssertion) singleAssertion); hasParentageAssertions_ = true; } } else { // Only partially complete - add to disambiguatable. assertions_.add((PartialAssertion) singleAssertion); if (singleAssertion.isHierarchical()) hasParentageAssertions_ = true; } isModified_ = true; } /** * Adds an assertion to this information from the predicate and args. * * @param predicate The predicate of the assertion. * @param provenance The source of the assertion. * @param args The arguments of the assertion - can be {@link MappableConcept}. */ public void addAssertion( OntologyConcept predicate, HeuristicProvenance provenance, AssertionArgument... args) { if (predicate == null || args == null || args.length == 0) return; boolean isMappable = false; for (AssertionArgument aa : args) { if (aa instanceof MappableConcept) { isMappable = true; break; } } MinedAssertion assertion = null; if (isMappable) assertion = new PartialAssertion(predicate, provenance, args); else assertion = new DefiniteAssertion(predicate, provenance, (OntologyConcept[]) args); addAssertion(assertion); } /** * Adds a mined information type to the mined types this object has used. * * @param infoType The information types mined. */ public void addMinedInfoType(InformationType infoType) { int val = 1 << infoType.ordinal(); if (minedTypes_ == -1) minedTypes_ = val; minedTypes_ |= val; } /** * Adds a mined information type to the mined types this object has used. * * @param infoType The information types mined. */ public void addMinedInfoType(int infoType) { if (minedTypes_ == -1) minedTypes_ = infoType; minedTypes_ |= infoType; } public void addStandingInformation( TermStanding standing, double weight, HeuristicProvenance provenance) { standing_.addStanding(provenance, standing, weight); isModified_ = true; } public void addStandingInformation(WeightedStanding standing) throws Exception { standing_.mergeInformation(standing); isModified_ = true; } public void clearInformation() { assertions_.clear(); standing_ = new WeightedStanding(); infoboxType_ = null; concreteAssertions_.clear(); concreteParentageAssertions_.clear(); minedTypes_ = -1; isModified_ = false; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; MinedInformation other = (MinedInformation) obj; if (articleID_ != other.articleID_) return false; if (assertions_ == null) { if (other.assertions_ != null) return false; } else if (!assertions_.equals(other.assertions_)) return false; if (concreteAssertions_ == null) { if (other.concreteAssertions_ != null) return false; } else if (!concreteAssertions_.equals(other.concreteAssertions_)) return false; if (infoboxType_ == null) { if (other.infoboxType_ != null) return false; } else if (!infoboxType_.equals(other.infoboxType_)) return false; if (minedTypes_ != other.minedTypes_) return false; if (standing_ == null) { if (other.standing_ != null) return false; } else if (!standing_.equals(other.standing_)) return false; return true; } public Integer getArticle() { return articleID_; } public WeightedStanding getArticleStanding(int article) { return standing_; } /** * Gets the unresolved assertion queues. * * @return The unresolved assertion queues. */ public Collection<PartialAssertion> getAssertions() { return assertions_; } public Collection<DefiniteAssertion> getConcreteAssertions() { return concreteAssertions_; } public Collection<DefiniteAssertion> getConcreteParentageAssertions() { return concreteParentageAssertions_; } public List<String> getInfoboxTypes() { return infoboxType_; } public MappableConcept getMappableSelfRef() { return selfRef_; } public int getMinedInformation() { if (minedTypes_ == -1) return 0; return minedTypes_; } public WeightedStanding getStanding() { return getArticleStanding(articleID_); } /** * Returns a bitwise int of the information that has not been mined by this {@link * MinedInformation} yet. * * @return A bitwise int of the unmined information. */ public int getUnminedInformation() { if (minedTypes_ == -1) return ALL_TYPES; return minedTypes_ ^ ALL_TYPES; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + articleID_; result = prime * result + ((assertions_ == null) ? 0 : assertions_.hashCode()); result = prime * result + ((concreteAssertions_ == null) ? 0 : concreteAssertions_.hashCode()); result = prime * result + ((infoboxType_ == null) ? 0 : infoboxType_.hashCode()); result = prime * result + minedTypes_; result = prime * result + ((standing_ == null) ? 0 : standing_.hashCode()); return result; } public boolean hasParentageAssertions() { return hasParentageAssertions_; } public boolean isMined() { return minedTypes_ >= 0; } public boolean isModified() { return isModified_; } @Override public boolean mergeInformation(MinedInformation otherInfo) throws Exception { return mergeInformation(otherInfo, false); } /** * If, during the merging, internal data should be recreated (cloned). * * @param otherInfo The other info to merge. * @param recreateInternals If internal data should be cloned. * @return True if the merging was successful. * @throws Exception Should something go awry... */ public boolean mergeInformation(MinedInformation otherInfo, boolean recreateInternals) throws Exception { // No info, do nothing. if (otherInfo == null || !otherInfo.isModified_ || otherInfo == this) return true; isModified_ = true; // Non-matching information! if (articleID_ != otherInfo.articleID_) throw new Exception("Information does not match!"); for (PartialAssertion assertionQueue : otherInfo.assertions_) { if (recreateInternals) addAssertion(assertionQueue.clone()); else addAssertion(assertionQueue); } for (DefiniteAssertion concrete : otherInfo.concreteAssertions_) { if (recreateInternals) addAssertion(concrete.clone()); else addAssertion(concrete); } if (otherInfo.infoboxType_ != null) infoboxType_ = otherInfo.infoboxType_; if (minedTypes_ == -1) minedTypes_ = otherInfo.minedTypes_; else minedTypes_ &= otherInfo.minedTypes_; // Resolve standing addStandingInformation(otherInfo.standing_); return true; } public void setInfoboxTypes(List<String> infoboxTypes) { infoboxType_ = infoboxTypes; isModified_ = true; } /** * Prints out any new info found in this mined information. * * @return The new info string. */ public String toFlatString() { return toString().replaceAll("\\n", ", "); } @Override public String toString() { StringBuilder buffer = new StringBuilder("Mined info for " + articleID_); // Standing if (!standing_.isEmpty()) buffer.append("\nStanding: " + standing_); if (infoboxType_ != null) buffer.append("\nInfobox type: " + infoboxType_); // Parentage if (!concreteAssertions_.isEmpty()) buffer.append("\nConcrete parentage assertions: " + concreteParentageAssertions_); return buffer.toString(); } public void setModified(boolean b) { isModified_ = true; } }