/**
 * Rebuilds {@code docClassTermIDFs} from scratch.
 *
 * <p>For every term reported by {@code docClassFrequencyPerTerm}, the stored value is
 * {@code log10(docClasses.size() / (1 + frequency))}, where {@code frequency} is what
 * {@code docClassFrequencyPerTerm.getFrequency(term)} returns for that term.
 *
 * <p>Because of the {@code 1 +} in the denominator, the ratio stays finite when the
 * frequency is zero, and the resulting value is negative whenever the frequency is
 * greater than or equal to {@code docClasses.size()}.
 */
private void calculateDocClassTermIDFs() {
    // Replace any previously computed table before filling it again.
    docClassTermIDFs = new HashMap<>();

    for (String term : docClassFrequencyPerTerm.getTerms()) {
        final Integer frequencyOfTerm = docClassFrequencyPerTerm.getFrequency(term);

        // Floating-point division: the denominator is a double, so the size is promoted.
        final double smoothedRatio = docClasses.size() / (1 + frequencyOfTerm.doubleValue());

        docClassTermIDFs.put(term, Math.log10(smoothedRatio));
    }
}
/**
 * Weights each term of the given bag by the IDF value stored in {@code docClassTermIDFs}.
 *
 * <p>For every key in {@code queryBag.getFrequencies()}, the result maps that term to
 * {@code idf * queryBag.getFrequency(term)}. Terms that have no entry in
 * {@code docClassTermIDFs} are still present in the result, with a weight of {@code 0}.
 *
 * @param queryBag the bag of words whose term frequencies are to be weighted
 * @return a new map from term to weighted value, one entry per term in the bag's frequencies
 */
private Map<String, Double> weightBagOfWordsWithIDF(BagOfWords queryBag) {
    Map<String, Double> weightByTerm = new HashMap<>();

    for (Entry<String, Integer> termCount : queryBag.getFrequencies().entrySet()) {
        String term = termCount.getKey();
        Double idf = docClassTermIDFs.get(term);

        double weight;
        if (idf == null) {
            // Term has no stored IDF: contribute nothing rather than fail.
            weight = 0d;
        } else {
            // NOTE(review): the frequency is fetched via getFrequency(term) instead of
            // termCount.getValue(); presumably both yield the same count — confirm
            // against BagOfWords before simplifying.
            weight = idf * queryBag.getFrequency(term);
        }

        weightByTerm.put(term, weight);
    }

    return weightByTerm;
}