/**
 * Checks that an indexer written with {@code save} and read back with {@code Indexer.load}
 * compares equal to the instance that was saved.
 *
 * @throws IOException if writing to or reading from the in-memory streams fails
 */
public void testSaveLoadRoundtrip() throws IOException {
  // Serialize the shared fixture into an in-memory buffer.
  val baos = new ByteArrayOutputStream(3200);
  val dos = new DataOutputStream(baos);
  avengers.save(dos);

  // Read the very same bytes back into a fresh instance.
  val dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
  val otherAvengers = Indexer.load(dis);

  // TestNG's assertEquals takes (actual, expected): the reloaded copy is the value under test and
  // the original fixture is the expectation, so a failure message names them the right way round.
  assertEquals(otherAvengers, avengers);
}
/**
 * Exercises the read-only list operations of the shared {@code avengers} indexer: emptiness, size,
 * index lookups, element access, membership, array conversion and {@code subList}.
 */
public void testIndexer() {
  assertFalse(avengers.isEmpty());

  // assertEquals(actual, expected) instead of assertTrue(a == b): on failure the report shows the
  // value that was actually observed rather than just "expected true but found false".
  assertEquals(avengers.size(), 3);
  assertEquals(avengers.indexOf("cap"), 0);
  assertEquals(avengers.lastIndexOf("cap"), 0);
  assertEquals(avengers.get(0), "cap");

  assertTrue(avengers.contains("cap"));
  assertFalse(avengers.contains("made-up"));

  Object[] arr = avengers.toArray();
  assertEquals(arr, new String[] {"cap", "iron-man", "hulk"});

  // A sub-list spanning the whole range must compare equal to the indexer itself.
  assertEquals(avengers, avengers.subList(0, avengers.size()));
}
/**
 * Collects, for every caption in {@code captionToExpandedQuery}, the total number of hits the
 * indexer reports for that caption's expanded query.
 *
 * @param indexer the indexer whose {@code getTotalHits} is consulted
 * @param originalContentOnly if true, hits are counted with {@code Indexer.QueryType.ORIGINAL};
 *     otherwise with {@code Indexer.QueryType.FULL}
 * @return a caption -> hit-count map in the order the captions were visited; a caption whose
 *     lookup threw an exception is recorded with a count of 0
 */
public Map<String, Integer> getLexiconCounts(Indexer indexer, boolean originalContentOnly) {
  // The two lookups differ only in the query type, so decide it once.
  final Indexer.QueryType queryType =
      originalContentOnly ? Indexer.QueryType.ORIGINAL : Indexer.QueryType.FULL;

  Map<String, Integer> counts = new LinkedHashMap<String, Integer>();

  // Iterate over a snapshot of the keys rather than over the live key set.
  String[] captionSnapshot =
      captionToExpandedQuery.keySet().toArray(new String[captionToExpandedQuery.size()]);

  for (int i = 0; i < captionSnapshot.length; i++) {
    String caption = captionSnapshot[i];
    String expandedQuery = captionToExpandedQuery.get(caption);

    if (expandedQuery != null) {
      Integer hits = 0;
      try {
        hits = indexer.getTotalHits(expandedQuery, false, queryType);
      } catch (Exception e) {
        // Best effort: log the failure and fall through with the default count.
        Util.print_exception("Exception while collecting lexicon counts", e, log);
      }
      counts.put(caption, hits);
    } else {
      log.warn("Skipping unknown caption '" + caption + "'");
    }
  }

  return counts;
}
/** * main entry point: returns a category -> docs map for each (non-zero) category in the current * captionToQueryMap. * * @indexer must already have run * @docs results are restrictes to these docs. assumes all docs if docs is null or empty. * @captions (null/none = all) * <p>vihari This is a weird name for a method that returns documents with emotions instead * of emotions. */ public Map<String, Collection<Document>> getEmotions( Indexer indexer, Collection<Document> docs, boolean originalContentOnly, String... captions) { Map<String, Collection<Document>> result = new LinkedHashMap<String, Collection<Document>>(); Set<Document> docs_set = Util.castOrCloneAsSet(docs); // for (String[] emotion: emotionsData) String[] selected_captions = captions.length > 0 ? captions : captionToExpandedQuery.keySet().toArray(new String[0]); for (String caption : selected_captions) { String query = captionToExpandedQuery.get(caption); if (query == null) { log.warn("Skipping unknown caption '" + caption + "'"); continue; } // query is simply word1|word2|word3 etc for that sentiment // the -1 indicates that we want all docs in the indexer that match the query int threshold = 1; Indexer.QueryOptions options = new Indexer.QueryOptions(); options.setThreshold(threshold); options.setQueryType(Indexer.QueryType.ORIGINAL); Collection<Document> docsForCaption = indexer.docsForQuery(query, options); /* log.info (docsForCaption.size() + " before"); threshold = 2; docsForCaption = indexer.docsForQuery(query, -1, threshold); log.info (docsForCaption.size() + " after"); */ // Set<Document> docs = indexer.docsWithPhraseThreshold(query, -1, 2); // in future, we // may have a higher threshold for sentiment matching // if @param docs is present, retain only those docs that match, otherwise retain all if (!Util.nullOrEmpty(docs_set)) // docsForCaption.retainAll(docs_set); docsForCaption = Util.listIntersection(docsForCaption, docs_set); // put it in the result only if at least 1 doc matches if 
(docsForCaption.size() > 0) result.put(caption, docsForCaption); } return result; }
/**
 * Tests for {@link Indexer}: read-only list operations, rejection of mutation, and a save/load
 * round trip. The class-level {@code @Test} makes every public method a test.
 */
@Test
public class IndexerTest {
  /** Shared fixture built from a four-element stream in which "cap" appears twice. */
  private static final Indexer<String> avengers =
      Indexer.fromStream(Stream.of("cap", "iron-man", "hulk", "cap"));

  /**
   * Exercises emptiness, size, index lookups, element access, membership, array conversion and
   * {@code subList} on the shared fixture.
   */
  public void testIndexer() {
    assertFalse(avengers.isEmpty());

    // assertEquals(actual, expected) instead of assertTrue(a == b): on failure the report shows
    // the value that was actually observed rather than just "expected true but found false".
    assertEquals(avengers.size(), 3);
    assertEquals(avengers.indexOf("cap"), 0);
    assertEquals(avengers.lastIndexOf("cap"), 0);
    assertEquals(avengers.get(0), "cap");

    assertTrue(avengers.contains("cap"));
    assertFalse(avengers.contains("made-up"));

    Object[] arr = avengers.toArray();
    assertEquals(arr, new String[] {"cap", "iron-man", "hulk"});

    // A sub-list spanning the whole range must compare equal to the indexer itself.
    assertEquals(avengers, avengers.subList(0, avengers.size()));
  }

  /** Removing by index from an indexer is expected to raise a {@link RuntimeException}. */
  @Test(expectedExceptions = RuntimeException.class)
  public void testIndexerThrowsRemove() {
    Indexer.fromStream(Stream.of("a", "b", "c")).remove(0);
  }

  /** Adding an element to an indexer is expected to raise a {@link RuntimeException}. */
  @Test(expectedExceptions = RuntimeException.class)
  public void testIndexerThrowsAdd() {
    Indexer.fromStream(Stream.of("a", "b", "c")).add("d");
  }

  /**
   * Checks that an indexer written with {@code save} and read back with {@code Indexer.load}
   * compares equal to the instance that was saved.
   *
   * @throws IOException if writing to or reading from the in-memory streams fails
   */
  public void testSaveLoadRoundtrip() throws IOException {
    val baos = new ByteArrayOutputStream(3200);
    val dos = new DataOutputStream(baos);
    avengers.save(dos);

    val dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    val otherAvengers = Indexer.load(dis);

    // TestNG order is (actual, expected): the reloaded copy is the value under test.
    assertEquals(otherAvengers, avengers);
  }
}
/** Adding an element to an indexer built from a stream must raise a {@link RuntimeException}. */
@Test(expectedExceptions = RuntimeException.class)
public void testIndexerThrowsAdd() {
  Stream<String> letters = Stream.of("a", "b", "c");
  // The call below (construction included) is where the expected exception has to come from.
  Indexer.fromStream(letters).add("d");
}
/** Removing by index from an indexer built from a stream must raise a {@link RuntimeException}. */
@Test(expectedExceptions = RuntimeException.class)
public void testIndexerThrowsRemove() {
  Stream<String> letters = Stream.of("a", "b", "c");
  // The call below (construction included) is where the expected exception has to come from.
  Indexer.fromStream(letters).remove(0);
}