コード例 #1
0
ファイル: IndexerTest.java プロジェクト: irwenqiang/ml-1
 /**
  * Writes the shared {@code avengers} indexer to an in-memory byte buffer, reads it back with
  * {@code Indexer.load} and expects the reloaded instance to equal the original.
  */
 public void testSaveLoadRoundtrip() throws IOException {
   val buffer = new ByteArrayOutputStream(3200);
   avengers.save(new DataOutputStream(buffer));

   // Feed the exact bytes that were just written back into the loader.
   val serialized = buffer.toByteArray();
   val input = new DataInputStream(new ByteArrayInputStream(serialized));
   val reloaded = Indexer.load(input);

   assertEquals(avengers, reloaded);
 }
コード例 #2
0
ファイル: IndexerTest.java プロジェクト: irwenqiang/ml-1
 /**
  * Exercises the list-style accessors of the shared {@code avengers} indexer: emptiness, size,
  * index lookups, element access, membership, array conversion and full-range sub-list equality.
  */
 public void testIndexer() {
   assertFalse(avengers.isEmpty());
   // assertEquals(actual, expected) instead of assertTrue(a == b): a failure then reports the
   // value that was actually observed rather than just "expected true".
   assertEquals(avengers.size(), 3);
   assertEquals(avengers.indexOf("cap"), 0);
   assertEquals(avengers.lastIndexOf("cap"), 0);
   assertEquals(avengers.get(0), "cap");
   assertTrue(avengers.contains("cap"));
   assertFalse(avengers.contains("made-up"));
   Object[] arr = avengers.toArray();
   assertEquals(arr, new String[] {"cap", "iron-man", "hulk"});
   assertEquals(avengers, avengers.subList(0, avengers.size()));
 }
コード例 #3
0
ファイル: Lexicon.java プロジェクト: ePADD/muse
 /**
  * Computes a hit count for every caption in {@code captionToExpandedQuery}.
  *
  * @param indexer indexer whose {@code getTotalHits} is called once per caption
  * @param originalContentOnly if true the query uses {@code Indexer.QueryType.ORIGINAL}, otherwise
  *     {@code Indexer.QueryType.FULL}
  * @return insertion-ordered map from caption to hit count; a caption whose lookup threw an
  *     exception is still present, with the initial count of 0
  */
 public Map<String, Integer> getLexiconCounts(Indexer indexer, boolean originalContentOnly) {
   Map<String, Integer> countsByCaption = new LinkedHashMap<String, Integer>();

   // Work on a snapshot of the keys rather than on the live key set.
   String[] captionSnapshot =
       captionToExpandedQuery.keySet().toArray(new String[captionToExpandedQuery.size()]);

   for (int i = 0; i < captionSnapshot.length; i++) {
     String caption = captionSnapshot[i];
     String expandedQuery = captionToExpandedQuery.get(caption);

     if (query_is_known(expandedQuery)) {
       Integer hits = 0;
       try {
         Indexer.QueryType type =
             originalContentOnly ? Indexer.QueryType.ORIGINAL : Indexer.QueryType.FULL;
         hits = indexer.getTotalHits(expandedQuery, false, type);
       } catch (Exception e) {
         // Best effort: report the failure and keep the count at 0 for this caption.
         Util.print_exception("Exception while collecting lexicon counts", e, log);
       }
       countsByCaption.put(caption, hits);
     } else {
       log.warn("Skipping unknown caption '" + caption + "'");
     }
   }
   return countsByCaption;
 }

 /** Returns true when a caption resolved to a non-null expanded query. */
 private static boolean query_is_known(String expandedQuery) {
   return expandedQuery != null;
 }
コード例 #4
0
ファイル: Lexicon.java プロジェクト: ePADD/muse
    /**
     * Main entry point: returns a caption -> documents map for each caption in {@code
     * captionToExpandedQuery} that matches at least one document.
     *
     * <p>Note (vihari): despite its name, this method returns the documents that match each
     * emotion, not the emotions themselves.
     *
     * @param indexer indexer to query; must already have run
     * @param docs results are restricted to these docs; all docs are assumed if this is null or
     *     empty
     * @param originalContentOnly currently not consulted; the query type is always {@code
     *     Indexer.QueryType.ORIGINAL}
     * @param captions captions to look up; null or none selects every caption in {@code
     *     captionToExpandedQuery}
     * @return insertion-ordered map from caption to its matching documents; unknown captions and
     *     captions without any matching document are omitted
     */
    public Map<String, Collection<Document>> getEmotions(
        Indexer indexer,
        Collection<Document> docs,
        boolean originalContentOnly,
        String... captions) {
      Map<String, Collection<Document>> result = new LinkedHashMap<String, Collection<Document>>();
      Set<Document> docs_set = Util.castOrCloneAsSet(docs);

      // A null varargs array (e.g. a caller passing (String[]) null) is treated the same as "no
      // captions given", as the contract above promises, instead of throwing a
      // NullPointerException on captions.length.
      String[] selected_captions =
          (captions != null && captions.length > 0)
              ? captions
              : captionToExpandedQuery.keySet().toArray(new String[0]);

      for (String caption : selected_captions) {
        String query = captionToExpandedQuery.get(caption);
        if (query == null) {
          log.warn("Skipping unknown caption '" + caption + "'");
          continue;
        }

        // query is simply word1|word2|word3 etc for that sentiment.
        // The threshold is 1 for now; in future we may have a higher threshold for sentiment
        // matching.
        int threshold = 1;
        Indexer.QueryOptions options = new Indexer.QueryOptions();
        options.setThreshold(threshold);
        // NOTE(review): originalContentOnly is not used here, whereas getLexiconCounts switches
        // between QueryType.ORIGINAL and QueryType.FULL based on it. Left unchanged so existing
        // callers keep their results; confirm whether this method should honour the flag too.
        options.setQueryType(Indexer.QueryType.ORIGINAL);
        Collection<Document> docsForCaption = indexer.docsForQuery(query, options);

        // if docs were supplied, retain only the matches among them; otherwise retain all
        if (!Util.nullOrEmpty(docs_set)) {
          docsForCaption = Util.listIntersection(docsForCaption, docs_set);
        }

        // put it in the result only if at least 1 doc matches
        if (docsForCaption.size() > 0) {
          result.put(caption, docsForCaption);
        }
      }
      return result;
    }
コード例 #5
0
ファイル: IndexerTest.java プロジェクト: irwenqiang/ml-1
/**
 * Tests for {@code Indexer}: list-style accessors, rejection of {@code add}/{@code remove}, and a
 * save/load round trip through in-memory streams.
 */
@Test
public class IndexerTest {

  /** Shared fixture built from four stream elements, of which "cap" appears twice. */
  private static final Indexer<String> avengers =
      Indexer.fromStream(Stream.of("cap", "iron-man", "hulk", "cap"));

  /**
   * Exercises the list-style accessors of the fixture: emptiness, size, index lookups, element
   * access, membership, array conversion and full-range sub-list equality.
   */
  public void testIndexer() {
    assertFalse(avengers.isEmpty());
    // assertEquals(actual, expected) instead of assertTrue(a == b): a failure then reports the
    // value that was actually observed rather than just "expected true".
    assertEquals(avengers.size(), 3);
    assertEquals(avengers.indexOf("cap"), 0);
    assertEquals(avengers.lastIndexOf("cap"), 0);
    assertEquals(avengers.get(0), "cap");
    assertTrue(avengers.contains("cap"));
    assertFalse(avengers.contains("made-up"));
    Object[] arr = avengers.toArray();
    assertEquals(arr, new String[] {"cap", "iron-man", "hulk"});
    assertEquals(avengers, avengers.subList(0, avengers.size()));
  }

  /** Removing by index from an indexer is expected to throw a {@code RuntimeException}. */
  @Test(expectedExceptions = RuntimeException.class)
  public void testIndexerThrowsRemove() {
    Indexer.fromStream(Stream.of("a", "b", "c")).remove(0);
  }

  /** Adding an element to an indexer is expected to throw a {@code RuntimeException}. */
  @Test(expectedExceptions = RuntimeException.class)
  public void testIndexerThrowsAdd() {
    Indexer.fromStream(Stream.of("a", "b", "c")).add("d");
  }

  /**
   * Saves the fixture to an in-memory byte buffer, loads it back with {@code Indexer.load} and
   * expects the reloaded instance to equal the original.
   */
  public void testSaveLoadRoundtrip() throws IOException {
    val baos = new ByteArrayOutputStream(3200);
    val dos = new DataOutputStream(baos);
    avengers.save(dos);
    val dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    val otherAvengers = Indexer.load(dis);
    assertEquals(avengers, otherAvengers);
  }
}
コード例 #6
0
ファイル: IndexerTest.java プロジェクト: irwenqiang/ml-1
 /** Adding an element to an indexer built from a stream must raise a {@code RuntimeException}. */
 @Test(expectedExceptions = RuntimeException.class)
 public void testIndexerThrowsAdd() {
   Indexer<String> letters = Indexer.fromStream(Stream.of("a", "b", "c"));
   letters.add("d");
 }
コード例 #7
0
ファイル: IndexerTest.java プロジェクト: irwenqiang/ml-1
 /** Removing by index from an indexer built from a stream must raise a {@code RuntimeException}. */
 @Test(expectedExceptions = RuntimeException.class)
 public void testIndexerThrowsRemove() {
   Indexer<String> letters = Indexer.fromStream(Stream.of("a", "b", "c"));
   letters.remove(0);
 }