예제 #1
0
 /**
  * Creates a new child Metadata object with this object as its parent, including provided parent
  * ID.
  *
  * <p>The given ID is first stored as this object's {@code id} property. Every parameter of this
  * object is then copied into the child under the same key prefixed with {@code parent_}, and the
  * child's {@code parentType} parameter is set to the lower-cased value of
  * {@code parentType.name()}.
  *
  * @param id the ID to store on this (parent) object before its parameters are copied
  * @param parentType the kind of parent relationship to record on the child
  * @return a new child Metadata object
  */
 public DocumentMetadata asParent(String id, ParentType parentType) {
   setProperty("id", id);
   FlexibleParameters newParameters = new FlexibleParameters();
   for (String key : parameters.getKeys()) {
     newParameters.setParameter("parent_" + key, parameters.getParameterValues(key));
   }
   // Locale.ROOT keeps the stored value independent of the JVM default locale; with a Turkish
   // default locale, a plain toLowerCase() would turn 'I' into a dotless 'ı'.
   newParameters.setParameter(
       "parentType", parentType.name().toLowerCase(java.util.Locale.ROOT));
   return new DocumentMetadata(newParameters);
 }
예제 #2
0
 /**
  * Stores the query-string form of the given parameters under the {@code queryParameters}
  * property.
  *
  * @param storedparams the parameters whose query string is stored
  * @throws UnsupportedEncodingException if building the query string fails on encoding
  */
 public void setQueryParameters(FlexibleParameters storedparams)
     throws UnsupportedEncodingException {
   final String queryString = storedparams.getAsQueryString();
   setProperty("queryParameters", queryString);
 }
예제 #3
0
 /**
  * Hands the given key and values to the backing parameters via {@code setParameter}.
  *
  * @param key the parameter key
  * @param values the values to associate with the key
  */
 private void setProperty(String key, String[] values) {
   this.parameters.setParameter(key, values);
 }
예제 #4
0
 /**
  * Looks up a single value in the backing parameters, passing along a fallback.
  *
  * @param key the parameter key to look up
  * @param defaultValue the fallback handed to the parameters lookup
  * @return whatever {@code getParameterValue(key, defaultValue)} yields
  */
 private String getProperty(String key, String defaultValue) {
   final String value = this.parameters.getParameterValue(key, defaultValue);
   return value;
 }
예제 #5
0
 /**
  * Looks up a single value in the backing parameters.
  *
  * @param key the parameter key to look up
  * @return whatever {@code getParameterValue(key)} yields for the key
  */
 private String getProperty(String key) {
   final String value = this.parameters.getParameterValue(key);
   return value;
 }
예제 #6
0
 /**
  * Compares this metadata with another one by comparing their backing parameters.
  *
  * <p>Because the argument type is {@code DocumentMetadata} rather than {@code Object}, this
  * method overloads and does not override {@link Object#equals(Object)}; code that calls
  * {@code equals(Object)} (for example collections) will not reach it.
  * NOTE(review): whether the class also overrides {@code equals(Object)} and {@code hashCode()}
  * is not visible from here; confirm.
  *
  * @param metadata the metadata to compare with; may be {@code null}
  * @return {@code true} if {@code metadata} is non-null and its parameters equal this object's
  *     parameters, {@code false} otherwise
  */
 public boolean equals(DocumentMetadata metadata) {
   // A null argument is simply "not equal" instead of a NullPointerException.
   if (metadata == null) {
     return false;
   }
   return parameters.equals(metadata.parameters);
 }
예제 #7
0
 /**
  * Reports whether the backing parameters contain the given key.
  *
  * @param string the key to look for
  * @return the result of {@code parameters.containsKey(string)}
  */
 public boolean containsKey(String string) {
   final boolean present = this.parameters.containsKey(string);
   return present;
 }
예제 #8
0
  /**
   * Exercises {@code DocumentTerms} against an in-memory storage.
   *
   * <p>A Lucene document is added directly to the storage's Lucene manager, then a corpus is
   * created from two inline strings. The test then checks the returned terms for a wildcard
   * query, a phrase query, no query, {@code limit}/{@code start} paging, and a stop list.
   *
   * @throws IOException if corpus creation, a tool run, or storage cleanup fails
   */
  @Test
  public void test() throws IOException {
    Storage storage = new MemoryStorage();
    try {
      LuceneManager luceneManager = storage.getLuceneManager();
      Document document = new Document();
      document.add(
          new TextField("lexical", "dark and stormy night in document one", Field.Store.YES));
      luceneManager.addDocument(document);

      FlexibleParameters parameters = new FlexibleParameters();
      parameters.addParameter("string", "It was a dark and stormy night.");
      parameters.addParameter("string", "It was the best of times it was the worst of times.");
      parameters.addParameter("tool", "StepEnabledIndexedCorpusCreator");

      CorpusCreator creator = new CorpusCreator(storage, parameters);
      creator.run();
      parameters.setParameter("corpus", creator.getStoredId());

      parameters.setParameter("tool", "DocumentTermFrequencies");

      List<DocumentTerm> documentTerms;
      DocumentTerm documentTerm;

      // wildcard query
      parameters.setParameter("query", "dar*");
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(1, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals("dark", documentTerm.getTerm());
      assertEquals(1, documentTerm.getRawFrequency());
      assertEquals(0, documentTerm.getDocumentIndex());

      // phrase query
      parameters.setParameter("query", "it was");
      // we sort by reverse frequency by default
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(2, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals(1, documentTerm.getDocumentIndex());
      assertEquals("it was", documentTerm.getTerm());
      assertEquals(2, documentTerm.getRawFrequency());
      documentTerm = documentTerms.get(1);
      assertEquals(0, documentTerm.getDocumentIndex());
      assertEquals("it was", documentTerm.getTerm());
      assertEquals(1, documentTerm.getRawFrequency());

      // no query
      parameters.removeParameter("query");
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(14, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals("it", documentTerm.getTerm());
      assertEquals(2, documentTerm.getRawFrequency());

      // limit only
      parameters.setParameter("limit", 1);
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(1, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals("it", documentTerm.getTerm());
      assertEquals(2, documentTerm.getRawFrequency());

      // limit plus start offset
      parameters.setParameter("start", 1);
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(1, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals("of", documentTerm.getTerm());
      assertEquals(2, documentTerm.getRawFrequency());

      // start offset past the end of the results
      parameters.setParameter("start", 50);
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(0, documentTerms.size());

      // with stopwords
      parameters.setParameter("stopList", "stop.en.taporware.txt");
      parameters.removeParameter("start");
      parameters.removeParameter("limit");
      documentTerms = runDocumentTerms(storage, parameters);
      assertEquals(6, documentTerms.size());
      documentTerm = documentTerms.get(0);
      assertEquals("times", documentTerm.getTerm());
      documentTerm = documentTerms.get(documentTerms.size() - 1);
      assertEquals("worst", documentTerm.getTerm());
    } finally {
      // Release the storage even when an assertion or a tool run fails.
      storage.destroy();
    }
  }

  /** Builds a fresh DocumentTerms tool with the given parameters, runs it, returns its terms. */
  private List<DocumentTerm> runDocumentTerms(Storage storage, FlexibleParameters parameters)
      throws IOException {
    DocumentTerms documentTermFrequencies = new DocumentTerms(storage, parameters);
    documentTermFrequencies.run();
    return documentTermFrequencies.getDocumentTerms();
  }