Пример #1
0
 /**
  * Same as {@link #makeDocument()}, only this method creates a document of the given size input by
  * <code>size</code>.
  *
  * <p>Starts from the leftover content recorded by a previous call (if any), pulls further {@link
  * DocData} from the content source until the accumulated body holds at least <code>size</code>
  * characters, and hands the result to <code>createDocument</code>. If a body is still present
  * afterwards it is recorded, together with an incremented counter, for the next call.
  *
  * @param size minimum body length, in characters, to gather before building the document
  * @return the document built by <code>createDocument</code>
  * @throws Exception propagated from the content source or from document creation
  */
 public Document makeDocument(int size) throws Exception {
   LeftOver lvr = leftovr.get();
   if (lvr == null
       || lvr.docdata == null
       || lvr.docdata.getBody() == null
       || lvr.docdata.getBody().length() == 0) {
     resetLeftovers();
   }
   // NOTE(review): the local lvr keeps pointing at the old record even after resetLeftovers();
   // whether that is intended depends on what resetLeftovers() does -- confirm.
   DocData docData = getDocState().docData;
   DocData dd = (lvr == null ? source.getNextDocData(docData) : lvr.docdata);
   int cnt = (lvr == null ? 0 : lvr.cnt);
   while (dd.getBody() == null || dd.getBody().length() < size) {
     DocData dd2 = dd;
     dd = source.getNextDocData(new DocData());
     // Content from a freshly fetched DocData restarts the counter.
     cnt = 0;
     // A missing body counts as empty. Plain `+` concatenation would splice the literal text
     // "null" into the document body.
     String merged = "";
     if (dd2.getBody() != null) {
       merged += dd2.getBody();
     }
     if (dd.getBody() != null) {
       merged += dd.getBody();
     }
     dd.setBody(merged);
   }
   Document doc = createDocument(dd, size, cnt);
   if (dd.getBody() == null || dd.getBody().length() == 0) {
     // Nothing remains after building the document.
     resetLeftovers();
   } else {
     if (lvr == null) {
       lvr = new LeftOver();
       leftovr.set(lvr);
     }
     lvr.docdata = dd;
     lvr.cnt = ++cnt;
   }
   return doc;
 }
Пример #2
0
 /**
  * Reset inputs so that the test run would behave, input wise, as if it just started.
  *
  * <p>The steps run in this order: print the current source's statistics, re-apply the stored
  * configuration, reset the source's inputs, zero the created-documents counter and reset the
  * leftovers.
  *
  * @throws IOException if resetting the content source fails
  */
 public synchronized void resetInputs() throws IOException {
   // Statistics are printed before the configuration is re-applied.
   source.printStatistics("docs");
   // re-initiate since properties by round may have changed.
   setConfig(config);
   source.resetInputs();
   numDocsCreated.set(0);
   resetLeftovers();
 }
Пример #3
0
  /**
   * Returns the {@link ContentSource} registered for the host of <code>url</code>, falling back to
   * <code>ContentSource.GENERIC</code> when no source is registered for that host or the
   * registered source has no resource builder.
   *
   * <p>The chosen source is the shared registered instance, not a copy: its resource builder has
   * its URL overwritten with <code>url</code> on every call.
   *
   * @param url the URL whose host selects the content source
   * @return the matching source, or the generic one
   */
  public static ContentSource newInstance(URL url) {
    // Lower-case with a fixed locale so the lookup key does not depend on the JVM's default
    // locale (e.g. the Turkish dotless-i mapping of "I").
    String host = url.getHost().toLowerCase(java.util.Locale.ROOT);

    Log.d("matching on: " + host);

    ContentSource s = sources.get(host);
    if (s == null || s.getResourceBuilder() == null) {
      s = ContentSource.GENERIC;
    }
    s.getResourceBuilder().setURL(url);
    return s;
  }
Пример #4
0
  /**
   * Points this formatter at the repository whose objects it should read.
   *
   * <p>Any reader held from an earlier call is released first. The new repository then supplies a
   * fresh object reader, the old/new content source pair, the prefix and rename-detection settings
   * and the diff algorithm.
   *
   * <p>Once a repository has been set, the formatter must be released to ensure the internal
   * ObjectReader is able to release its resources.
   *
   * @param repository source repository holding referenced objects.
   */
  public void setRepository(Repository repository) {
    if (reader != null) {
      reader.release();
    }

    db = repository;
    reader = db.newObjectReader();

    // Both sides of the pair read through the same reader-backed source.
    final ContentSource readerSource = ContentSource.create(reader);
    source = new ContentSource.Pair(readerSource, readerSource);

    final DiffConfig diffConfig = db.getConfig().get(DiffConfig.KEY);
    if (diffConfig.isNoPrefix()) {
      setOldPrefix(""); // $NON-NLS-1$
      setNewPrefix(""); // $NON-NLS-1$
    }
    setDetectRenames(diffConfig.isRenameDetectionEnabled());

    // HISTOGRAM is the default passed to the config lookup.
    diffAlgorithm =
        DiffAlgorithm.getAlgorithm(
            db.getConfig()
                .getEnum(
                    ConfigConstants.CONFIG_DIFF_SECTION,
                    null,
                    ConfigConstants.CONFIG_KEY_ALGORITHM,
                    SupportedAlgorithm.HISTOGRAM));
  }
  /**
   * Two instances are equal when both their <code>packageVersion</code> and their
   * <code>contentSource</code> fields are equal; a null field only matches a null field.
   */
  @Override
  public boolean equals(Object obj) {
    if (obj == this) {
      return true;
    }

    // instanceof is false for null, so this also rejects a null argument.
    if (!(obj instanceof PackageVersionContentSource)) {
      return false;
    }

    final PackageVersionContentSource that = (PackageVersionContentSource) obj;

    final boolean samePackageVersion =
        (packageVersion == null)
            ? (that.packageVersion == null)
            : packageVersion.equals(that.packageVersion);
    if (!samePackageVersion) {
      return false;
    }

    return (contentSource == null)
        ? (that.contentSource == null)
        : contentSource.equals(that.contentSource);
  }
 /**
  * Combines the hash codes of <code>packageVersion</code> and <code>contentSource</code> with the
  * usual multiply-by-31 scheme; a null field contributes 0.
  */
 @Override
 public int hashCode() {
   final int versionHash = (packageVersion != null) ? packageVersion.hashCode() : 0;
   final int sourceHash = (contentSource != null) ? contentSource.hashCode() : 0;
   // Expanded form of: start at 1, then fold in each field as 31 * acc + fieldHash.
   return (31 * (31 + versionHash)) + sourceHash;
 }
Пример #7
0
 /**
  * Resets this source: runs the superclass reset, closes this source, and rewinds the file and
  * iteration counters to zero. All of it happens while holding <code>lock</code>.
  *
  * @throws IOException if the superclass reset or closing fails
  */
 @Override
 public void resetInputs() throws IOException {
   synchronized (lock) {
     super.resetInputs();
     close();
     // Start again from the first file and the first iteration.
     nextFile = 0;
     iteration = 0;
   }
 }
 /**
  * Applies the superclass configuration, then reads <code>keep.image.only.docs</code> (default
  * true) and the mandatory <code>docs.file</code> property, which is stored as an absolute file.
  *
  * @throws IllegalArgumentException if <code>docs.file</code> is not set
  */
 @Override
 public void setConfig(Config config) {
   super.setConfig(config);
   keepImages = config.get("keep.image.only.docs", true);

   final String docsFile = config.get("docs.file", null);
   if (null == docsFile) {
     throw new IllegalArgumentException("docs.file must be set");
   }
   final File configured = new File(docsFile);
   file = configured.getAbsoluteFile();
 }
Пример #9
0
 /**
  * Applies the superclass configuration, then resolves the mandatory <code>docs.file</code>
  * property to an absolute path. If no encoding is set at that point, UTF-8 is used.
  *
  * @throws IllegalArgumentException if <code>docs.file</code> is not set
  */
 @Override
 public void setConfig(Config config) {
   super.setConfig(config);

   final String docsFile = config.get("docs.file", null);
   if (null == docsFile) {
     throw new IllegalArgumentException("docs.file must be set");
   }
   file = Paths.get(docsFile).toAbsolutePath();

   // Fall back to UTF-8 only when nothing has set an encoding.
   if (encoding != null) {
     return;
   }
   encoding = IOUtils.UTF_8;
 }
  /**
   * Applies the superclass configuration, resolves the data directory and creates a fresh file
   * iterator over it.
   *
   * <p>The directory comes from <code>docs.dir</code> (default "dir-out"). A relative value is
   * resolved against <code>work.dir</code> (default "work").
   */
  @Override
  public void setConfig(Config config) {
    super.setConfig(config);

    File workDir = new File(config.get("work.dir", "work"));
    String d = config.get("docs.dir", "dir-out");
    dataDir = new File(d);
    if (!dataDir.isAbsolute()) {
      dataDir = new File(workDir, d);
    }

    // A `new` expression never evaluates to null, so the former `inputFiles == null` check
    // (which threw "No txt files in dataDir") could never fire and has been removed.
    // NOTE(review): if an empty data directory should be rejected here, that needs an explicit
    // emptiness test on the iterator -- confirm the intended behavior.
    inputFiles = new Iterator(dataDir);
  }
Пример #11
0
 /**
  * Applies the superclass configuration, then sets up the data directory, the input file list,
  * both parsers, the default encoding and the doc-name iteration flag.
  *
  * <p>Properties read here:
  *
  * <ul>
  *   <li><code>docs.dir</code> (default "trec"); a relative value is resolved against
  *       <code>work.dir</code> (default "work")
  *   <li><code>trec.doc.parser</code>: class name of the {@link TrecDocParser} to instantiate
  *   <li><code>html.parser</code>: class name of the {@link HTMLParser} to instantiate
  *   <li><code>content.source.excludeIteration</code> (default false)
  * </ul>
  *
  * @throws IllegalArgumentException if no input files are present after collecting from the data
  *     directory
  * @throws RuntimeException if a parser class cannot be loaded or instantiated
  */
 @Override
 public void setConfig(Config config) {
   super.setConfig(config);
   // dirs
   File workDir = new File(config.get("work.dir", "work"));
   String d = config.get("docs.dir", "trec");
   dataDir = new File(d);
   if (!dataDir.isAbsolute()) {
     dataDir = new File(workDir, d);
   }
   // files
   collectFiles(dataDir, inputFiles);
   if (inputFiles.size() == 0) {
     throw new IllegalArgumentException("No files in dataDir: " + dataDir);
   }
   // trec doc parser
   try {
     String trecDocParserClassName =
         config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser");
     // Class.newInstance() is deprecated; go through the no-arg constructor instead.
     trecDocParser =
         Class.forName(trecDocParserClassName)
             .asSubclass(TrecDocParser.class)
             .getDeclaredConstructor()
             .newInstance();
   } catch (Exception e) {
     // Should not get here. Throw runtime exception.
     throw new RuntimeException(e);
   }
   // html parser
   try {
     String htmlParserClassName =
         config.get("html.parser", "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
     htmlParser =
         Class.forName(htmlParserClassName)
             .asSubclass(HTMLParser.class)
             .getDeclaredConstructor()
             .newInstance();
   } catch (Exception e) {
     // Should not get here. Throw runtime exception.
     throw new RuntimeException(e);
   }
   // encoding: only applied when nothing has set one yet
   if (encoding == null) {
     encoding = "ISO-8859-1";
   }
   // iteration exclusion in doc name
   excludeDocnameIteration = config.get("content.source.excludeIteration", false);
 }
Пример #12
0
  /**
   * Set the configuration parameters of this doc maker.
   *
   * <p>Instantiates and configures the content source named by <code>content.source</code>, then
   * derives the store, index and term-vector settings from the <code>doc.*</code> properties:
   *
   * <ul>
   *   <li><code>doc.stored</code> (default false); <code>doc.body.stored</code> defaults to it
   *   <li><code>doc.tokenized</code> (default true); <code>doc.body.tokenized</code> defaults to it
   *   <li><code>doc.tokenized.norms</code> (default false) and
   *       <code>doc.body.tokenized.norms</code> (default true)
   *   <li><code>doc.term.vector</code>, <code>doc.term.vector.positions</code> and
   *       <code>doc.term.vector.offsets</code> (all default false)
   *   <li><code>doc.store.body.bytes</code> (default false), <code>doc.reuse.fields</code>
   *       (default true), <code>doc.index.props</code> (default false)
   *   <li><code>doc.random.id.limit</code> (default -1); any other value creates a random
   *       generator with a fixed seed
   * </ul>
   *
   * @param config the configuration to read; it is also stored on this instance
   * @throws RuntimeException if the content source cannot be loaded, instantiated or configured
   */
  public void setConfig(Config config) {
    this.config = config;
    try {
      String sourceClass =
          config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
      // Class.newInstance() is deprecated; go through the no-arg constructor instead.
      source =
          Class.forName(sourceClass)
              .asSubclass(ContentSource.class)
              .getDeclaredConstructor()
              .newInstance();
      source.setConfig(config);
    } catch (Exception e) {
      // Should not get here. Throw runtime exception.
      throw new RuntimeException(e);
    }

    boolean stored = config.get("doc.stored", false);
    boolean bodyStored = config.get("doc.body.stored", stored);
    boolean tokenized = config.get("doc.tokenized", true);
    boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
    boolean norms = config.get("doc.tokenized.norms", false);
    boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
    boolean termVec = config.get("doc.term.vector", false);
    storeVal = (stored ? Field.Store.YES : Field.Store.NO);
    bodyStoreVal = (bodyStored ? Field.Store.YES : Field.Store.NO);
    if (tokenized) {
      indexVal = norms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
    } else {
      indexVal = norms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
    }

    if (bodyTokenized) {
      bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
    } else {
      bodyIndexVal = bodyNorms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
    }

    // Positions/offsets take precedence over the plain term-vector flag.
    boolean termVecPositions = config.get("doc.term.vector.positions", false);
    boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
    if (termVecPositions && termVecOffsets) {
      termVecVal = TermVector.WITH_POSITIONS_OFFSETS;
    } else if (termVecPositions) {
      termVecVal = TermVector.WITH_POSITIONS;
    } else if (termVecOffsets) {
      termVecVal = TermVector.WITH_OFFSETS;
    } else if (termVec) {
      termVecVal = TermVector.YES;
    } else {
      termVecVal = TermVector.NO;
    }
    storeBytes = config.get("doc.store.body.bytes", false);

    reuseFields = config.get("doc.reuse.fields", true);

    // In a multi-rounds run, it is important to reset DocState since settings
    // of fields may change between rounds, and this is the only way to reset
    // the cache of all threads.
    docState = new ThreadLocal<DocState>();

    indexProperties = config.get("doc.index.props", false);

    updateDocIDLimit = config.get("doc.random.id.limit", -1);
    if (updateDocIDLimit != -1) {
      // Fixed seed, so the generated sequence is the same on every run.
      r = new Random(179);
    }
  }
Пример #13
0
 /**
  * Runs the superclass reset and then calls <code>openFile()</code>.
  *
  * @throws IOException if the superclass reset or opening the file fails
  */
 @Override
 public void resetInputs() throws IOException {
   super.resetInputs();
   openFile();
 }
 /**
  * Runs the superclass reset, then replaces the file iterator with a new one over
  * <code>dataDir</code> and sets the iteration counter back to zero.
  *
  * @throws IOException if the superclass reset fails
  */
 @Override
 public synchronized void resetInputs() throws IOException {
   super.resetInputs();
   // A new iterator is created over the same data directory.
   inputFiles = new Iterator(dataDir);
   iteration = 0;
 }
Пример #15
0
 /**
  * Closes the {@link DocMaker}. The base implementation closes the {@link ContentSource}, and it
  * can be overridden to do more work (but make sure to call super.close()).
  *
  * @throws IOException if closing the content source fails
  */
 public void close() throws IOException {
   source.close();
 }
 /**
  * Runs the superclass reset and then assigns <code>is</code> a new input stream obtained for
  * <code>file</code>.
  *
  * @throws IOException if the superclass reset or opening the stream fails
  */
 @Override
 public void resetInputs() throws IOException {
   super.resetInputs();
   // NOTE(review): the stream previously held in `is` is not closed in this method; presumably
   // that happens elsewhere (e.g. in close() or the superclass reset) -- confirm.
   is = getInputStream(file);
 }
Пример #17
0
 /**
  * Returns the number of bytes generated by the content source since last reset.
  *
  * @return the value reported by the content source's <code>getBytesCount()</code>
  */
 public synchronized long getBytesCount() {
   return source.getBytesCount();
 }
Пример #18
0
 /**
  * Returns the total number of bytes that were generated by the content source defined to that doc
  * maker.
  *
  * @return the value reported by the content source's <code>getTotalBytesCount()</code>
  */
 public long getTotalBytesCount() {
   return source.getTotalBytesCount();
 }
Пример #19
0
 /**
  * Builds the next {@link Document} for indexing.
  *
  * <p>Any leftovers are reset first. The next {@link DocData} is then fetched from the {@link
  * ContentSource}, filling the <code>docData</code> instance held by the current doc state, and
  * turned into a {@link Document} via <code>createDocument</code> with size 0 and count -1. If
  * <code>reuseFields</code> was set to true, {@link Document} and {@link Field} instances are
  * reused.
  *
  * @return the newly populated document
  * @throws Exception propagated from the content source or from document creation
  */
 public Document makeDocument() throws Exception {
   resetLeftovers();
   final DocData next = source.getNextDocData(getDocState().docData);
   return createDocument(next, 0, -1);
 }
Пример #20
0
 /**
  * Picks the content source for <code>iterator</code>: a working-tree iterator gets a source
  * created from the iterator itself, anything else gets one created from <code>reader</code>.
  */
 private ContentSource source(AbstractTreeIterator iterator) {
   if (!(iterator instanceof WorkingTreeIterator)) {
     return ContentSource.create(reader);
   }
   return ContentSource.create((WorkingTreeIterator) iterator);
 }
Пример #21
0
 /**
  * Registers <code>s</code> in <code>sources</code> under each host name it reports. A later
  * registration for the same host name replaces the earlier one.
  *
  * <p>NOTE(review): host names are stored exactly as reported; if lookups lower-case the host
  * first, names containing upper-case letters would never match -- confirm callers register
  * lower-case names.
  *
  * @param s the content source to register
  */
 public static void registerContentSource(ContentSource s) {
   for (final String host : s.getHostnames()) {
     sources.put(host, s);
   }
 }