Exemplo n.º 1
0
  /**
   * Inserts a single XML document into a collection through the XML:DB API.
   *
   * <p>Registers the driver class named by {@code DRIVER}, opens the collection {@code DBNAME},
   * creates an XML resource with the ID {@code "world"} and stores it. XML:DB failures raised
   * inside the {@code try} block are reported on standard error; the collection, if it was
   * opened, is closed in the {@code finally} block.
   *
   * @param args (ignored) command-line arguments
   * @throws Exception if the driver class cannot be loaded or instantiated, or if an exception
   *     escapes while closing the collection
   */
  public static void main(final String[] args) throws Exception {

    System.out.println("=== XMLDBInsert ===");

    // Collection instance; stays null until the collection was opened successfully
    Collection col = null;

    try {
      // Register the database. The no-arg constructor is looked up explicitly because
      // Class.newInstance() is deprecated and rethrows checked constructor exceptions
      // without declaring them.
      final Class<?> c = Class.forName(DRIVER);
      final Database db = (Database) c.getDeclaredConstructor().newInstance();
      DatabaseManager.registerDatabase(db);

      System.out.println("\n* Get collection.");

      // Receive the collection; the XML:DB API returns null if it cannot be found
      col = DatabaseManager.getCollection(DBNAME);
      if (col == null) {
        System.err.println("Collection not found: " + DBNAME);
        return;
      }

      // ID for the new document
      final String id = "world";

      // Content of the new document
      final String doc = "<xml>Hello World!</xml>";

      System.out.println("\n* Create new resource.");

      // Create a new XML resource with the specified ID
      final XMLResource res = (XMLResource) col.createResource(id, XMLResource.RESOURCE_TYPE);

      // Set the content of the XML resource as the document
      res.setContent(doc);

      System.out.println("\n* Store new resource.");

      // Store the resource into the database
      col.storeResource(res);

    } catch (final XMLDBException ex) {
      // Report the XML:DB error code together with the full stack trace
      System.err.println("XML:DB Exception occurred " + ex.errorCode);
      ex.printStackTrace();
    } finally {
      // Close the collection if it was opened
      if (col != null) {
        col.close();
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Runs a query through the XML:DB API and prints every result to standard output.
   *
   * <p>Registers the driver class named by {@code DRIVER}, opens the collection {@code DBNAME},
   * obtains its {@code XPathQueryService}, evaluates {@code QUERY} and prints the content of
   * each returned resource. XML:DB failures raised inside the {@code try} block are reported on
   * standard error; the collection, if it was opened, is closed in the {@code finally} block.
   *
   * @param args (ignored) command-line arguments
   * @throws Exception if the driver class cannot be loaded or instantiated, or if an exception
   *     escapes while closing the collection
   */
  public static void main(final String[] args) throws Exception {

    System.out.println("=== XMLDBQuery ===\n");

    System.out.println("* Run query via XML:DB:");

    // Collection instance; stays null until the collection was opened successfully
    Collection coll = null;

    try {
      // Register the database. The no-arg constructor is looked up explicitly because
      // Class.newInstance() is deprecated and rethrows checked constructor exceptions
      // without declaring them.
      final Class<?> c = Class.forName(DRIVER);
      final Database db = (Database) c.getDeclaredConstructor().newInstance();
      DatabaseManager.registerDatabase(db);

      // Receive the collection; the XML:DB API returns null if it cannot be found
      coll = DatabaseManager.getCollection(DBNAME);
      if (coll == null) {
        System.err.println("Collection not found: " + DBNAME);
        return;
      }

      // Receive the XPath query service
      final XPathQueryService service =
          (XPathQueryService) coll.getService("XPathQueryService", "1.0");

      // Execute the query and receive all results
      final ResourceSet set = service.query(QUERY);

      // Create a result iterator
      final ResourceIterator iter = set.getIterator();

      // Loop through all result items
      while (iter.hasMoreResources()) {
        // Receive the next result
        final Resource res = iter.nextResource();

        // Write the result to the console
        System.out.println(res.getContent());
      }
    } catch (final XMLDBException ex) {
      // Report the XML:DB error code together with the full stack trace, so that the
      // cause of the failure is not lost
      System.err.println("XML:DB Exception occurred " + ex.errorCode);
      ex.printStackTrace();
    } finally {
      // Close the collection if it was opened
      if (coll != null) {
        coll.close();
      }
    }
  }
Exemplo n.º 3
0
  // TODO if this class extends BasicIndexer, then this method could be inherited
  /**
   * Builds the direct index and its companion structures for the given collections.
   *
   * <p>A new index is created at {@code path}/{@code prefix}. For every document obtained from
   * the collections, the terms are read and passed through the term pipeline, and the document
   * is then written with {@code indexDocument} (or {@code indexEmpty} when it has no terms).
   * Indexing stops early once {@code MAX_DOCS_PER_BUILDER} documents have been processed, or
   * right after a document whose {@code docno} is listed in {@code BUILDER_BOUNDARY_DOCUMENTS}.
   * Finally the index structures and properties are registered on the new index, the builders
   * are closed and the index is flushed.
   *
   * <p>I/O failures while opening the direct output stream, closing the meta index builder or
   * flushing the index are logged, not rethrown. Failures while indexing a single document are
   * logged and the document is skipped.
   *
   * @param collections the collections to index, processed in array order
   */
  public void createDirectIndex(Collection[] collections) {
    logger.info(
        "BlockIndexer creating direct index"
            + (Boolean.parseBoolean(
                    ApplicationSetup.getProperty("block.delimiters.enabled", "false"))
                ? " delimited-block indexing enabled"
                : ""));
    currentIndex = Index.createNewIndex(path, prefix);
    lexiconBuilder =
        FieldScore.FIELDS_COUNT > 0
            ? new LexiconBuilder(
                currentIndex,
                "lexicon",
                new BlockFieldLexiconMap(FieldScore.FIELDS_COUNT),
                FieldLexiconEntry.class.getName())
            : new LexiconBuilder(
                currentIndex, "lexicon", new BlockLexiconMap(), BlockLexiconEntry.class.getName());
    try {
      // Both output stream variants write to the same file; build its name once.
      final String directFilename =
          currentIndex.getPath()
              + ApplicationSetup.FILE_SEPARATOR
              + currentIndex.getPrefix()
              + "."
              + "direct"
              + BitIn.USUAL_EXTENSION;
      directIndexBuilder =
          FieldScore.FIELDS_COUNT > 0
              ? new BlockFieldDirectInvertedOutputStream(directFilename)
              : new BlockDirectInvertedOutputStream(directFilename);
    } catch (IOException ioe) {
      // NOTE(review): the failure is only logged; directIndexBuilder is left unassigned by
      // this call and is still used further down - confirm this is intended.
      logger.error("Cannot make DirectInvertedOutputStream:", ioe);
    }
    docIndexBuilder = new DocumentIndexBuilder(currentIndex, "document");
    metaBuilder = createMetaIndexBuilder();
    emptyDocIndexEntry =
        (FieldScore.FIELDS_COUNT > 0)
            ? new FieldDocumentIndexEntry(FieldScore.FIELDS_COUNT)
            : new BasicDocumentIndexEntry();

    // Running total of documents over all collections (never reset per collection).
    int numberOfDocuments = 0;
    final boolean boundaryDocsEnabled = BUILDER_BOUNDARY_DOCUMENTS.size() > 0;
    boolean stopIndexing = false;
    for (int collectionNo = 0; !stopIndexing && collectionNo < collections.length; collectionNo++) {
      Collection collection = collections[collectionNo];
      long startCollection = System.currentTimeMillis();
      // Remains true when the loop below is left via break, i.e. the collection may still
      // have documents; false once nextDocument() reported that there are none left.
      boolean notLastDoc = false;
      while ((notLastDoc = collection.nextDocument())) {
        // get the next document from the collection
        Document doc = collection.getDocument();

        if (doc == null) {
          continue;
        }

        numberOfDocuments++;
        // setup for parsing
        createDocumentPostings();
        String term;
        numOfTokensInDocument = 0;
        numOfTokensInBlock = 0;
        blockId = 0;
        // get each term in the document
        while (!doc.endOfDocument()) {
          if ((term = doc.getNextTerm()) != null && !term.equals("")) {
            termFields = doc.getFields();
            // pass term into TermPipeline (stop, stem etc)
            pipeline_first.processTerm(term);
            // the term pipeline will eventually add the term to this
            // object.
          }
          if (MAX_TOKENS_IN_DOCUMENT > 0 && numOfTokensInDocument > MAX_TOKENS_IN_DOCUMENT) {
            break;
          }
        }
        // if we didn't index all tokens from document,
        // we need to get to the end of the document.
        while (!doc.endOfDocument()) {
          doc.getNextTerm();
        }
        // we now have all terms in the DocumentTree

        pipeline_first.reset();
        // process DocumentTree (tree of terms)
        try {
          if (termsInDocument.getDocumentLength() == 0) {
            // this document is empty, add the
            // minimum to the document index
            indexEmpty(doc.getAllProperties());
          } else {
            /* index this document */
            indexDocument(doc.getAllProperties(), termsInDocument);
          }
        } catch (Exception ioe) {
          // a failure on one document is logged and must not abort the whole run
          logger.error("Failed to index " + doc.getProperty("docno"), ioe);
        }
        if (MAX_DOCS_PER_BUILDER > 0 && numberOfDocuments >= MAX_DOCS_PER_BUILDER) {
          stopIndexing = true;
          break;
        }

        if (boundaryDocsEnabled && BUILDER_BOUNDARY_DOCUMENTS.contains(doc.getProperty("docno"))) {
          stopIndexing = true;
          break;
        }
      }
      long endCollection = System.currentTimeMillis();
      long secs = ((endCollection - startCollection) / 1000);
      // NOTE(review): numberOfDocuments is the running total over all collections so far,
      // while secs covers only this collection.
      logger.info(
          "Collection #"
              + collectionNo
              + " took "
              + secs
              + " seconds to index "
              + "("
              + numberOfDocuments
              + " documents)\n");
      if (secs > 3600) {
        logger.info(
            "Rate: " + ((double) numberOfDocuments / ((double) secs / 3600.0d)) + " docs/hour");
      }

      // Only close a collection that has been read to its end; one that was left early
      // because indexing stopped is not closed here.
      if (!notLastDoc) {
        try {
          collection.close();
        } catch (IOException e) {
          logger.warn("Couldnt close collection", e);
        }
      }
    }

    /* end of the collection has been reached */
    finishedDirectIndexBuild();
    // The direct index and its input stream are registered with the same constructor
    // argument values; build that string once.
    final String directStructureArgs =
        "index,structureName,"
            + (FieldScore.FIELDS_COUNT > 0
                ? fieldDirectIndexPostingIteratorClass
                : basicDirectIndexPostingIteratorClass);
    currentIndex.addIndexStructure(
        "direct",
        "org.terrier.structures.BlockDirectIndex",
        "org.terrier.structures.Index,java.lang.String,java.lang.Class",
        directStructureArgs);
    currentIndex.addIndexStructureInputStream(
        "direct",
        "org.terrier.structures.BlockDirectIndexInputStream",
        "org.terrier.structures.Index,java.lang.String,java.lang.Class",
        directStructureArgs);
    currentIndex.setIndexProperty("index.direct.fields.count", "" + FieldScore.FIELDS_COUNT);
    currentIndex.setIndexProperty(
        "index.direct.fields.names", ArrayUtils.join(FieldScore.FIELD_NAMES, ","));
    if (FieldScore.FIELDS_COUNT > 0) {
      currentIndex.addIndexStructure(
          "document-factory",
          FieldDocumentIndexEntry.Factory.class.getName(),
          "java.lang.String",
          "${index.direct.fields.count}");
    } else {
      currentIndex.addIndexStructure(
          "document-factory", BasicDocumentIndexEntry.Factory.class.getName(), "", "");
    }
    /* flush the index buffers */
    directIndexBuilder.close();
    docIndexBuilder.finishedCollections();
    /* and then merge all the temporary lexicons */
    lexiconBuilder.finishedDirectIndexBuild();
    try {
      metaBuilder.close();
    } catch (IOException ioe) {
      logger.error("Could not finish MetaIndexBuilder: ", ioe);
    }
    if (FieldScore.FIELDS_COUNT > 0) {
      currentIndex.addIndexStructure(
          "lexicon-valuefactory",
          FieldLexiconEntry.Factory.class.getName(),
          "java.lang.String",
          "${index.direct.fields.count}");
    }
    /* reset the in-memory mapping of terms to term codes.*/
    TermCodes.reset();
    System.gc();
    try {
      currentIndex.flush();
    } catch (IOException ioe) {
      logger.error("Could not flush index properties: ", ioe);
    }
  }