Example #1
1
  /**
   * Binds the document held in the given JCas to the prepared document insert statement, executes
   * it and returns the key generated for the new row.
   *
   * @param jCas the JCas supplying the document text, language and document annotation
   * @return the generated document key; never {@code null}
   * @throws SQLException if binding a parameter or executing the insert fails
   * @throws BaleenException if no key is returned for the inserted row
   */
  private Integer executeDocInsert(JCas jCas) throws SQLException, BaleenException {
    DocumentAnnotation docAnnotation = getDocumentAnnotation(jCas);
    String externalId = ConsumerUtils.getExternalId(docAnnotation, contentHashAsId);

    insertDocStatement.clearParameters();

    // Scalar columns, in the column order of the prepared INSERT
    insertDocStatement.setString(1, externalId);
    insertDocStatement.setString(2, docAnnotation.getDocType());
    insertDocStatement.setString(3, docAnnotation.getSourceUri());
    insertDocStatement.setString(4, jCas.getDocumentText());
    insertDocStatement.setString(5, jCas.getDocumentLanguage());
    insertDocStatement.setTimestamp(6, new Timestamp(docAnnotation.getTimestamp()));
    insertDocStatement.setString(7, docAnnotation.getDocumentClassification());

    // Array columns are built against the same connection the statement uses
    Connection connection = postgresResource.getConnection();
    insertDocStatement.setArray(
        8, createVarcharArray(connection, docAnnotation.getDocumentCaveats()));
    insertDocStatement.setArray(
        9, createVarcharArray(connection, docAnnotation.getDocumentReleasability()));

    insertDocStatement.executeUpdate();

    Integer generatedKey = getKey(insertDocStatement);
    if (generatedKey != null) {
      return generatedKey;
    }

    throw new BaleenException("No document key returned");
  }
Example #2
0
  /**
   * Issues the table-creation statements for the document, document metadata, entity and entity
   * geometry tables, then turns auto-commit off on the connection.
   *
   * <p>Whether an already existing table is skipped depends on {@code CREATE_TABLE_PREFIX}, which
   * is defined elsewhere in this class.
   *
   * @throws ResourceInitializationException if any statement fails or auto-commit cannot be
   *     changed
   */
  private void createTables() throws ResourceInitializationException {
    Connection connection = postgresResource.getConnection();

    try (Statement statement = connection.createStatement()) {
      String docTable = getTableName(DOC_ROOT);
      String docMetadataTable = getTableName(DOC_METADATA_ROOT);
      String entityTable = getTableName(ENTITY_ROOT);
      String entityGeoTable = getTableName(ENTITY_GEO_ROOT);

      // Order matters: referenced tables must be created before the tables that reference them
      String[] ddlStatements = {
        CREATE_TABLE_PREFIX
            + docTable
            + " (key serial primary key, externalId character varying, type character varying, source character varying, content character varying, language character varying, processed timestamp, classification character varying, caveats character varying[], releasability character varying[])",
        CREATE_TABLE_PREFIX
            + docMetadataTable
            + " (key serial primary key, doc_key integer references "
            + docTable
            + "(key), name character varying, value character varying)",
        CREATE_TABLE_PREFIX
            + entityTable
            + " (key serial primary key, doc_key integer references "
            + docTable
            + "(key), externalId character varying[], type character varying, value character varying[])",
        CREATE_TABLE_PREFIX
            + entityGeoTable
            + " (key serial primary key, entity_key integer references "
            + entityTable
            + "(key), geo geometry(Geometry, 4326))"
      };

      for (String ddl : ddlStatements) {
        statement.execute(ddl);
      }

      // Subsequent work on this connection is committed or rolled back explicitly
      connection.setAutoCommit(false);
    } catch (SQLException e) {
      throw new ResourceInitializationException(e);
    }
  }
Example #3
0
  /**
   * Writes the document, its metadata and its entities to the database and commits them
   * together.
   *
   * <p>If an {@link SQLException} or {@link BaleenException} occurs, the error is logged and a
   * rollback is attempted; the exception is not propagated to the caller.
   *
   * @param jCas the JCas holding the document to persist
   */
  @Override
  protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    Connection connection = postgresResource.getConnection();

    try {
      // The document row comes first, as its key is needed by everything else
      Integer docKey = executeDocInsert(jCas);

      for (Metadata metadata : JCasUtil.select(jCas, Metadata.class)) {
        executeDocMetadataInsert(docKey, metadata);
      }

      processEntities(jCas, docKey);

      connection.commit();
    } catch (SQLException | BaleenException insertFailure) {
      getMonitor().error("Unable to insert document into Postgres database", insertFailure);

      if (connection == null) {
        return;
      }

      try {
        connection.rollback();
      } catch (SQLException rollbackFailure) {
        getMonitor()
            .error(
                "Unable to rollback insertion - state of the database may have been left inconsistent",
                rollbackFailure);
      }
    }
  }
Example #4
0
  /**
   * Verifies the PostGIS version, creates the tables and prepares the four insert statements
   * (document, document metadata, entity, entity geometry) used by this class.
   *
   * <p>The document and entity statements are prepared with {@code
   * Statement.RETURN_GENERATED_KEYS} so the keys of inserted rows can be read back.
   *
   * @param aContext the UIMA context (not used directly here)
   * @throws ResourceInitializationException if the version check, table creation or statement
   *     preparation fails
   */
  @Override
  public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    checkVersion();
    createTables();

    try {
      Connection connection = postgresResource.getConnection();

      String insertDocSql =
          INSERT_INTO_PREFIX
              + getTableName(DOC_ROOT)
              + " (externalId, type, source, content, language, processed, classification, caveats, releasability) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
      insertDocStatement =
          connection.prepareStatement(insertDocSql, Statement.RETURN_GENERATED_KEYS);

      String insertDocMetadataSql =
          INSERT_INTO_PREFIX
              + getTableName(DOC_METADATA_ROOT)
              + " (doc_key, name, value) VALUES (?, ?, ?)";
      insertDocMetadataStatement = connection.prepareStatement(insertDocMetadataSql);

      String insertEntitySql =
          INSERT_INTO_PREFIX
              + getTableName(ENTITY_ROOT)
              + " (doc_key, externalId, type, value) VALUES (?, ?, ?, ?)";
      insertEntityStatement =
          connection.prepareStatement(insertEntitySql, Statement.RETURN_GENERATED_KEYS);

      String insertEntityGeoSql =
          INSERT_INTO_PREFIX
              + getTableName(ENTITY_GEO_ROOT)
              + " (entity_key, geo) VALUES (?, ST_GeomFromGeoJSON(?))";
      insertEntityGeoStatement = connection.prepareStatement(insertEntityGeoSql);
    } catch (SQLException e) {
      throw new ResourceInitializationException(e);
    }
  }
Example #5
0
  /**
   * Inserts a single entity row linked to the given document and returns the key generated for
   * it.
   *
   * @param docKey key of the document row the entity belongs to
   * @param values entity values, stored as a varchar array
   * @param externalIds external ids of the entity, stored as a varchar array
   * @param type entity type
   * @return the generated entity key, or {@code null} if none was returned (an error is logged in
   *     that case)
   * @throws SQLException if binding a parameter or executing the insert fails
   */
  private Integer executeEntityInsert(
      Integer docKey, Collection<String> values, Collection<String> externalIds, String type)
      throws SQLException {
    Connection connection = postgresResource.getConnection();

    insertEntityStatement.clearParameters();
    insertEntityStatement.setInt(1, docKey);

    String[] externalIdArray = externalIds.toArray(new String[0]);
    insertEntityStatement.setArray(2, connection.createArrayOf(VARCHAR, externalIdArray));

    insertEntityStatement.setString(3, type);

    String[] valueArray = values.toArray(new String[0]);
    insertEntityStatement.setArray(4, connection.createArrayOf(VARCHAR, valueArray));

    insertEntityStatement.executeUpdate();

    Integer generatedKey = getKey(insertEntityStatement);
    if (generatedKey == null) {
      // Not fatal here: the caller receives null and can skip the dependent geo insert
      getMonitor().error("No entity key returned - Geo insertion, if applicable, will be skipped");
    }

    return generatedKey;
  }
Example #6
0
  /**
   * Checks that Postgres has at least version 2 of PostGIS installed.
   *
   * <p>The version is read with {@code SELECT PostGIS_Lib_Version()}; only the major component
   * (the text before the first '.') is inspected.
   *
   * @throws ResourceInitializationException if the version cannot be retrieved or parsed, or if
   *     the major version is lower than 2
   */
  private void checkVersion() throws ResourceInitializationException {
    try (Statement s = postgresResource.getConnection().createStatement();
        ResultSet rs = s.executeQuery("SELECT PostGIS_Lib_Version() AS version")) {

      // Report an empty result or a NULL value explicitly rather than relying on a
      // driver-specific error or a NullPointerException further down.
      if (!rs.next()) {
        throw new SQLException("PostGIS_Lib_Version() returned no rows");
      }

      String version = rs.getString("version");
      if (version == null) {
        throw new SQLException("PostGIS_Lib_Version() returned NULL");
      }

      int majorVersion = Integer.parseInt(version.split("\\.")[0]);

      if (majorVersion < 2) {
        throw new BaleenException("Unsupported PostGIS Version");
      }

    } catch (SQLException | NumberFormatException | NullPointerException e) {
      // NullPointerException is still caught so that a missing resource or connection keeps
      // surfacing as a ResourceInitializationException, as it did before.
      getMonitor().error("Unable to retrieve PostGIS version");
      throw new ResourceInitializationException(e);
    } catch (BaleenException e) {
      throw new ResourceInitializationException(e);
    }
  }