/**
 * Inserts a row for the document held in the given JCas and returns the key reported for it.
 *
 * <p>Binds, in order: external id, document type, source URI, document text, language,
 * timestamp, classification, caveats and releasability.
 *
 * @param jCas the CAS whose document annotation and text are written
 * @return the key obtained from {@code getKey} for the inserted row; never {@code null}
 * @throws SQLException if binding parameters or executing the statement fails
 * @throws BaleenException if no key is returned after the insert
 */
private Integer executeDocInsert(JCas jCas) throws SQLException, BaleenException {
  DocumentAnnotation docAnnotation = getDocumentAnnotation(jCas);
  String externalId = ConsumerUtils.getExternalId(docAnnotation, contentHashAsId);

  insertDocStatement.clearParameters();

  // Running index keeps the bindings aligned with the column order of the prepared INSERT.
  int param = 0;
  insertDocStatement.setString(++param, externalId);
  insertDocStatement.setString(++param, docAnnotation.getDocType());
  insertDocStatement.setString(++param, docAnnotation.getSourceUri());
  insertDocStatement.setString(++param, jCas.getDocumentText());
  insertDocStatement.setString(++param, jCas.getDocumentLanguage());
  insertDocStatement.setTimestamp(++param, new Timestamp(docAnnotation.getTimestamp()));
  insertDocStatement.setString(++param, docAnnotation.getDocumentClassification());
  insertDocStatement.setArray(
      ++param,
      createVarcharArray(postgresResource.getConnection(), docAnnotation.getDocumentCaveats()));
  insertDocStatement.setArray(
      ++param,
      createVarcharArray(
          postgresResource.getConnection(), docAnnotation.getDocumentReleasability()));

  insertDocStatement.executeUpdate();

  Integer generatedKey = getKey(insertDocStatement);
  if (generatedKey != null) {
    return generatedKey;
  }
  throw new BaleenException("No document key returned");
}
/**
 * Creates the document, document-metadata, entity and entity-geometry tables, then switches the
 * connection to manual commit mode.
 *
 * <p>Tables are created in dependency order, since each later table references an earlier one.
 *
 * @throws ResourceInitializationException if any statement fails with an {@link SQLException}
 */
private void createTables() throws ResourceInitializationException {
  try (Statement stmt = postgresResource.getConnection().createStatement()) {
    String createDocs =
        CREATE_TABLE_PREFIX
            + getTableName(DOC_ROOT)
            + " (key serial primary key, externalId character varying, type character varying,"
            + " source character varying, content character varying, language character varying,"
            + " processed timestamp, classification character varying,"
            + " caveats character varying[], releasability character varying[])";
    stmt.execute(createDocs);

    String createDocMetadata =
        CREATE_TABLE_PREFIX
            + getTableName(DOC_METADATA_ROOT)
            + " (key serial primary key, doc_key integer references "
            + getTableName(DOC_ROOT)
            + "(key), name character varying, value character varying)";
    stmt.execute(createDocMetadata);

    String createEntities =
        CREATE_TABLE_PREFIX
            + getTableName(ENTITY_ROOT)
            + " (key serial primary key, doc_key integer references "
            + getTableName(DOC_ROOT)
            + "(key), externalId character varying[], type character varying,"
            + " value character varying[])";
    stmt.execute(createEntities);

    String createEntityGeo =
        CREATE_TABLE_PREFIX
            + getTableName(ENTITY_GEO_ROOT)
            + " (key serial primary key, entity_key integer references "
            + getTableName(ENTITY_ROOT)
            + "(key), geo geometry(Geometry, 4326))";
    stmt.execute(createEntityGeo);

    // From here on, inserts are only persisted by an explicit commit.
    postgresResource.getConnection().setAutoCommit(false);
  } catch (SQLException e) {
    throw new ResourceInitializationException(e);
  }
}
@Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { Connection conn = postgresResource.getConnection(); try { // Insert document and metadata into database Integer docKey = executeDocInsert(jCas); for (Metadata md : JCasUtil.select(jCas, Metadata.class)) { executeDocMetadataInsert(docKey, md); } processEntities(jCas, docKey); conn.commit(); } catch (SQLException | BaleenException e) { getMonitor().error("Unable to insert document into Postgres database", e); if (conn != null) { try { conn.rollback(); } catch (SQLException e2) { getMonitor() .error( "Unable to rollback insertion - state of the database may have been left inconsistent", e2); } } } }
/**
 * Checks the PostGIS version, creates the tables, and prepares the four insert statements used
 * during processing (document, document metadata, entity, entity geometry).
 *
 * <p>The document and entity statements are prepared with {@code RETURN_GENERATED_KEYS} because
 * their keys are read back after each insert.
 *
 * @param aContext the UIMA context (not used directly here)
 * @throws ResourceInitializationException if the version check, table creation, or statement
 *     preparation fails
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
  checkVersion();
  createTables();

  try {
    String docSql =
        INSERT_INTO_PREFIX
            + getTableName(DOC_ROOT)
            + " (externalId, type, source, content, language, processed, classification, caveats, releasability) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";
    insertDocStatement =
        postgresResource.getConnection().prepareStatement(docSql, Statement.RETURN_GENERATED_KEYS);

    String docMetadataSql =
        INSERT_INTO_PREFIX
            + getTableName(DOC_METADATA_ROOT)
            + " (doc_key, name, value) VALUES (?, ?, ?)";
    insertDocMetadataStatement = postgresResource.getConnection().prepareStatement(docMetadataSql);

    String entitySql =
        INSERT_INTO_PREFIX
            + getTableName(ENTITY_ROOT)
            + " (doc_key, externalId, type, value) VALUES (?, ?, ?, ?)";
    insertEntityStatement =
        postgresResource
            .getConnection()
            .prepareStatement(entitySql, Statement.RETURN_GENERATED_KEYS);

    String entityGeoSql =
        INSERT_INTO_PREFIX
            + getTableName(ENTITY_GEO_ROOT)
            + " (entity_key, geo) VALUES (?, ST_GeomFromGeoJSON(?))";
    insertEntityGeoStatement = postgresResource.getConnection().prepareStatement(entityGeoSql);
  } catch (SQLException e) {
    throw new ResourceInitializationException(e);
  }
}
/**
 * Inserts one entity row linked to the given document and returns the key reported for it.
 *
 * @param docKey key of the owning document row
 * @param values entity values, stored as a varchar array
 * @param externalIds external ids of the entity, stored as a varchar array
 * @param type entity type name
 * @return the key obtained from {@code getKey}, or {@code null} if none was returned (an error is
 *     logged in that case)
 * @throws SQLException if binding parameters or executing the statement fails
 */
private Integer executeEntityInsert(
    Integer docKey, Collection<String> values, Collection<String> externalIds, String type)
    throws SQLException {
  insertEntityStatement.clearParameters();

  insertEntityStatement.setInt(1, docKey);

  String[] idArray = externalIds.toArray(new String[0]);
  insertEntityStatement.setArray(
      2, postgresResource.getConnection().createArrayOf(VARCHAR, idArray));

  insertEntityStatement.setString(3, type);

  String[] valueArray = values.toArray(new String[0]);
  insertEntityStatement.setArray(
      4, postgresResource.getConnection().createArrayOf(VARCHAR, valueArray));

  insertEntityStatement.executeUpdate();

  Integer generatedKey = getKey(insertEntityStatement);
  if (generatedKey == null) {
    // Not fatal: the caller receives null and is expected to skip the geo insert.
    getMonitor().error("No entity key returned - Geo insertion, if applicable, will be skipped");
  }
  return generatedKey;
}
/**
 * Check that Postgres has at least version 2 of PostGIS installed.
 *
 * <p>Runs {@code SELECT PostGIS_Lib_Version()} and parses the text before the first dot as the
 * major version.
 *
 * @throws ResourceInitializationException if the query fails, returns no row or a NULL version,
 *     the major version is not a number, or the major version is below 2
 */
private void checkVersion() throws ResourceInitializationException {
  try (Statement s = postgresResource.getConnection().createStatement();
      ResultSet rs = s.executeQuery("SELECT PostGIS_Lib_Version() AS version")) {
    // Fail with a clear message rather than relying on the driver's behaviour when reading
    // from a result set that was never positioned on a row.
    if (!rs.next()) {
      throw new SQLException("PostGIS version query returned no rows");
    }

    String version = rs.getString("version");
    if (version == null) {
      throw new SQLException("PostGIS version query returned NULL");
    }

    int majorVersion = Integer.parseInt(version.split("\\.")[0]);
    if (majorVersion < 2) {
      throw new BaleenException("Unsupported PostGIS Version");
    }
  } catch (SQLException | NumberFormatException | NullPointerException e) {
    // NullPointerException is still caught so that an unavailable connection continues to
    // surface as ResourceInitializationException, as it did before.
    getMonitor().error("Unable to retrieve PostGIS version", e);
    throw new ResourceInitializationException(e);
  } catch (BaleenException e) {
    throw new ResourceInitializationException(e);
  }
}