@Override @SuppressWarnings("unchecked") protected void cleanup(Reducer.Context context) throws IOException, InterruptedException { if (!Project.getProject().isMetadataCollectStandard()) { // write summary headers with all metadata, but for standard metadata don't write the last // line context.write(new Text("Hash"), new Text(columnMetadata.delimiterSeparatedHeaders())); } zipFileWriter.closeZip(); if (Project.getProject().isLuceneIndexEnabled()) { mergeLuceneIndex(); } Project project = Project.getProject(); if (project.isEnvHadoop()) { String outputPath = Project.getProject().getProperty(ParameterProcessing.OUTPUT_DIR_HADOOP); String zipFileName = zipFileWriter.getZipFileName(); if (project.isFsHdfs()) { String cmd = "hadoop fs -copyFromLocal " + zipFileName + " " + outputPath + File.separator + context.getTaskAttemptID() + ".zip"; OsUtil.runCommand(cmd); } else if (project.isFsS3()) { S3Agent s3agent = new S3Agent(); String run = project.getRun(); if (!run.isEmpty()) { run = run + "/"; } String s3key = project.getProjectCode() + File.separator + "output/" + run + "results/" + context.getTaskAttemptID() + ".zip"; // Keep updating the hadoop progress int refreshInterval = 60000; Timer timer = new Timer(refreshInterval, this); timer.start(); s3agent.putFileInS3(zipFileName, s3key); timer.stop(); } } Stats.getInstance().setJobFinished(); }
/**
 * Returns the shared {@code SolrIndex}, creating it on the first call.
 * <p>
 * The implementation is chosen once, from the current project's
 * "send index to Solr" flag: {@code HttpSolrIndex} when the flag is set,
 * {@code DisabledSolrIndex} otherwise. The method is synchronized.
 *
 * @return the single shared index instance
 */
public static synchronized SolrIndex getInstance() {
    if (__instance != null) {
        return __instance;
    }
    boolean sendToSolr = Project.getProject().isSendIndexToSolrEnabled();
    if (!sendToSolr) {
        __instance = new DisabledSolrIndex();
    } else {
        __instance = new HttpSolrIndex();
    }
    return __instance;
}
@Override @SuppressWarnings("unchecked") protected void setup(Reducer.Context context) throws IOException, InterruptedException { this.context = context; String settingsStr = context.getConfiguration().get(ParameterProcessing.SETTINGS_STR); Settings settings = Settings.loadFromString(settingsStr); Settings.setSettings(settings); String projectStr = context.getConfiguration().get(ParameterProcessing.PROJECT); Project project = Project.loadFromString(projectStr); if (project.isEnvHadoop()) { String metadataFileContents = context.getConfiguration().get(ParameterProcessing.METADATA_FILE); new File(ColumnMetadata.metadataNamesFile).getParentFile().mkdirs(); Files.write(metadataFileContents.getBytes(), new File(ColumnMetadata.metadataNamesFile)); } columnMetadata = new ColumnMetadata(); String fileSeparatorStr = project.getFieldSeparator(); char fieldSeparatorChar = Delimiter.getDelim(fileSeparatorStr); columnMetadata.setFieldSeparator(String.valueOf(fieldSeparatorChar)); columnMetadata.setAllMetadata(project.getMetadataCollect()); // write standard metadata fields context.write(null, new Text(columnMetadata.delimiterSeparatedHeaders())); zipFileWriter.setup(); zipFileWriter.openZipForWriting(); luceneIndex = new LuceneIndex(settings.getLuceneIndexDir(), project.getProjectCode(), null); luceneIndex.init(); }
private void mergeLuceneIndex() throws IOException { String luceneDir = Settings.getSettings().getLuceneIndexDir(); String hdfsLuceneDir = "/" + luceneDir + File.separator + Project.getProject().getProjectCode() + File.separator; String localLuceneTempDir = luceneDir + File.separator + "tmp" + File.separator; File localLuceneTempDirFile = new File(localLuceneTempDir); if (localLuceneTempDirFile.exists()) { Util.deleteDirectory(localLuceneTempDirFile); } localLuceneTempDirFile.mkdir(); // copy all zip lucene indexes, created by maps to local hd String cmd = "hadoop fs -copyToLocal " + hdfsLuceneDir + "* " + localLuceneTempDir; OsUtil.runCommand(cmd); // remove the map indexes as they are now copied to local String removeOldZips = "hadoop fs -rm " + hdfsLuceneDir + "*"; OsUtil.runCommand(removeOldZips); logger.trace("Lucene index files collected to: {}", localLuceneTempDirFile.getAbsolutePath()); String[] zipFilesArr = localLuceneTempDirFile.list(); for (String indexZipFileStr : zipFilesArr) { String indexZipFileName = localLuceneTempDir + indexZipFileStr; String unzipToDir = localLuceneTempDir + indexZipFileStr.replace(".zip", ""); ZipUtil.unzipFile(indexZipFileName, unzipToDir); File indexDir = new File(unzipToDir); FSDirectory fsDir = FSDirectory.open(indexDir); luceneIndex.addToIndex(fsDir); } // TODO check if we need to push the index to S3 or somewhere else luceneIndex.destroy(); }
@Override public void init() { try { String endpoint = getSolrEndpoint(); if (supportMultipleProjects) { String projectCode = Project.getProject().getProjectCode(); String command = endpoint + "solr/admin/cores?action=CREATE&name=" + SOLR_INSTANCE_DIR + "_" + projectCode + "&instanceDir=" + SOLR_INSTANCE_DIR + "&config=solrconfig.xml&dataDir=data_" + projectCode + "&schema=schema.xml"; sendGetCommand(command); this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update"; } else { sendGetCommand(endpoint + "solr/admin/ping"); this.updateUrl = endpoint + "solr/update"; } String deleteAll = "<delete><query>id:[*TO *]</query></delete>"; sendPostCommand(updateUrl, deleteAll); sendPostCommand(updateUrl, "<commit/>"); } catch (SolrException se) { History.appendToHistory("Problem with SOLR init: " + se.getMessage()); // se.printStackTrace(); } }