public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); this.dirPath = (String) context.getConfigParameterValue(PARAM_DIR_PATH); this.baseData = new File(this.dirPath); // set up the file system. // // corpus // | // +-- categoryLabel_<train|test|eval>.txt // // each category contains one line per document with line breaks stripped away // try { if (baseData.exists()) { Map<String, File> filesToZip = Converters.recursivelyListFiles(this.baseData); try { SimpleDateFormat df = new SimpleDateFormat("MM-dd-yy-HHmm"); String suffix = df.format(new Date()); File targetZip = new File( this.baseData.getParentFile().getParentFile().getName() + "_" + this.baseData.getParentFile().getName() + "_" + this.baseData.getName() + "_" + suffix + ".zip"); Converters.zipIt(filesToZip, targetZip); } catch (Exception e) { } Converters.recursivelyDeleteFiles(this.baseData); } this.baseData.mkdirs(); } catch (Exception e) { throw new ResourceInitializationException(e); } }
@Test public void test() throws Exception { String[] args = {PdfDirWatcher.READ_SECTION_TEXT, inputFile.getPath(), outputFile.getPath()}; WatchDirectory.main(args); // Listen for changes over 60 seconds for (int i = 0; i < 60; i++) { Thread.sleep(1000); System.out.print(i + " secs ... "); if (i == 5) { Converters.copyFile(f1, f2); Converters.copyFile(f1, f3); } if (i == 45) { f3.delete(); } } }
/**
 * Prepares the test fixture.
 *
 * <p>Resolves the sample input directory and sample PDF from the classpath,
 * recreates an empty {@code temp} output directory next to the input
 * directory, and removes any leftover {@code temp.pdf} / {@code temp2.pdf}
 * copies next to the sample PDF.
 *
 * @throws IllegalStateException if a required sample resource is not on the
 *         classpath
 * @throws Exception if the superclass set-up or the directory clean-up fails
 */
protected void setUp() throws Exception {

    super.setUp();

    ClassLoader loader = this.getClass().getClassLoader();

    URL dirUrl = loader.getResource("sampleData/plos/8_8");
    if (dirUrl == null) {
        throw new IllegalStateException(
                "Test resource not found on classpath: sampleData/plos/8_8");
    }
    // URL.getPath() keeps percent-encoding (a space stays "%20"), which yields
    // a non-existent File when the checkout path contains such characters.
    // Converting through a URI decodes the path correctly.
    inputFile = new File(dirUrl.toURI());

    outputFile = new File(inputFile.getParent() + "/temp");
    Converters.recursivelyDeleteFiles(outputFile);
    outputFile.mkdir();

    URL pdfUrl = loader.getResource("sampleData/plos/8_8/pbio.1000441.pdf");
    if (pdfUrl == null) {
        throw new IllegalStateException(
                "Test resource not found on classpath: sampleData/plos/8_8/pbio.1000441.pdf");
    }
    f1 = new File(pdfUrl.toURI());

    // Start from a clean state: drop copies left behind by an earlier run.
    f2 = new File(f1.getParent() + "/temp.pdf");
    f2.delete();

    f3 = new File(f1.getParent() + "/temp2.pdf");
    f3.delete();

}
/**
 * Cleans up after the test: deletes the two PDF copies and recursively removes
 * the output directory.
 *
 * @throws Exception if the superclass tear-down or the recursive delete fails
 */
protected void tearDown() throws Exception {

    super.tearDown();

    // remove the PDF copies the test may have created
    for (File copy : new File[] {f2, f3}) {
        copy.delete();
    }

    Converters.recursivelyDeleteFiles(outputFile);

}
public static void main(String args[]) throws Exception { LapdfEngine engine = new LapdfEngine(); if (args.length < 1) { System.err.println(USAGE); System.exit(1); } String inputFileOrDirPath = args[0]; String outputDirPath = ""; String ruleFilePath = ""; File inputFileOrDir = new File(inputFileOrDirPath); if (!inputFileOrDir.exists()) { System.err.println(USAGE); System.err.println("Input file / dir '" + inputFileOrDirPath + "' does not exist."); System.err.println("Please include full path"); System.exit(1); } // output folder is set. if (args.length > 1) { outputDirPath = args[1]; } else { outputDirPath = "-"; } if (outputDirPath.equals("-")) { if (inputFileOrDir.isDirectory()) { outputDirPath = inputFileOrDirPath; } else { outputDirPath = inputFileOrDir.getParent(); } } File outDir = new File(outputDirPath); if (!outDir.exists()) { outDir.mkdir(); } // output folder is set. File ruleFile = null; if (args.length > 2) { ruleFilePath = args[2]; } else { ruleFilePath = "-"; } if (ruleFilePath.equals("-")) { ruleFile = Converters.extractFileFromJarClasspath("rules/general.drl"); } else { ruleFile = new File(ruleFilePath); } if (!ruleFile.exists()) { System.err.println(USAGE); System.err.println(ruleFilePath + " does not exist."); System.err.println("Please include full path"); } if (inputFileOrDir.isDirectory()) { Pattern patt = Pattern.compile("\\.pdf$"); Map<String, File> inputFiles = Converters.recursivelyListFiles(inputFileOrDir, patt); Iterator<String> it = inputFiles.keySet().iterator(); while (it.hasNext()) { String key = it.next(); File pdf = inputFiles.get(key); String pdfStem = pdf.getName(); pdfStem = pdfStem.replaceAll("\\.pdf", ""); String outImgPath = Converters.mimicDirectoryStructure(inputFileOrDir, outDir, pdf).getPath(); outImgPath = outImgPath.replaceAll("\\.pdf", "_secImgs"); File outImgDir = new File(outImgPath); if (!outImgDir.exists()) outImgDir.mkdir(); try { LapdfDocument lapdf = engine.blockifyPdfFile(pdf); engine.classifyDocument(lapdf, 
ruleFile); engine.renderImageOutlines(lapdf, outImgDir, pdfStem, LapdfMode.CLASSIFY); } catch (Exception e) { e.printStackTrace(); } } } else { String pdfStem = inputFileOrDir.getName(); pdfStem = pdfStem.replaceAll("\\.pdf$", ""); String outImgPath = outDir.getPath() + "/" + pdfStem + "_secImgs"; File outImgDir = new File(outImgPath); if (!outImgDir.exists()) outImgDir.mkdir(); LapdfDocument lapdf = engine.blockifyPdfFile(inputFileOrDir); engine.classifyDocument(lapdf, ruleFile); engine.renderImageOutlines(lapdf, outImgDir, pdfStem, LapdfMode.CLASSIFY); } }