private static Directory index(Analyzer analyzer, String processingPath) { RAMDirectory directory = null; IndexWriter indexWriter = null; try { directory = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer); indexWriter = new IndexWriter(directory, iwc); File file = new File(processingPath); index_h("", file, indexWriter); } catch (IOException e) { e.printStackTrace(); } finally { if (indexWriter != null) { try { indexWriter.close(); } catch (CorruptIndexException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } } return directory; }
/**
 * Collects, per indexed document, the terms of the {@code content} field together with their
 * frequency inside that document.
 *
 * <p>Each list entry is the term text immediately followed by the frequency reported by
 * {@code TermDocs.freq()} (no separator). Documents are keyed by the value of their stored
 * {@code relative_path} field.
 *
 * <p>An {@link IOException} is printed to standard error and not rethrown; whatever was
 * collected up to that point is returned.
 *
 * @param directory Lucene directory to read the index from
 * @return map from {@code relative_path} to the term/frequency entries of that document
 */
private static Map<String, List<String>> generate_result(Directory directory) {
  Map<String, List<String>> result_map = new HashMap<String, List<String>>();
  // try-with-resources: the reader and the term enumeration were previously never closed.
  try (IndexReader reader = IndexReader.open(directory);
      TermEnum termEnum = reader.terms()) {
    while (termEnum.next()) {
      // Ask the term for its field and text directly instead of parsing "field:text";
      // parsing with lastIndexOf(":") truncated every term whose text contains a colon.
      if (!"content".equals(termEnum.term().field())) {
        continue;
      }
      String termText = termEnum.term().text();
      try (TermDocs termDocs = reader.termDocs(termEnum.term())) {
        while (termDocs.next()) {
          Document doc = reader.document(termDocs.doc());
          String relative_path = doc.get("relative_path");
          List<String> entries = result_map.get(relative_path);
          if (entries == null) {
            entries = new ArrayList<String>();
            result_map.put(relative_path, entries);
          }
          // Always record the entry: the old code only created the list for a document's
          // first term and silently dropped that term.
          entries.add(termText + termDocs.freq());
        }
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return result_map;
}
/**
 * Reads a comma-separated file of fixed classes and returns the symbol-to-class mapping.
 *
 * <p>Every non-blank line has the form {@code classId,symbol1,symbol2,...}. If the same class
 * id occurs on several lines, only the symbols of the last such line are kept. If a symbol is
 * listed under several class ids, which id wins depends on the iteration order of the
 * intermediate {@link HashMap}.
 *
 * @param file path of the file to read
 * @return map from symbol to class id; an empty map if the file does not exist; {@code null}
 *     if reading the file fails with an {@link IOException}
 * @throws NumberFormatException if the first field of a non-blank line is not an integer
 */
private Map<String, Integer> loadFixedClasses(String file) {
  Map<String, Integer> invertedFixedClasses = new HashMap<String, Integer>();
  File f = new File(file);
  if (!f.exists()) {
    // NOTE(review): the message reports the field fixedClassesFile, not the parameter that
    // was actually checked - confirm they are always the same path.
    System.out.println("Extra classes file doesn't exist: " + fixedClassesFile);
    return invertedFixedClasses;
  }

  Map<Integer, List<String>> fixedClaszzes = new HashMap<Integer, List<String>>();
  // try-with-resources: the reader was previously never closed.
  try (BufferedReader bufRead = new BufferedReader(new FileReader(f))) {
    String line;
    while ((line = bufRead.readLine()) != null) {
      if (line.trim().isEmpty()) {
        // A blank (e.g. trailing) line carries no class id; skip it instead of failing.
        continue;
      }
      String[] parts = line.split(",");
      int clazz = Integer.parseInt(parts[0]);
      List<String> symbols =
          new ArrayList<String>(Arrays.asList(parts).subList(1, parts.length));
      fixedClaszzes.put(clazz, symbols);
    }
  } catch (IOException e) {
    e.printStackTrace();
    return null;
  }

  for (Map.Entry<Integer, List<String>> e : fixedClaszzes.entrySet()) {
    for (String s : e.getValue()) {
      invertedFixedClasses.put(s, e.getKey());
    }
  }
  return invertedFixedClasses;
}
/**
 * Writes one file per map entry: the file name is {@code outPath} directly followed by the
 * entry key, and the file content is the entry's list, one element per line.
 *
 * <p>A failure to write one file is printed to standard error and does not stop the remaining
 * files from being written.
 *
 * @param outPath prefix prepended (by plain string concatenation) to every key
 * @param result_map lines to write, keyed by the path suffix of the target file
 */
private static void output_result(String outPath, Map<String, List<String>> result_map) {
  for (Map.Entry<String, List<String>> entry : result_map.entrySet()) {
    File target = new File(outPath + entry.getKey());
    try {
      FileUtils.writeLines(target, entry.getValue());
    } catch (IOException writeFailure) {
      writeFailure.printStackTrace();
    }
  }
}
/**
 * Reads the stop-word file at {@code path} as UTF-8 and returns its lines.
 *
 * <p>As a debugging aid the 160th line is echoed to standard output when the file has at least
 * that many lines.
 *
 * @param path path of the stop-word file
 * @return the lines of the file in order, or {@code null} if the file could not be read
 */
private static String[] getStopWords(String path) {
  try {
    List<String> list = FileUtils.readLines(new File(path), "utf-8");
    // Guard the debug output: an unconditional list.get(159) threw
    // IndexOutOfBoundsException for any file shorter than 160 lines.
    if (list.size() > 159) {
      System.out.println("==============================================================");
      System.out.println("getStopWords " + list.get(159));
      System.out.println("==============================================================");
    }
    return list.toArray(new String[list.size()]);
  } catch (IOException e) {
    // Also covers FileNotFoundException, which is an IOException.
    e.printStackTrace();
  }
  return null;
}
public static void main(String[] args) { try { // IMPORTANT HINT REGARDING STRING ENCODING // in Java all Strings have UTF-8 as default encoding // therefore: there are only a few references to UTF-8 encoding in this demo code // however, if values are retrieved from a database or another program language is used, then // one needs to // make sure that the UTF-8 encoding is correctly implemented // create CMD line option object Options options = new Options(); // add CMD line options options.addOption( "o", "output-dir", true, "specify base output directory, if none is specified a new directory will be created in the current path"); options.addOption( "n", "number-of-generated-receipts", true, "specify number of receipts to be randomly generated, 50 is default"); options.addOption( "g", "signature-creation-device-cannot-fail", false, "deactivate glitches in signature-creation-device"); options.addOption( "s", "no-signature-certificate-switch", false, "deactivate switching of signature certificates after 5 receipts"); options.addOption( "t", "no-training-receipts", false, "deactivate random generation of training-receipts"); /// parse CMD line options CommandLineParser parser = new DefaultParser(); CommandLine cmd = parser.parse(options, args); boolean signatureCreationDeviceAlwaysWorks = cmd.hasOption("g"); boolean deactivateSignatureCertificateSwitching = cmd.hasOption("s"); boolean deactivateTrainingReceipts = cmd.hasOption("t"); String outputParentDirectoryString = cmd.getOptionValue("o"); if (outputParentDirectoryString == null) { DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); outputParentDirectoryString = "./CashBoxDemoOutput" + df.format(new Date()); } File OUTPUT_PARENT_DIRECTORY = new File(outputParentDirectoryString); OUTPUT_PARENT_DIRECTORY.mkdirs(); System.out.println("Setting workdir to " + OUTPUT_PARENT_DIRECTORY.getAbsolutePath()); String numberOfReceiptsString = cmd.getOptionValue("n"); int NUMBER_OF_RECEIPTS = 
DEFAULT_NUMBER_OF_GENERATED_RECEIPTS; if (numberOfReceiptsString != null) { NUMBER_OF_RECEIPTS = new Integer(numberOfReceiptsString); } // TODO add provider independent functionality // initialise cryptographic providers Security.addProvider(new BouncyCastleProvider()); // prepare cashbox init parameters CashBoxParameters cashBoxParameters = new CashBoxParameters(); // set parameter for signature certificate switching // if > 0 then switch signature certificate after so many signatures // this is important for demonstrating the handling of the DEP export format // when receipts where signed with multiple signature certificates if (deactivateSignatureCertificateSwitching) { cashBoxParameters.setChangeSignatureCertificateAfterSoManyReceipts(-1); } else { cashBoxParameters.setChangeSignatureCertificateAfterSoManyReceipts(10); } // generate and set random cash box ID ("Kassen-ID") // REF TO SPECIFICATION: Detailspezifikation/Abs 4 String CASH_BOX_ID = "DEMO-CASH-BOX" + Math.round(Math.random() * 1000); cashBoxParameters.setCashBoxID(CASH_BOX_ID); // set cashbox suite // REF TO SPECIFICATION: Detailspezifikation/Abs 2 // AT0 is used here for demonstration purposes, see Abs 2 for details on AT0 cashBoxParameters.setRkSuite(RKSuite.R1_AT0); // set initial receipt identifier // in this demo cashbox integer values are used as receipt identifiers ("Belegnummer"), // however the specification does not // impose that limit. An arbitrary UTF-8 String could be used, the only requirement is that // the same combination of // the cashBox ID ("Kassen-ID") and the receipt identifier ("Belegnummer") is NEVER used for // more than one receipt // using the same multiple times compromises the security of the encrypted turnover value, // which might lead // to leaked turnover data. 
// REF TO SPECIFICATION: Detailspezifikation/Abs 4, Abs 8, Abs 9, Abs 10 long initialReceiptIdentifier = Math.round(Math.random() * 1000000); cashBoxParameters.setInitialReceiptIdentifier(initialReceiptIdentifier); // set DEP module for storing and exporting receipts // REF TO SPECIFICATION: Detailspezifikation/Abs 3, 11 cashBoxParameters.setDepModul(new SimpleMemoryDEPModule()); // create random AES key for turnover encryption // REF TO SPECIFICATION: Detailspezifikation/Abs 4, Abs 8, Abs 9, Abs 10 cashBoxParameters.setTurnoverKeyAESkey(CashBoxUtils.createAESKey()); // set up signature module // the signature module is composed of an JWS module that create the JSON Web Signature (JWS) // and // a low level signature module for signing the hash values. // REF TO SPECIFICATION: Detailspezifikation/Abs 2, Abs 4, Abs 5, Abs 6 // JWSModule jwsModule = new OrgBitbucketBcJwsModule(); //requires bouncycastle provider JWSModule jwsModule = new ManualJWSModule(); // allows for provider independent use cases // set damage flag, which simulates the failure of the signature creation device and the // correct handling // of this case, obviously this is only suitable for demonstration purposes jwsModule.setDamageIsPossible(!signatureCreationDeviceAlwaysWorks); jwsModule.setProbabilityOfDamagedSignatureDevice(PROPABILITY_DAMAGED_SIGNATURE_DEVICE); jwsModule.setSignatureModule(new DO_NOT_USE_IN_REAL_CASHBOX_DemoSoftwareSignatureModule()); // jwsModule.setSignatureModule(new PKCS11SignatureModule()); cashBoxParameters.setJwsModule(jwsModule); // set printer module // REF TO SPECIFICATION: Detailspezifikation/Abs 12, Abs 13, Abs 14, Abs 15 PrinterModule printerModule = new SimplePDFPrinterModule(); cashBoxParameters.setPrinterModule(printerModule); // init the cash box with the parameters DemoCashBox demoCashBox = new DemoCashBox(cashBoxParameters); // init done, start interaction with cashbox // create random receipt data that will be handled by the cashbox List<RawReceiptData> 
receipts = RandomReceiptGenerator.generateRandomReceipts(NUMBER_OF_RECEIPTS); // store first receipt (Startbeleg) in cashbox // all taxtype values are set to zero (per default in this demo) RawReceiptData firstReceipt = new RawReceiptData(); demoCashBox.storeReceipt(firstReceipt, false, false); // now store the other receipts for (RawReceiptData rawReceiptData : receipts) { // store receipt within cashbox: (prepare data-to-be-signed, sign with JWS, store signed // receipt in DEP) // pre-defined chance for a training receipt (just for demo purposes) boolean isTrainingReceipt = false; if (Math.random() < PROPABILITY_TRAINING_RECEIPT && !deactivateTrainingReceipts) { isTrainingReceipt = true; } // pre-defined chance for a storno receipt boolean isStornoReceipt = false; if (Math.random() < PROPABILITY_OF_STORNO_RECEIPT) { isStornoReceipt = true; } demoCashBox.storeReceipt(rawReceiptData, isTrainingReceipt, isStornoReceipt); } // dump machine readable code of receipts (this "code" is used for the QR-codes) // REF TO SPECIFICATION: Detailspezifikation/Abs 12 // dump to File File qrCoreRepExportFile = new File(OUTPUT_PARENT_DIRECTORY, "qr-code-rep.txt"); List<ReceiptPackage> receiptPackages = demoCashBox.getStoredReceipts(); PrintWriter writer = new PrintWriter(new FileWriter(qrCoreRepExportFile)); System.out.println("------------QR-CODE-REP------------"); for (ReceiptPackage receiptPackage : receiptPackages) { System.out.println(receiptPackage.getQRCodeRepresentation()); writer.println(receiptPackage.getQRCodeRepresentation()); } System.out.println(""); writer.close(); // dump OCR code of receipts // REF TO SPECIFICATION: Detailspezifikation/Abs 14 // dump to File File ocrCoreRepExportFile = new File(OUTPUT_PARENT_DIRECTORY, "ocr-code-rep.txt"); writer = new PrintWriter(new FileWriter(ocrCoreRepExportFile)); System.out.println("------------OCR-CODE-REP------------"); for (ReceiptPackage receiptPackage : receiptPackages) { 
System.out.println(receiptPackage.getOcrCodeRepresentation()); writer.println(receiptPackage.getOcrCodeRepresentation()); } System.out.println(""); writer.close(); // export DEP from cashbox // REF TO SPECIFICATION: Detailspezifikation/Abs 3 DEPExportFormat depExportFormat = demoCashBox.exportDEP(); // get JSON rep and dump export format to file/std output Gson gson = new GsonBuilder().setPrettyPrinting().create(); String exportFormatJSONString = gson.toJson(depExportFormat); System.out.println("------------DEP-EXPORT-FORMAT------------"); System.out.println(exportFormatJSONString); System.out.println(""); // dump DEP export to file File depExportFile = new File(OUTPUT_PARENT_DIRECTORY, "dep-export.txt"); FileOutputStream outputStream = new FileOutputStream(depExportFile); outputStream.write(exportFormatJSONString.getBytes()); outputStream.close(); // export receipts as PDF (QR-CODE) // REF TO SPECIFICATION: Detailspezifikation/Abs 12, Abs 13 File qrCodeDumpDirectory = new File(OUTPUT_PARENT_DIRECTORY, "qr-code-dir-pdf"); qrCodeDumpDirectory.mkdirs(); List<byte[]> printedQRCodeReceipts = demoCashBox.printReceipt(receiptPackages, ReceiptPrintType.QR_CODE); CashBoxUtils.writeReceiptsToFiles(printedQRCodeReceipts, "QR-", qrCodeDumpDirectory); // export receipts as PDF (OCR) // REF TO SPECIFICATION: Detailspezifikation/Abs 14, Abs 15 File ocrCodeDumpDirectory = new File(OUTPUT_PARENT_DIRECTORY, "ocr-code-dir-pdf"); ocrCodeDumpDirectory.mkdirs(); List<byte[]> printedOCRCodeReceipts = demoCashBox.printReceipt(receiptPackages, ReceiptPrintType.OCR); CashBoxUtils.writeReceiptsToFiles(printedOCRCodeReceipts, "OCR-", ocrCodeDumpDirectory); // store signature certificates (so that they can be used for verification purposes) // only for demonstration purposes List<String> signatureCertificates = new ArrayList<>(); List<List<String>> certificateChains = new ArrayList<>(); DEPBelegDump[] belegDumps = depExportFormat.getBelegPackage(); for (DEPBelegDump depBelegDump : belegDumps) 
{ signatureCertificates.add(depBelegDump.getSignatureCertificate()); certificateChains.add(Arrays.asList(depBelegDump.getCertificateChain())); } File signatureCertificatesOutputFile = new File(OUTPUT_PARENT_DIRECTORY, "signatureCertificates.txt"); String signatureCertificatesJSON = gson.toJson(signatureCertificates); BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(signatureCertificatesOutputFile)); ByteArrayInputStream bIn = new ByteArrayInputStream(signatureCertificatesJSON.getBytes()); IOUtils.copy(bIn, bufferedOutputStream); bufferedOutputStream.close(); // store certificate chains (so that they can be used for verification purposes) // only for demonstration purposes File signatureCertificateChainsOutputFile = new File(OUTPUT_PARENT_DIRECTORY, "signatureCertificateChains.txt"); String signatureCertificateChainsJSON = gson.toJson(certificateChains); bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(signatureCertificateChainsOutputFile)); bIn = new ByteArrayInputStream(signatureCertificateChainsJSON.getBytes()); IOUtils.copy(bIn, bufferedOutputStream); bufferedOutputStream.close(); // store AES key as BASE64 String (for demonstration purposes: to allow decryption of turnover // value) // ATTENTION, this is only for demonstration purposes, the AES key must be stored in a secure // area byte[] aesKey = cashBoxParameters.getTurnoverKeyAESkey().getEncoded(); String aesKeyBase64 = CashBoxUtils.base64Encode(aesKey, false); writer = new PrintWriter(new File(OUTPUT_PARENT_DIRECTORY, "aesKeyBase64.txt")); writer.print(aesKeyBase64); writer.close(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } }
/** * constructs the folders required by the application. These are, typically: <br> * * <ul> * <li>/input and subfolders * <ul> * <li>/input/<i>sourceLang</i>, /input/<i>targetLang</i> (for storing the results of * processing the input files with various tools, such as pos tagger, transliterator, * morphological analyser),<br> * <li>/input/systems/<i>systemName</i> (for storing system specific resources - for * example, the compiled and processed word lattices in the case of the IBM system * </ul> * <li>/output (for storing the resulting feature files), * </ul> */ public void constructFolders() { File f = new File(input); if (!f.exists()) { f.mkdirs(); System.out.println("folder created " + f.getPath()); } f = new File(input + File.separator + sourceLang); if (!f.exists()) { f.mkdirs(); System.out.println("folder created " + f.getPath()); } f = new File(input + File.separator + targetLang); if (!f.exists()) { f.mkdirs(); System.out.println("folder created " + f.getPath()); } f = new File(input + File.separator + targetLang + File.separator + "temp"); if (!f.exists()) { f.mkdirs(); System.out.println("folder created " + f.getPath()); } /* f = new File(input + File.separator + "systems"); if (!f.exists()) { f.mkdir(); System.out.println("folder created " + f.getPath()); } f = new File(input + File.separator + "systems" + File.separator + "IBM"); if (!f.exists()) { f.mkdir(); System.out.println("folder created " + f.getPath()); } f = new File(input + File.separator + "systems" + File.separator + "MOSES"); if (!f.exists()) { f.mkdir(); System.out.println("folder created " + f.getPath()); } */ String output = resourceManager.getString("output"); f = new File(output); if (!f.exists()) { f.mkdirs(); System.out.println("folder created " + f.getPath()); } if (featureManager.hasFeature("1700")) { String lang_resources = workDir + File.separator + "lang_resources"; f = new File(lang_resources); if (!f.exists()) { System.out.println("For Lucene features, lang_resources are 
needed."); System.exit(0); } String source_lang_resources = lang_resources + File.separator + sourceLang; f = new File(source_lang_resources); if (!f.exists()) { System.out.println("For Lucene features, source lang_resources are needed."); System.exit(0); } String source_lucene_path = lang_resources + File.separator + sourceLang + File.separator + "luceneIndex"; f = new File(source_lucene_path); if (!f.exists()) { f.mkdir(); System.out.println("folder created " + f.getPath()); } String source_lucene_corpus = source_lucene_path + File.separator + sourceLang + ".corpus"; try { Runtime.getRuntime() .exec( "ln -s " + workDir + File.separator + resourceManager.getString(sourceLang + ".corpus") + " " + source_lucene_corpus); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // Indexing the target String target_lang_resources = lang_resources + File.separator + targetLang; f = new File(target_lang_resources); if (!f.exists()) { System.out.println("For Lucene features, target lang_resources are needed."); System.exit(0); } String target_lucene_path = lang_resources + File.separator + targetLang + File.separator + "luceneIndex"; f = new File(target_lucene_path); if (!f.exists()) { f.mkdir(); System.out.println("folder created " + f.getPath()); } String target_lucene_corpus = target_lucene_path + File.separator + targetLang + ".corpus"; try { Runtime.getRuntime() .exec( "ln -s " + workDir + File.separator + resourceManager.getString(targetLang + ".corpus") + " " + target_lucene_corpus); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }