Ejemplo n.º 1
0
 /**
  * Creates a Q2 instance from the given command-line arguments.
  *
  * <p>Records the start time and a random instance id, parses the command line, derives the
  * {@code lib} directory from the deploy directory, creates the deploy directory on disk and
  * remembers the current thread's context class loader.
  *
  * @param args the command-line arguments; stored in {@code this.args} and passed to {@code
  *     parseCmdLine}
  */
 public Q2(String[] args) {
   super();
   this.args = args;
   startTime = System.currentTimeMillis();
   instanceId = UUID.randomUUID();
   // NOTE(review): deployDir is read right after this call, so presumably parseCmdLine
   // assigns it — confirm before reordering any of the statements below.
   parseCmdLine(args);
   libDir = new File(deployDir, "lib");
   dirMap = new TreeMap();
   // Creates the deploy directory and any missing parents; the boolean result is not checked.
   deployDir.mkdirs();
   mainClassLoader = Thread.currentThread().getContextClassLoader();
 }
Ejemplo n.º 2
0
 /**
  * Takes a tree of files starting in a directory in a zip file and copies them to a disk
  * directory, recreating the tree.
  *
  * <p>Only entries whose name starts with {@code directory} are copied. Entries that would
  * resolve to a location outside {@code parent} (for example via {@code ..} path elements) are
  * skipped and reported on {@code System.err}.
  *
  * @param inZipFile the zip file to read; if it does not exist nothing is done and 0 is returned
  * @param directory the path prefix inside the zip file that selects the entries to copy
  * @param parent the disk directory under which the files are written
  * @param suppressFirstPathElement if true, the {@code directory} prefix is removed from each
  *     entry name before it is appended to {@code parent}
  * @return the number of files installed, or the negated number of files installed so far if an
  *     error occurred
  */
 private int unpackZipFile(
     File inZipFile, String directory, String parent, boolean suppressFirstPathElement) {
   int count = 0;
   if (!inZipFile.exists()) return count;
   parent = parent.trim();
   if (!parent.endsWith(File.separator)) parent += File.separator;
   if (!directory.endsWith(File.separator)) directory += File.separator;
   File outFile = null;
   // try-with-resources guarantees the zip file is closed even when copying fails.
   try (ZipFile zipFile = new ZipFile(inZipFile)) {
     // Canonical form of the target directory, used to reject entries that escape it.
     String root = new File(parent).getCanonicalPath();
     if (!root.endsWith(File.separator)) root += File.separator;

     Enumeration<? extends ZipEntry> zipEntries = zipFile.entries();
     while (zipEntries.hasMoreElements()) {
       ZipEntry entry = zipEntries.nextElement();
       String name = entry.getName().replace('/', File.separatorChar);
       if (!name.startsWith(directory)) continue;
       if (suppressFirstPathElement) name = name.substring(directory.length());
       outFile = new File(parent + name);

       // Guard against "zip slip": never write outside the requested parent directory.
       // The separator is appended so that the parent directory itself is accepted.
       if (!(outFile.getCanonicalPath() + File.separator).startsWith(root)) {
         System.err.println("Skipping entry outside the target directory: " + entry.getName());
         continue;
       }

       // Create the directory, just in case
       if (name.indexOf(File.separatorChar) >= 0) {
         String p = name.substring(0, name.lastIndexOf(File.separatorChar) + 1);
         File dirFile = new File(parent + p);
         dirFile.mkdirs();
       }
       if (!entry.isDirectory()) {
         System.out.println("Installing " + outFile);
         // Copy the file; both streams are closed even if the copy throws.
         try (BufferedInputStream in = new BufferedInputStream(zipFile.getInputStream(entry));
             BufferedOutputStream out =
                 new BufferedOutputStream(new FileOutputStream(outFile))) {
           byte[] b = new byte[1024];
           int n;
           while ((n = in.read(b, 0, b.length)) != -1) out.write(b, 0, n);
         }
         // Count the file
         count++;
       }
     }
   } catch (Exception e) {
     System.err.println("...an error occured while installing " + outFile);
     e.printStackTrace();
     // outFile is still null when the zip file itself could not be opened.
     String failedName = (outFile != null) ? outFile.getName() : inZipFile.getName();
     System.err.println("Error copying " + failedName + "\n" + e.getMessage());
     return -count;
   }
   System.out.println(count + " files were installed.");
   return count;
 }
Ejemplo n.º 3
0
  /**
   * Cleans up from old installations, removing or renaming obsolete files.
   *
   * <p>All deletions and renames are best effort: their boolean results are not checked.
   *
   * @param directory the parent of the CTP directory, unless the original installation was done
   *     by Bill Weadock's all-in-one installer for Windows, in which case the directory is named
   *     "RSNA" and is itself treated as the CTP directory
   */
  private void cleanup(File directory) {
    // Get a file pointing to the CTP directory.
    // This might be the current directory, or
    // it might be the CTP child.
    File dir;
    if (directory.getName().equals("RSNA")) dir = directory;
    else dir = new File(directory, "CTP");

    // Obsolete jars that live directly in the CTP directory:
    //  - CTP.jar: a really old CTP main file, not used anymore
    //  - CTP-launcher.jar, TFS-launcher.jar: old names for the Launcher.jar file
    //  - CTP-runner.jar, MIRC-copier.jar: obsolete tools
    String[] obsoleteJars = {
      "CTP.jar", "CTP-launcher.jar", "TFS-launcher.jar", "CTP-runner.jar", "MIRC-copier.jar"
    };
    for (String jarName : obsoleteJars) {
      File jar = new File(dir, jarName);
      if (jar.exists()) jar.delete();
    }

    // Rename the old versions of the properties files
    File oldprops = new File(dir, "CTP-startup.properties");
    File newprops = new File(dir, "CTP-launcher.properties");
    File correctprops = new File(dir, "Launcher.properties");
    if (oldprops.exists()) {
      if (newprops.exists() || correctprops.exists()) oldprops.delete();
      else oldprops.renameTo(correctprops);
    }
    if (newprops.exists()) {
      if (correctprops.exists()) newprops.delete();
      else newprops.renameTo(correctprops);
    }

    // Get rid of obsolete startup and shutdown programs
    File startup = new File(dir, "CTP-startup.jar");
    if (startup.exists()) startup.delete();
    File shutdown = new File(dir, "CTP-shutdown.jar");
    if (shutdown.exists()) shutdown.delete();

    // Get rid of the obsolete linux directory
    File linux = new File(dir, "linux");
    if (linux.exists()) {
      startup = new File(linux, "CTP-startup.jar");
      if (startup.exists()) startup.delete();
      shutdown = new File(linux, "CTP-shutdown.jar");
      if (shutdown.exists()) shutdown.delete();
      // File.delete only removes the directory if it is empty by now.
      linux.delete();
    }

    // clean up the libraries directory
    File libraries = new File(dir, "libraries");
    if (libraries.exists()) {
      // remove obsolete versions of the slf4j libraries
      // and the dcm4che-imageio libraries
      File[] files = libraries.listFiles();
      // listFiles returns null when "libraries" is not a directory or cannot be read.
      if (files != null) {
        for (File file : files) {
          if (file.isFile()) {
            String name = file.getName();
            if (name.startsWith("slf4j-") || name.startsWith("dcm4che-imageio-rle")) {
              file.delete();
            }
          }
        }
      }
      // remove the email subdirectory
      File email = new File(libraries, "email");
      deleteAll(email);
      // remove the xml subdirectory
      File xml = new File(libraries, "xml");
      deleteAll(xml);
      // remove the sftp subdirectory
      // (this previously passed the xml directory again, so sftp was never removed)
      File sftp = new File(libraries, "sftp");
      deleteAll(sftp);
      // move edtftpj.jar to the ftp directory
      File edtftpj = new File(libraries, "edtftpj.jar");
      if (edtftpj.exists()) {
        File ftp = new File(libraries, "ftp");
        ftp.mkdirs();
        File ftpedtftpj = new File(ftp, "edtftpj.jar");
        edtftpj.renameTo(ftpedtftpj);
      }
    }

    // remove the obsolete xml library under dir
    File xml = new File(dir, "xml");
    deleteAll(xml);

    // remove the dicom profiles so any
    // obsolete files will disappear
    File profiles = new File(dir, "profiles");
    File dicom = new File(profiles, "dicom");
    deleteAll(dicom);
    dicom.mkdirs();

    // Remove the index.html file so it will be rebuilt from
    // example-index.html when the system next starts.
    File root = new File(dir, "ROOT");
    if (root.exists()) {
      File index = new File(root, "index.html");
      index.delete();
    }
  }
  /**
   * Runs the cash box demo: parses the command-line options, initialises a {@code DemoCashBox},
   * stores a start receipt followed by randomly generated receipts, and dumps the results
   * (QR/OCR code representations, DEP export, PDF receipts, signature certificates, certificate
   * chains and the AES key) into the output directory.
   *
   * <p>All text files are written as UTF-8 and every file handle is closed even if writing fails.
   * {@code IOException} and command-line {@code ParseException} are caught and their stack trace
   * is printed.
   *
   * @param args command-line arguments; see the option definitions at the top of the method
   */
  public static void main(String[] args) {
    try {

      // IMPORTANT HINT REGARDING STRING ENCODING
      // Java Strings are sequences of UTF-16 code units; an encoding only comes into play when a
      // String is converted to or from bytes, and the default for that conversion is platform
      // dependent on older Java versions. This demo therefore names UTF-8 explicitly wherever it
      // writes text to a file. If values are retrieved from a database or another programming
      // language is used, one needs to make sure that the UTF-8 encoding is correctly
      // implemented there as well.

      // create CMD line option object
      Options options = new Options();

      // add CMD line options
      options.addOption(
          "o",
          "output-dir",
          true,
          "specify base output directory, if none is specified a new directory will be created in the current path");
      options.addOption(
          "n",
          "number-of-generated-receipts",
          true,
          "specify number of receipts to be randomly generated, 50 is default");
      options.addOption(
          "g",
          "signature-creation-device-cannot-fail",
          false,
          "deactivate glitches in signature-creation-device");
      // NOTE(review): the help text says "after 5 receipts" but the value configured below is
      // 10 — confirm which one is intended.
      options.addOption(
          "s",
          "no-signature-certificate-switch",
          false,
          "deactivate switching of signature certificates after 5 receipts");
      options.addOption(
          "t", "no-training-receipts", false, "deactivate random generation of training-receipts");

      // parse CMD line options
      CommandLineParser parser = new DefaultParser();
      CommandLine cmd = parser.parse(options, args);

      boolean signatureCreationDeviceAlwaysWorks = cmd.hasOption("g");
      boolean deactivateSignatureCertificateSwitching = cmd.hasOption("s");
      boolean deactivateTrainingReceipts = cmd.hasOption("t");

      // without -o a timestamped directory below the current path is used
      String outputParentDirectoryString = cmd.getOptionValue("o");
      if (outputParentDirectoryString == null) {
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");
        outputParentDirectoryString = "./CashBoxDemoOutput" + df.format(new Date());
      }
      File outputParentDirectory = new File(outputParentDirectoryString);
      outputParentDirectory.mkdirs();
      System.out.println("Setting workdir to " + outputParentDirectory.getAbsolutePath());

      // a non-numeric -n value raises NumberFormatException, which is not caught here
      String numberOfReceiptsString = cmd.getOptionValue("n");
      int numberOfReceipts = DEFAULT_NUMBER_OF_GENERATED_RECEIPTS;
      if (numberOfReceiptsString != null) {
        numberOfReceipts = Integer.parseInt(numberOfReceiptsString);
      }

      // TODO add provider independent functionality
      // initialise cryptographic providers
      Security.addProvider(new BouncyCastleProvider());

      // prepare cashbox init parameters
      CashBoxParameters cashBoxParameters = new CashBoxParameters();

      // set parameter for signature certificate switching
      // if > 0 then switch signature certificate after so many signatures
      // this is important for demonstrating the handling of the DEP export format
      // when receipts were signed with multiple signature certificates
      if (deactivateSignatureCertificateSwitching) {
        cashBoxParameters.setChangeSignatureCertificateAfterSoManyReceipts(-1);
      } else {
        cashBoxParameters.setChangeSignatureCertificateAfterSoManyReceipts(10);
      }

      // generate and set random cash box ID ("Kassen-ID")
      // REF TO SPECIFICATION: Detailspezifikation/Abs 4
      String cashBoxId = "DEMO-CASH-BOX" + Math.round(Math.random() * 1000);
      cashBoxParameters.setCashBoxID(cashBoxId);

      // set cashbox suite
      // REF TO SPECIFICATION: Detailspezifikation/Abs 2
      // AT0 is used here for demonstration purposes, see Abs 2 for details on AT0
      cashBoxParameters.setRkSuite(RKSuite.R1_AT0);

      // set initial receipt identifier
      // in this demo cashbox integer values are used as receipt identifiers ("Belegnummer"),
      // however the specification does not impose that limit. An arbitrary UTF-8 String could be
      // used, the only requirement is that the same combination of the cashBox ID ("Kassen-ID")
      // and the receipt identifier ("Belegnummer") is NEVER used for more than one receipt.
      // Using the same combination multiple times compromises the security of the encrypted
      // turnover value, which might lead to leaked turnover data.
      // REF TO SPECIFICATION: Detailspezifikation/Abs 4, Abs 8, Abs 9, Abs 10
      long initialReceiptIdentifier = Math.round(Math.random() * 1000000);
      cashBoxParameters.setInitialReceiptIdentifier(initialReceiptIdentifier);

      // set DEP module for storing and exporting receipts
      // REF TO SPECIFICATION: Detailspezifikation/Abs 3, 11
      cashBoxParameters.setDepModul(new SimpleMemoryDEPModule());

      // create random AES key for turnover encryption
      // REF TO SPECIFICATION: Detailspezifikation/Abs 4, Abs 8, Abs 9, Abs 10
      cashBoxParameters.setTurnoverKeyAESkey(CashBoxUtils.createAESKey());

      // set up signature module
      // the signature module is composed of a JWS module that creates the JSON Web Signature
      // (JWS) and a low level signature module for signing the hash values.
      // REF TO SPECIFICATION: Detailspezifikation/Abs 2, Abs 4, Abs 5, Abs 6

      // JWSModule jwsModule = new OrgBitbucketBcJwsModule();  //requires bouncycastle provider
      JWSModule jwsModule = new ManualJWSModule(); // allows for provider independent use cases
      // set damage flag, which simulates the failure of the signature creation device and the
      // correct handling of this case, obviously this is only suitable for demonstration purposes
      jwsModule.setDamageIsPossible(!signatureCreationDeviceAlwaysWorks);
      jwsModule.setProbabilityOfDamagedSignatureDevice(PROPABILITY_DAMAGED_SIGNATURE_DEVICE);

      jwsModule.setSignatureModule(new DO_NOT_USE_IN_REAL_CASHBOX_DemoSoftwareSignatureModule());
      // jwsModule.setSignatureModule(new PKCS11SignatureModule());

      cashBoxParameters.setJwsModule(jwsModule);

      // set printer module
      // REF TO SPECIFICATION: Detailspezifikation/Abs 12, Abs 13, Abs 14, Abs 15
      PrinterModule printerModule = new SimplePDFPrinterModule();
      cashBoxParameters.setPrinterModule(printerModule);

      // init the cash box with the parameters
      DemoCashBox demoCashBox = new DemoCashBox(cashBoxParameters);

      // init done, start interaction with cashbox
      // create random receipt data that will be handled by the cashbox
      List<RawReceiptData> receipts =
          RandomReceiptGenerator.generateRandomReceipts(numberOfReceipts);

      // store first receipt (Startbeleg) in cashbox
      // all taxtype values are set to zero (per default in this demo)
      RawReceiptData firstReceipt = new RawReceiptData();
      demoCashBox.storeReceipt(firstReceipt, false, false);

      // now store the other receipts
      for (RawReceiptData rawReceiptData : receipts) {
        // store receipt within cashbox: (prepare data-to-be-signed, sign with JWS, store signed
        // receipt in DEP)

        // pre-defined chance for a training receipt (just for demo purposes)
        boolean isTrainingReceipt =
            Math.random() < PROPABILITY_TRAINING_RECEIPT && !deactivateTrainingReceipts;

        // pre-defined chance for a storno receipt
        boolean isStornoReceipt = Math.random() < PROPABILITY_OF_STORNO_RECEIPT;

        demoCashBox.storeReceipt(rawReceiptData, isTrainingReceipt, isStornoReceipt);
      }

      // dump machine readable code of receipts (this "code" is used for the QR-codes)
      // REF TO SPECIFICATION: Detailspezifikation/Abs 12
      // dump to File
      File qrCoreRepExportFile = new File(outputParentDirectory, "qr-code-rep.txt");
      List<ReceiptPackage> receiptPackages = demoCashBox.getStoredReceipts();
      try (PrintWriter writer = new PrintWriter(qrCoreRepExportFile, "UTF-8")) {
        System.out.println("------------QR-CODE-REP------------");
        for (ReceiptPackage receiptPackage : receiptPackages) {
          System.out.println(receiptPackage.getQRCodeRepresentation());
          writer.println(receiptPackage.getQRCodeRepresentation());
        }
        System.out.println("");
      }

      // dump OCR code of receipts
      // REF TO SPECIFICATION: Detailspezifikation/Abs 14
      // dump to File
      File ocrCoreRepExportFile = new File(outputParentDirectory, "ocr-code-rep.txt");
      try (PrintWriter writer = new PrintWriter(ocrCoreRepExportFile, "UTF-8")) {
        System.out.println("------------OCR-CODE-REP------------");
        for (ReceiptPackage receiptPackage : receiptPackages) {
          System.out.println(receiptPackage.getOcrCodeRepresentation());
          writer.println(receiptPackage.getOcrCodeRepresentation());
        }
        System.out.println("");
      }

      // export DEP from cashbox
      // REF TO SPECIFICATION: Detailspezifikation/Abs 3
      DEPExportFormat depExportFormat = demoCashBox.exportDEP();

      // get JSON rep and dump export format to file/std output
      Gson gson = new GsonBuilder().setPrettyPrinting().create();
      String exportFormatJSONString = gson.toJson(depExportFormat);
      System.out.println("------------DEP-EXPORT-FORMAT------------");
      System.out.println(exportFormatJSONString);
      System.out.println("");

      // dump DEP export to file
      File depExportFile = new File(outputParentDirectory, "dep-export.txt");
      try (FileOutputStream outputStream = new FileOutputStream(depExportFile)) {
        outputStream.write(exportFormatJSONString.getBytes("UTF-8"));
      }

      // export receipts as PDF (QR-CODE)
      // REF TO SPECIFICATION: Detailspezifikation/Abs 12, Abs 13
      File qrCodeDumpDirectory = new File(outputParentDirectory, "qr-code-dir-pdf");
      qrCodeDumpDirectory.mkdirs();
      List<byte[]> printedQRCodeReceipts =
          demoCashBox.printReceipt(receiptPackages, ReceiptPrintType.QR_CODE);
      CashBoxUtils.writeReceiptsToFiles(printedQRCodeReceipts, "QR-", qrCodeDumpDirectory);

      // export receipts as PDF (OCR)
      // REF TO SPECIFICATION: Detailspezifikation/Abs 14, Abs 15
      File ocrCodeDumpDirectory = new File(outputParentDirectory, "ocr-code-dir-pdf");
      ocrCodeDumpDirectory.mkdirs();
      List<byte[]> printedOCRCodeReceipts =
          demoCashBox.printReceipt(receiptPackages, ReceiptPrintType.OCR);
      CashBoxUtils.writeReceiptsToFiles(printedOCRCodeReceipts, "OCR-", ocrCodeDumpDirectory);

      // store signature certificates (so that they can be used for verification purposes)
      // only for demonstration purposes
      List<String> signatureCertificates = new ArrayList<>();
      List<List<String>> certificateChains = new ArrayList<>();
      DEPBelegDump[] belegDumps = depExportFormat.getBelegPackage();
      for (DEPBelegDump depBelegDump : belegDumps) {
        signatureCertificates.add(depBelegDump.getSignatureCertificate());
        certificateChains.add(Arrays.asList(depBelegDump.getCertificateChain()));
      }
      File signatureCertificatesOutputFile =
          new File(outputParentDirectory, "signatureCertificates.txt");
      String signatureCertificatesJSON = gson.toJson(signatureCertificates);
      try (FileOutputStream certificatesOut =
          new FileOutputStream(signatureCertificatesOutputFile)) {
        certificatesOut.write(signatureCertificatesJSON.getBytes("UTF-8"));
      }

      // store certificate chains (so that they can be used for verification purposes)
      // only for demonstration purposes
      File signatureCertificateChainsOutputFile =
          new File(outputParentDirectory, "signatureCertificateChains.txt");
      String signatureCertificateChainsJSON = gson.toJson(certificateChains);
      try (FileOutputStream chainsOut =
          new FileOutputStream(signatureCertificateChainsOutputFile)) {
        chainsOut.write(signatureCertificateChainsJSON.getBytes("UTF-8"));
      }

      // store AES key as BASE64 String (for demonstration purposes: to allow decryption of
      // turnover value)
      // ATTENTION, this is only for demonstration purposes, the AES key must be stored in a
      // secure area
      byte[] aesKey = cashBoxParameters.getTurnoverKeyAESkey().getEncoded();
      String aesKeyBase64 = CashBoxUtils.base64Encode(aesKey, false);
      try (PrintWriter writer =
          new PrintWriter(new File(outputParentDirectory, "aesKeyBase64.txt"), "UTF-8")) {
        writer.print(aesKeyBase64);
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (ParseException e) {
      e.printStackTrace();
    }
  }
Ejemplo n.º 5
0
  /**
   * Runs the BB features.
   *
   * <p>Makes sure the output directory exists, runs the n-gram perplexity tool (and, unless
   * {@code isBaseline} is set, the POS taggers), loads the configured resources, then reads the
   * source and target files in parallel line by line. For every sentence pair it runs the enabled
   * processors and writes one line produced by {@code featureManager.runFeatures} to the output
   * file {@code <output>/<sourceFileName>_to_<targetFileName>.out}.
   *
   * <p>The optional processors are enabled by resource values equal to {@code "1"}: {@code GL}
   * (global lexicon), {@code BP} (Berkeley parser), {@code TM} (topic model) and {@code TR}
   * (triggers). The Lucene indices are only prepared when feature {@code "1700"} is present.
   *
   * <p>Any exception raised in the main processing section is caught and its stack trace printed.
   * NOTE(review): in that case the readers and the writer opened in that section are not closed.
   */
  public void runBB() {
    // Only the file names (without directories) are used to build the output file name.
    File f = new File(sourceFile);
    String sourceFileName = f.getName();
    f = new File(targetFile);
    String targetFileName = f.getName();
    String outputFileName = sourceFileName + "_to_" + targetFileName + ".out";

    // Create the output directory if it does not exist yet.
    File file = new File(resourceManager.getString("output"));
    if (!file.exists()) {
      System.err.println("Creating dir: " + resourceManager.getString("output"));
      Logger.log("Creating dir: " + resourceManager.getString("output"));
      file.mkdirs();
    } else {
      Logger.log("output dir exists: " + resourceManager.getString("output"));
    }

    String out = resourceManager.getString("output") + File.separator + outputFileName;
    System.out.println("Output will be: " + out);

    // Paths of the perplexity files: <input>/<lang>/<fileName><tools.ngram.output.ext>
    String pplSourcePath =
        resourceManager.getString("input")
            + File.separator
            + sourceLang
            + File.separator
            + sourceFileName
            + resourceManager.getString("tools.ngram.output.ext");
    String pplTargetPath =
        resourceManager.getString("input")
            + File.separator
            + targetLang
            + File.separator
            + targetFileName
            + resourceManager.getString("tools.ngram.output.ext");

    // NOTE(review): pplPOSTargetPath is computed but not read again in this method.
    String pplPOSTargetPath =
        resourceManager.getString("input")
            + File.separator
            + targetLang
            + File.separator
            + targetFileName
            + PosTagger.getXPOS()
            + resourceManager.getString("tools.ngram.output.ext");
    runNGramPPL();

    // NOTE(review): fm is not read again in this method; presumably the constructor is invoked
    // for its side effects — confirm before removing.
    FileModel fm = new FileModel(sourceFile, resourceManager.getString(sourceLang + ".corpus"));

    // FileModel fm = new FileModel(sourceFile,
    //     resourceManager.getString("source" + ".corpus"));

    PPLProcessor pplProcSource =
        new PPLProcessor(pplSourcePath, new String[] {"logprob", "ppl", "ppl1"});
    PPLProcessor pplProcTarget =
        new PPLProcessor(pplTargetPath, new String[] {"logprob", "ppl", "ppl1"});

    // POS tagging and the POS perplexity processor are skipped in baseline mode, so these
    // three variables stay null when isBaseline is true.
    String sourcePosOutput = null;
    String targetPosOutput = null;
    PPLProcessor pplPosTarget = null;
    if (!isBaseline) {
      sourcePosOutput = runPOS(sourceFile, sourceLang, "source");
      targetPosOutput = runPOS(targetFile, targetLang, "target");

      String targetPPLPos = runNGramPPLPos(targetPosOutput + PosTagger.getXPOS());
      System.out.println("---------TARGET PPLPOS: " + targetPPLPos);
      pplPosTarget =
          new PPLProcessor(targetPPLPos, new String[] {"poslogprob", "posppl", "posppl1"});
    }

    loadGiza();
    processNGrams();
    // The global lexicon is loaded only when the "GL" resource equals "1".
    boolean gl = false;
    String temp0 = resourceManager.getString("GL");
    if (null != temp0 && temp0.equals("1")) {
      gl = true;
    }

    if (gl) {
      loadGlobalLexicon();
    }

    // Preparing the indices for IR_similarity_features
    // NOTE(review): if either Lucene constructor throws, the exception is only printed and the
    // variable stays null; the per-sentence loop below then dereferences it.
    Lucene sourceLuc = null;
    Lucene targetLuc = null;
    if (featureManager.hasFeature("1700")) {
      // The indices reside under lang_resources path
      String lang_resources = workDir + File.separator + "lang_resources";
      // Indices are saved under: luceneIndex folder
      String source_lucene_path =
          lang_resources + File.separator + sourceLang + File.separator + "luceneIndex";
      // The corpus to index
      String source_lucene_corpus = source_lucene_path + File.separator + sourceLang + ".corpus";
      //			System.out.println("SOURCE: " + source_lucene_path + " ||| " + source_lucene_corpus);
      try {
        sourceLuc = new Lucene(source_lucene_path, source_lucene_corpus, true, true, "Source");
      } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      String target_lucene_path =
          lang_resources + File.separator + targetLang + File.separator + "luceneIndex";
      String target_lucene_corpus = target_lucene_path + File.separator + targetLang + ".corpus";
      //			System.out.println("TARGET: " + target_lucene_path + " ||| " + target_lucene_corpus);
      try {
        targetLuc = new Lucene(target_lucene_path, target_lucene_corpus, true, true, "Target");
      } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
    }

    // MQM kicks in
    MQMManager.getInstance().initialize(resourceManager);
    Context context = new Context();
    context.setSourceFilePath(sourceFile);
    context.setTargetFilePath(targetFile);
    MQMManager.getInstance().globalProcessing(context);

    try {
      // NOTE(review): FileReader/FileWriter use the platform default charset here.
      BufferedReader brSource = new BufferedReader(new FileReader(sourceFile));
      BufferedReader brTarget = new BufferedReader(new FileReader(targetFile));
      BufferedWriter output = new BufferedWriter(new FileWriter(out));
      BufferedReader posSource = null;
      BufferedReader posTarget = null;
      boolean posSourceExists = ResourceManager.isRegistered("sourcePosTagger");
      boolean posTargetExists = ResourceManager.isRegistered("targetPosTagger");
      POSProcessor posSourceProc = null;
      POSProcessor posTargetProc = null;

      // lefterav: Berkeley parser modifications start here
      // Check if user has defined the grammar files for source
      // and target language

      //   if ( ResourceManager.isRegistered("BParser")){
      boolean bp = false;
      String temp = resourceManager.getString("BP");
      if (null != temp && temp.equals("1")) {
        bp = true;
      }

      BParserProcessor sourceParserProcessor = null;
      BParserProcessor targetParserProcessor = null;

      if (bp) {
        sourceParserProcessor = new BParserProcessor();
        targetParserProcessor = new BParserProcessor();
        sourceParserProcessor.initialize(sourceFile, resourceManager, sourceLang);
        targetParserProcessor.initialize(targetFile, resourceManager, targetLang);
      }
      // }

      /** BEGIN: Added by Raphael Rubino for the Topic Model Features */
      boolean tm = false;
      String temp1 = resourceManager.getString("TM");
      if (temp1 != null && temp1.equals("1")) {
        tm = true;
      }
      TopicDistributionProcessor sourceTopicDistributionProcessor = null;
      TopicDistributionProcessor targetTopicDistributionProcessor = null;
      if (tm) {
        String sourceTopicDistributionFile =
            resourceManager.getString(sourceLang + ".topic.distribution");
        String targetTopicDistributionFile =
            resourceManager.getString(targetLang + ".topic.distribution");
        sourceTopicDistributionProcessor =
            new TopicDistributionProcessor(sourceTopicDistributionFile, "sourceTopicDistribution");
        targetTopicDistributionProcessor =
            new TopicDistributionProcessor(targetTopicDistributionFile, "targetTopicDistribution");
      }
      /* END: Added by Raphael Rubino for the Topic Model Features
       */

      // The POS processors are only created outside baseline mode.
      // NOTE(review): the per-sentence loop below checks only posSourceExists/posTargetExists,
      // so in baseline mode with a registered tagger the processors are still null there.
      // NOTE(review): the source reader is opened as utf-8 while the target reader uses the
      // platform default charset; both readers are only closed, never read, in this method.
      if (!isBaseline) {
        if (posSourceExists) {
          posSourceProc = new POSProcessor(sourcePosOutput);
          posSource =
              new BufferedReader(
                  new InputStreamReader(new FileInputStream(sourcePosOutput), "utf-8"));
        }
        if (posTargetExists) {
          posTargetProc = new POSProcessor(targetPosOutput);
          posTarget =
              new BufferedReader(new InputStreamReader(new FileInputStream(targetPosOutput)));
        }
      }
      ResourceManager.printResources();
      Sentence sourceSent;
      Sentence targetSent;
      int sentCount = 0;

      String lineSource = brSource.readLine();
      String lineTarget = brTarget.readLine();

      /** Triggers (by David Langlois) */
      boolean tr = false;
      String temp2 = resourceManager.getString("TR");
      if (temp2 != null && temp2.equals("1")) {
        tr = true;
      }

      Triggers itl_target = null;
      TriggersProcessor itl_target_p = null;
      Triggers itl_source = null;
      TriggersProcessor itl_source_p = null;
      // TriggersProcessor itl_source_p = null;
      Triggers itl_source_target = null;
      TriggersProcessor itl_source_target_p = null;

      if (tr) {

        itl_target =
            new Triggers(
                resourceManager.getString("target.intra.triggers.file"),
                Integer.parseInt(resourceManager.getString("nb.max.triggers.target.intra")),
                resourceManager.getString("phrase.separator"));
        itl_target_p = new TriggersProcessor(itl_target);

        itl_source =
            new Triggers(
                resourceManager.getString("source.intra.triggers.file"),
                Integer.parseInt(resourceManager.getString("nb.max.triggers.source.intra")),
                resourceManager.getString("phrase.separator"));
        itl_source_p = new TriggersProcessor(itl_source);

        itl_source_target =
            new Triggers(
                resourceManager.getString("source.target.inter.triggers.file"),
                Integer.parseInt(resourceManager.getString("nb.max.triggers.source.target.inter")),
                resourceManager.getString("phrase.separator"));
        itl_source_target_p = new TriggersProcessor(itl_source_target);
      }
      /*
       * End modification for Triggers
       */

      // read in each line from the source and target files
      // create a sentence from each
      // process each sentence
      // run the features on the sentences
      // The loop stops as soon as either file runs out of lines.
      while ((lineSource != null) && (lineTarget != null)) {

        // lineSource = lineSource.trim().substring(lineSource.indexOf(" ")).replace("+", "");
        sourceSent = new Sentence(lineSource, sentCount);
        targetSent = new Sentence(lineTarget, sentCount);

        //       System.out.println("Processing sentence "+sentCount);
        //     System.out.println("SORCE: " + sourceSent.getText());
        //   System.out.println("TARGET: " + targetSent.getText());

        if (posSourceExists) {
          posSourceProc.processSentence(sourceSent);
        }
        if (posTargetExists) {
          posTargetProc.processSentence(targetSent);
        }
        sourceSent.computeNGrams(3);
        targetSent.computeNGrams(3);
        pplProcSource.processNextSentence(sourceSent);
        pplProcTarget.processNextSentence(targetSent);
        if (!isBaseline) {
          pplPosTarget.processNextSentence(targetSent);
        }

        // lefterav: Parse code here

        if (bp) {
          sourceParserProcessor.processNextSentence(sourceSent);
          targetParserProcessor.processNextSentence(targetSent);
        }

        if (tm) {

          sourceTopicDistributionProcessor.processNextSentence(sourceSent);
          targetTopicDistributionProcessor.processNextSentence(targetSent);
        }

        // modified by David
        if (tr) {
          itl_source_p.processNextSentence(sourceSent);
          itl_target_p.processNextSentence(targetSent);
          itl_source_target_p.processNextParallelSentences(sourceSent, targetSent);
        }
        // end modification by David

        // MQM kicks in
        MQMManager.getInstance().processNextParallelSentences(sourceSent, targetSent);

        // Ergun
        if (featureManager.hasFeature("1700")) {
          sourceLuc.processNextSentence(sourceSent);
          targetLuc.processNextSentence(targetSent);
        }

        // One output line per sentence pair.
        ++sentCount;
        output.write(featureManager.runFeatures(sourceSent, targetSent));
        output.newLine();
        lineSource = brSource.readLine();
        lineTarget = brTarget.readLine();
      }
      if (posSource != null) {
        posSource.close();
      }
      if (posTarget != null) {
        posTarget.close();
      }

      brSource.close();
      brTarget.close();
      output.close();
      Logger.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Ejemplo n.º 6
0
  /**
   * Constructs the folders required by the application. These are: <br>
   *
   * <ul>
   *   <li>/input and subfolders
   *       <ul>
   *         <li>/input/<i>sourceLang</i>, /input/<i>targetLang</i> (for storing the results of
   *             processing the input files with various tools, such as pos tagger, transliterator,
   *             morphological analyser),<br>
   *         <li>/input/<i>targetLang</i>/temp
   *       </ul>
   *   <li>the folder configured under the {@code output} resource key (for storing the resulting
   *       feature files).
   * </ul>
   *
   * <p>When feature {@code 1700} is enabled, the method additionally requires {@code
   * lang_resources} and one subfolder per language to exist under the working directory (the JVM
   * exits with status 1 otherwise), creates a {@code luceneIndex} folder in each of them and
   * symlinks the configured corpus of each language into it.
   */
  public void constructFolders() {
    ensureFolderExists(new File(input));
    ensureFolderExists(new File(input + File.separator + sourceLang));
    ensureFolderExists(new File(input + File.separator + targetLang));
    ensureFolderExists(new File(input + File.separator + targetLang + File.separator + "temp"));

    // NOTE: system-specific folders (input/systems/IBM, input/systems/MOSES) used to be created
    // here; that code was disabled and they are intentionally no longer created.

    ensureFolderExists(new File(resourceManager.getString("output")));

    if (featureManager.hasFeature("1700")) {
      String langResources = workDir + File.separator + "lang_resources";
      requireFolderOrExit(
          new File(langResources), "For Lucene features, lang_resources are needed.");
      prepareLuceneResources(
          langResources, sourceLang, "For Lucene features, source lang_resources are needed.");
      prepareLuceneResources(
          langResources, targetLang, "For Lucene features, target lang_resources are needed.");
    }
  }

  /**
   * Creates {@code folder} (and any missing parents) when it does not exist yet, reporting the
   * outcome on the console.
   *
   * @param folder the folder that must exist
   */
  private void ensureFolderExists(File folder) {
    if (folder.exists()) {
      return;
    }
    if (folder.mkdirs()) {
      System.out.println("folder created " + folder.getPath());
    } else {
      // Only claim success when the folder was really created.
      System.err.println("could not create folder " + folder.getPath());
    }
  }

  /**
   * Terminates the JVM with a non-zero status when a mandatory folder is missing.
   *
   * @param folder the folder that must already exist
   * @param message the message printed before exiting
   */
  private void requireFolderOrExit(File folder, String message) {
    if (!folder.exists()) {
      System.out.println(message);
      // A missing mandatory resource is a failure, so do not report success (0) to the caller.
      System.exit(1);
    }
  }

  /**
   * Prepares the Lucene resources of one language: checks that its resource folder exists,
   * creates its {@code luceneIndex} folder and symlinks the configured corpus into it.
   *
   * @param langResources path of the {@code lang_resources} folder
   * @param lang the language whose resources are prepared
   * @param missingMessage the message printed when the language folder is missing
   */
  private void prepareLuceneResources(String langResources, String lang, String missingMessage) {
    String langFolder = langResources + File.separator + lang;
    requireFolderOrExit(new File(langFolder), missingMessage);

    String lucenePath = langFolder + File.separator + "luceneIndex";
    ensureFolderExists(new File(lucenePath));

    String corpusPath = workDir + File.separator + resourceManager.getString(lang + ".corpus");
    String linkPath = lucenePath + File.separator + lang + ".corpus";
    linkCorpus(corpusPath, linkPath);
  }

  /**
   * Creates the symbolic link {@code linkPath} pointing to {@code corpusPath} by running {@code
   * ln -s}, and waits for the command to finish.
   *
   * @param corpusPath the file the link points to
   * @param linkPath the location of the link to create
   */
  private void linkCorpus(String corpusPath, String linkPath) {
    try {
      // Pass the arguments as an array: the single-string form of exec splits on whitespace and
      // therefore breaks on paths that contain spaces.
      Process ln = Runtime.getRuntime().exec(new String[] {"ln", "-s", corpusPath, linkPath});
      int status = ln.waitFor();
      if (status != 0) {
        System.err.println(
            "ln -s " + corpusPath + " " + linkPath + " exited with status " + status);
      }
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      // Preserve the interrupt for the caller instead of swallowing it.
      Thread.currentThread().interrupt();
    }
  }