Java DistributedFileSystemManager Examples, eu.larkc.iris.indexing.DistributedFileSystemManager Java Examples

Example #1

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

 /**
  * Builds the taps over inferences that are already present on the distributed file system.
  *
  * <p>When predicate indexing is enabled, the head stream and every body stream of the rule are
  * looked up individually and stored under the stream id; otherwise the single inferences
  * location is stored under the key {@code "main"}. A location that does not exist on the file
  * system produces no entry.
  *
  * @param scheme scheme handed to every {@code Hfs} tap that is created
  * @return the taps found, keyed by stream id or {@code "main"}; possibly empty, never null
  * @throws RuntimeException if the file system access fails with an {@code IOException}
  */
 private Map<String, Tap> getInferencesTap(Scheme scheme) {
   Map<String, Tap> found = new HashMap<String, Tap>();
   try {
     FileSystem fileSystem = FileSystem.get(mConfiguration.hadoopConfiguration);
     if (!mConfiguration.doPredicateIndexing) {
       // no indexing: there is only one inferences location to consider
       String mainPath = distributedFileSystemManager.getInferencesPath();
       if (fileSystem.exists(new Path(mainPath))) {
         found.put("main", new Hfs(scheme, mainPath));
       }
       return found;
     }
     // indexing: head stream first, then each body stream
     putInferencesTapIfStored(fileSystem, scheme, ruleStreams.getHeadStream(), found);
     for (LiteralFields bodyStream : ruleStreams.getBodyStreams()) {
       putInferencesTapIfStored(fileSystem, scheme, bodyStream, found);
     }
   } catch (IOException ioe) {
     logger.error("io exception", ioe);
     throw new RuntimeException("io exception", ioe);
   }
   return found;
 }

 /*
  * Registers a tap for the stream's inferences path under the stream id, if that path exists
  */
 private void putInferencesTapIfStored(
     FileSystem fileSystem, Scheme scheme, LiteralFields stream, Map<String, Tap> taps)
     throws IOException {
   String streamPath = distributedFileSystemManager.getInferencesPath(stream);
   if (fileSystem.exists(new Path(streamPath))) {
     taps.put(stream.getId().toString(), new Hfs(scheme, streamPath));
   }
 }

Example #2

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

  /**
   * Runs this flow assembly for one evaluation step.
   *
   * <p>A fresh {@code DistributedFileSystemManager} is created, the output location is derived
   * from the stratum, iteration and rule numbers of the context, and the flow is then processed.
   * The output goes to the head stream's inferences path unless predicate indexing is on and the
   * head stream has no predicate, in which case a temporary inferences path is used.
   *
   * @param evaluationContext the evaluation context, stratum, iteration, rule number
   * @return true if new inferences were stored, false otherwise
   * @throws RuntimeException if processing the flow fails with an {@code IOException}
   */
  public boolean evaluate(EvaluationContext evaluationContext) {
    this.distributedFileSystemManager = new DistributedFileSystemManager(mConfiguration);

    // identifier has the shape _<stratum>_<iteration>_<rule>
    StringBuilder identifierBuilder = new StringBuilder();
    identifierBuilder.append("_").append(evaluationContext.getStratumNumber());
    identifierBuilder.append("_").append(evaluationContext.getIterationNumber());
    identifierBuilder.append("_").append(evaluationContext.getRuleNumber());
    String flowIdentificator = identifierBuilder.toString();

    String resultName;
    if (mConfiguration.resultsName != null) {
      resultName = mConfiguration.resultsName;
    } else {
      resultName = "inference";
    }

    if (ruleStreams.getHeadStream().getPredicate() == null && mConfiguration.doPredicateIndexing) {
      // indexed storage but no fixed head predicate: write to a temporary location
      path = distributedFileSystemManager.getTempInferencesPath(resultName, flowIdentificator);
    } else {
      path =
          distributedFileSystemManager.getInferencesPath(
              ruleStreams.getHeadStream(), resultName, flowIdentificator);
    }

    try {
      return processFlow(resultName, flowIdentificator, path);
    } catch (IOException ioe) {
      logger.error("io exception creating flow", ioe);
      throw new RuntimeException("io exception creating flow", ioe);
    }
  }

Example #3

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

 /**
  * Assembles the source taps of the flow.
  *
  * <p>With predicate indexing, the taps of the head stream and of each body stream are gathered
  * through {@code prepareIndexedSource}. Without it, a single {@code "main"} source is built from
  * the facts path plus the {@code "main"} inferences tap, when one is available. Every gathered
  * list of taps is finally wrapped into one {@code MultiSourceTap}.
  *
  * @return the source taps, keyed by stream id or {@code "main"}
  */
 private Map<String, Tap> prepareSourceTaps() {
   // NOTE(review): the original code carried the remark "we can assume that the number of fields
   // are the same as the head's tuple size + 1 (the predicate)" -- presumably about `fields`; confirm
   SequenceFile sourceScheme = new SequenceFile(fields);
   Map<String, List<Tap>> tapsPerSource = new HashMap<String, List<Tap>>();

   if (!mConfiguration.doPredicateIndexing) {
     List<Tap> mainTaps = new ArrayList<Tap>();
     mainTaps.add(new Hfs(sourceScheme, distributedFileSystemManager.getFactsPath()));
     tapsPerSource.put("main", mainTaps);
     Map<String, Tap> storedInferences = getInferencesTap(sourceScheme);
     if (storedInferences.containsKey("main")) {
       mainTaps.add(storedInferences.get("main"));
     }
   } else {
     prepareIndexedSource(sourceScheme, tapsPerSource, ruleStreams.getHeadStream());
     for (LiteralFields bodyStream : ruleStreams.getBodyStreams()) {
       prepareIndexedSource(sourceScheme, tapsPerSource, bodyStream);
     }
   }

   // collapse each list of taps into a single multi source tap
   Map<String, Tap> sourceTaps = new HashMap<String, Tap>();
   for (Map.Entry<String, List<Tap>> source : tapsPerSource.entrySet()) {
     List<Tap> taps = source.getValue();
     Tap[] tapArray = taps.toArray(new Tap[taps.size()]);
     sourceTaps.put(source.getKey(), new MultiSourceTap(tapArray));
   }
   return sourceTaps;
 }

Example #4

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

 /**
  * Registers the taps of one literal for the predicate indexed storage.
  *
  * <p>A new list is stored in {@code sources} under the literal's id. It receives a facts tap,
  * built from the literal specific facts path when the literal has a predicate and from the
  * general facts path otherwise, followed by the literal's inferences tap if
  * {@code getInferencesTap} returned one for that id.
  *
  * @param sourceScheme scheme used for the facts tap and for the inferences lookup
  * @param sources map that receives the list of taps, keyed by literal id
  * @param fields the literal whose sources are prepared
  */
 private void prepareIndexedSource(
     SequenceFile sourceScheme, Map<String, List<Tap>> sources, LiteralFields fields) {
   IPredicate predicate = fields.getPredicate();
   String literalId = fields.getId().toString();

   List<Tap> literalTaps = new ArrayList<Tap>();
   sources.put(literalId, literalTaps);

   if (predicate != null) {
     literalTaps.add(new Hfs(sourceScheme, distributedFileSystemManager.getFactsPath(fields)));
   } else {
     literalTaps.add(new Hfs(sourceScheme, distributedFileSystemManager.getFactsPath()));
   }

   Map<String, Tap> storedInferences = getInferencesTap(sourceScheme);
   if (storedInferences.containsKey(literalId)) {
     literalTaps.add(storedInferences.get(literalId));
   }
 }

Example #5

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

  /**
   * Adds a predicate count pipe and its sink to the flow under construction.
   *
   * <p>The temporary predicate groups location is removed first if it already exists. The pipe
   * built by {@code Utils.buildPredicateCountPipe} is appended to {@code pipes}, and its sink is
   * registered in {@code sinks} under the name {@code "predicatesPipe"}.
   *
   * @param pipe the pipe the predicate count pipe is built from
   * @param sinks sink taps of the flow; receives the predicate counts sink
   * @param pipes pipes of the flow; receives the predicate count pipe
   * @throws IOException if checking for or deleting the temporary location fails
   */
  private void setupPredicateCounts(Pipe pipe, Map<String, Tap> sinks, List<Pipe> pipes)
      throws IOException {
    String countsLocation =
        distributedFileSystemManager.getPredicateGroupsTempPath(mConfiguration.resultsName);
    Path countsPath = new Path(countsLocation);

    // remove whatever is already stored at the temporary location
    FileSystem fileSystem = FileSystem.get(mConfiguration.hadoopConfiguration);
    if (fileSystem.exists(countsPath)) {
      fileSystem.delete(countsPath, true);
    }

    Tap countsSink = new Hfs(new Fields(0, 1), countsLocation);
    Pipe countsPipe = Utils.buildPredicateCountPipe(pipe);

    sinks.put("predicatesPipe", countsSink);
    pipes.add(countsPipe);
  }

Example #6

0

Show file

File: FlowAssembly.java Project: distributed-iris-reasoner/distributed-iris-reasoner

  /**
   * Creates and processes the flow identified by {@code flowIdentificator}.
   *
   * <p>The prepared source taps are connected to a head sink at {@code output} (plus a predicate
   * count sink when predicate indexing is enabled) and the flow is run to completion. The head
   * sink is then inspected:
   *
   * <ul>
   *   <li>if it holds no tuple, the results at {@code output} are removed through
   *       {@code deleteResults} and {@code false} is returned;
   *   <li>otherwise the empty part files below {@code output} are deleted and, with predicate
   *       indexing, the predicate counts are read, registered and saved. If the head stream has
   *       no predicate, the result is additionally split per predicate and {@code output} itself
   *       is removed. The temporary predicate groups location is deleted at the end.
   * </ul>
   *
   * @param resultName name of the result, used as prefix of the flow name
   * @param flowIdentificator identifier appended to {@code resultName} to form the flow name
   * @param output location where the results of the flow are stored
   * @return true if the head sink contained at least one tuple, false otherwise
   * @throws IOException if accessing the file system or a part file fails
   * @throws RuntimeException if reading the head sink fails with an {@code IOException}
   */
  private boolean processFlow(String resultName, String flowIdentificator, String output)
      throws IOException {
    String flowName = resultName + flowIdentificator;
    Map<String, Tap> sources = prepareSourceTaps();

    SequenceFile sinkScheme = new SequenceFile(fields);
    // sinkScheme.setNumSinkParts(1); //FIXME
    Tap headSink = new Hfs(sinkScheme, output, true);

    Map<String, Tap> sinks = new HashMap<String, Tap>();
    List<Pipe> pipes = new ArrayList<Pipe>();
    sinks.put(pipe.getName(), headSink);
    pipes.add(pipe);
    if (mConfiguration.doPredicateIndexing) {
      // calculate the count of the result and write it in the configuration
      // if the predicate is a variable then we have to split also the result and put it in the
      // right location
      setupPredicateCounts(pipe, sinks, pipes);
    }

    flow =
        new FlowConnector(mConfiguration.flowProperties)
            .connect(flowName, sources, sinks, pipes.toArray(new Pipe[0]));
    flow.complete();

    if (!headSinkHasTuples()) {
      // the results live at `output`; use the parameter rather than the `path` field
      deleteResults(new Path(output));
      return false;
    }

    // merge part files FIXME
    FileSystem fs = FileSystem.get(mConfiguration.hadoopConfiguration);
    deleteEmptyPartFiles(fs, output);

    if (mConfiguration.doPredicateIndexing) {
      // update counts in configuration
      List<PredicateCount> predicateCounts = Utils.readPredicateCounts(flow, "predicatesPipe");

      distributedFileSystemManager.addPredicates(predicateCounts);

      if (ruleStreams.getHeadStream().getPredicate() == null) {
        // split result to the right locations (for variable predicate)
        Tap source = new Hfs(sinkScheme, output, true);
        Utils.splitStreamPerPredicates(
            mConfiguration,
            distributedFileSystemManager,
            source,
            predicateCounts,
            resultName,
            flowIdentificator);

        fs.delete(new Path(output), true);
      }

      distributedFileSystemManager.savePredicateConfig();
      String predicateGroupsTempPath =
          distributedFileSystemManager.getPredicateGroupsTempPath(mConfiguration.resultsName);
      fs.delete(new Path(predicateGroupsTempPath), true);
    }

    return true;
  }

  /*
   * Tells whether the head sink of the completed flow holds at least one tuple
   */
  private boolean headSinkHasTuples() {
    try {
      TupleEntryIterator iterator = flow.openSink(pipe.getName());
      try {
        return iterator.hasNext();
      } finally {
        // close even when hasNext() fails, so the underlying reader is not leaked
        iterator.close();
      }
    } catch (IOException e) {
      logger.error("io exception", e);
      throw new RuntimeException("io exception", e);
    }
  }

  /*
   * Deletes the empty part files (could be from reducers running on no data) below directory;
   * scans part-00000, part-00001, ... and stops at the first index that does not exist
   */
  private void deleteEmptyPartFiles(FileSystem fs, String directory) throws IOException {
    for (int index = 0; ; index++) {
      // %05d pads to at least five digits and, unlike substring based padding, does not
      // fail for indexes with more than five digits; Locale.ROOT keeps the digits ASCII
      String file = directory + "/" + String.format(java.util.Locale.ROOT, "part-%05d", index);
      Path filePath = new Path(file);
      if (!fs.exists(filePath)) {
        break;
      }
      Tap source = new Hfs(new Fields(0, 1, 2), file);
      TupleEntryIterator tei = source.openForRead(mConfiguration.jobConf);
      boolean noData;
      try {
        noData = !tei.hasNext();
      } finally {
        tei.close();
      }
      if (noData) {
        logger.info("delete empty result : " + file);
        fs.delete(filePath, false);
      }
    }
  }