/**
   * Loads the tab-separated query file {@code hozOrigQueryAll.txt} from the input directory and
   * runs every query against the {@code modernJewishOnly} index.
   *
   * <p>Each non-blank input line must have the form {@code <target term id>\t<query text>}. The
   * query generator is switched to {@code InputType.Query} before any query is generated, and at
   * most 1000 hits are kept per query.
   *
   * @return a map from the string description of each target term (resolved through {@code
   *     TargetTerm2Id.getStrDesc}) to the documents retrieved for its query
   * @throws IOException if the input file or the index cannot be read, or if a non-blank input
   *     line contains no tab separator
   * @throws ParseException if generating a query from the query text fails
   */
  @Override
  public HashMap<String, ArrayList<ScoreDoc>> extractDocsByRepresentation()
      throws IOException, ParseException {
    String indexName = "modernJewishOnly";
    m_qg.setType(InputType.Query);
    String inputFileName = "hozOrigQueryAll.txt";

    // read the suitable input file; the reader is closed even if a line turns out to be malformed
    LinkedList<Pair<String, String>> queries = new LinkedList<Pair<String, String>>();
    BufferedReader reader = new BufferedReader(new FileReader(m_inputDir + inputFileName));
    try {
      String line;
      int lineNumber = 0;
      while ((line = reader.readLine()) != null) {
        lineNumber++;
        // tolerate blank lines (e.g. a trailing empty line at the end of the file)
        if (line.trim().length() == 0) {
          continue;
        }
        int index = line.indexOf("\t");
        if (index < 0) {
          // without this check substring(0, -1) would fail with an unexplained
          // StringIndexOutOfBoundsException
          throw new IOException(
              "Malformed line "
                  + lineNumber
                  + " in "
                  + inputFileName
                  + " (expected <id>\\t<query>): "
                  + line);
        }
        queries.add(
            new Pair<String, String>(line.substring(0, index), line.substring(index + 1)));
      }
    } finally {
      reader.close();
    }

    // search for the queries in the index; ScoreDoc results stay usable after the reader is closed
    // because they are copied into the returned lists
    IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(m_indexDir + indexName)));
    try {
      IndexSearcher searcher = new IndexSearcher(indexReader);
      HashMap<String, ArrayList<ScoreDoc>> termDocs = new HashMap<String, ArrayList<ScoreDoc>>();
      for (Pair<String, String> term : queries) {
        Query q = m_qg.generate(term.value());
        termDocs.put(
            TargetTerm2Id.getStrDesc(Integer.parseInt(term.key())),
            new ArrayList<ScoreDoc>(Arrays.asList(searcher.search(q, 1000).scoreDocs)));
      }
      return termDocs;
    } finally {
      indexReader.close();
    }
  }
  /**
   * Turns the accumulated nominators of one entailed element into Lin and balanced-inclusion
   * (cover) scores and writes the retained rules to {@code writer}.
   *
   * <p>For every entailing element whose norm is non-zero:
   *
   * <pre>
   *   lin   = linNominator / (entailedNorm + entailingNorm)
   *   cover = sqrt(lin * (coverNominator / entailingNorm))
   * </pre>
   *
   * Only strictly positive scores are offered to the two queues, each bounded by {@code
   * m_maxRulesPerElement} (presumably keeping the best rules according to {@code
   * DistSimRuleComparator}). The queues are then drained and every rule is printed on its own
   * line, prefixed with {@code "LIN\t"} or {@code "COVER\t"}. Finally {@code entailingElements} is
   * cleared so the caller can reuse it for the next entailed element.
   *
   * @param writer destination of the rule lines
   * @param entailedElementId id of the entailed element; when {@code null} (the caller never saw
   *     an entailed element) nothing is written
   * @param entailingElements entailing element id mapped to a pair whose key holds the Lin
   *     nominator and whose value holds the cover nominator; cleared before returning
   * @param entailedElement2NormMap norm of every entailed element
   * @param entailingElement2NormMap norm of every entailing element
   */
  private void writeEntailedElementScores(
      PrintWriter writer,
      Integer entailedElementId,
      TIntObjectMap<Pair<DoubleContainer, DoubleContainer>> entailingElements,
      TIntDoubleHashMap entailedElement2NormMap,
      TIntDoubleHashMap entailingElement2NormMap) {

    // unboxing a null id below would throw a NullPointerException; there is nothing to score
    if (entailedElementId == null) {
      return;
    }

    BoundedPriorityQueue<DistSimRule> linScores =
        new BoundedPriorityQueue<DistSimRule>(new DistSimRuleComparator(), m_maxRulesPerElement);
    BoundedPriorityQueue<DistSimRule> coverScores =
        new BoundedPriorityQueue<DistSimRule>(new DistSimRuleComparator(), m_maxRulesPerElement);

    double entailedElementNorm = entailedElement2NormMap.get(entailedElementId);

    TIntIterator iter = entailingElements.keySet().iterator();
    while (iter.hasNext()) {
      int entailingElementId = iter.next();

      // if there are no features for this element - then it is not similar to any other and we can
      // move on (this also keeps the cover division below away from a zero denominator)
      double entailingElementNorm = entailingElement2NormMap.get(entailingElementId);
      if (entailingElementNorm == 0.0) {
        continue;
      }

      Pair<DoubleContainer, DoubleContainer> scores = entailingElements.get(entailingElementId);
      double linNominator = scores.key().value();
      double coverNominator = scores.value().value();

      double linScore = linNominator / (entailedElementNorm + entailingElementNorm);
      // balanced inclusion: geometric mean of the Lin score and the raw cover score
      double coverScore = Math.sqrt(linScore * (coverNominator / entailingElementNorm));

      if (linScore > 0.0) {
        linScores.offer(new DistSimRule(entailedElementId, entailingElementId, linScore));
      }

      if (coverScore > 0.0) {
        coverScores.offer(new DistSimRule(entailedElementId, entailingElementId, coverScore));
      }
    }

    while (!linScores.isEmpty()) {
      writer.println("LIN\t" + linScores.poll());
    }
    while (!coverScores.isEmpty()) {
      writer.println("COVER\t" + coverScores.poll());
    }

    // we are done with this entailed element, so reset the accumulator for the next one
    entailingElements.clear();
  }
  /**
   * Merges the per-vector-file rule files of {@code vectorsDir} into two human-readable files.
   *
   * <p>Every file in {@code vectorsDir} whose name ends with {@code ".rules"} and contains {@code
   * "Trunc" + maxVectorLen} is read. Each of its lines is expected to hold four tab-separated
   * fields: score type ({@code LIN} or {@code COVER}), entailed element id, entailing element id
   * and score. Lines with any other score type are ignored. For each entailed element the rules
   * are collected in a queue bounded by {@code m_maxRulesPerElement}.
   *
   * <p>Ids are then translated to descriptions using {@code elements.txt} (entailing side) and
   * {@code targetElements.txt} (entailed side), both holding {@code <description>\t<id>} lines,
   * and the merged rules are written as {@code <entailed>\t<entailing>\t<score>} lines to {@code
   * linRules.txt} and {@code balRules.txt} inside {@code vectorsDir}.
   *
   * @param vectorsDir directory holding the rule files and the two element files; also receives
   *     the output files
   * @param maxVectorLen truncation length used to select the rule files by name
   * @throws NumberFormatException if an id or a score in one of the files is not numeric
   * @throws IOException if {@code vectorsDir} cannot be listed, or a file cannot be read or
   *     written
   */
  public void mergeRules(File vectorsDir, int maxVectorLen)
      throws NumberFormatException, IOException {
    // File.list() returns null when the path is not a directory or cannot be read
    String[] fileNames = vectorsDir.list();
    if (fileNames == null) {
      throw new IOException("Cannot list vectors directory: " + vectorsDir.getAbsolutePath());
    }

    TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>> linScores =
        new TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>>();
    TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>> balScores =
        new TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>>();

    for (String fileName : fileNames) {
      if (fileName.endsWith(".rules") && fileName.contains("Trunc" + maxVectorLen)) {
        System.out.println("Reading: " + fileName);
        BufferedReader reader =
            new BufferedReader(new FileReader(vectorsDir.getAbsolutePath() + "/" + fileName));
        try {
          String line;
          while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\t");
            String scoreType = tokens[0];
            int entailedElement = Integer.parseInt(tokens[1]);
            Pair<Integer, Double> rule =
                new Pair<Integer, Double>(Integer.valueOf(tokens[2]), Double.valueOf(tokens[3]));
            if (scoreType.equals("LIN")) {
              offerMergedRule(linScores, entailedElement, rule);
            } else if (scoreType.equals("COVER")) {
              offerMergedRule(balScores, entailedElement, rule);
            }
          }
        } finally {
          reader.close();
        }
      }
    }

    System.out.println("Uploading elements");
    TIntObjectMap<String> id2elementDesc =
        readElementDescriptions(vectorsDir.getAbsolutePath() + "/elements.txt");
    TIntObjectMap<String> id2targetElementDesc =
        readElementDescriptions(vectorsDir.getAbsolutePath() + "/targetElements.txt");

    writeMergedRules(
        vectorsDir.getAbsolutePath() + "/linRules.txt",
        linScores,
        id2targetElementDesc,
        id2elementDesc);
    writeMergedRules(
        vectorsDir.getAbsolutePath() + "/balRules.txt",
        balScores,
        id2targetElementDesc,
        id2elementDesc);
  }

  /** Offers {@code rule} to the bounded queue of {@code entailedElement}, creating it on demand. */
  private void offerMergedRule(
      TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>> scores,
      int entailedElement,
      Pair<Integer, Double> rule) {
    BoundedPriorityQueue<Pair<Integer, Double>> rules = scores.get(entailedElement);
    if (rules == null) {
      rules =
          new BoundedPriorityQueue<Pair<Integer, Double>>(
              new obj.PairComparator(), m_maxRulesPerElement);
      scores.put(entailedElement, rules);
    }
    rules.offer(rule);
  }

  /** Reads a file of {@code <description>\t<id>} lines into an id-to-description map. */
  private TIntObjectMap<String> readElementDescriptions(String path) throws IOException {
    TIntObjectMap<String> id2desc = new TIntObjectHashMap<String>();
    BufferedReader reader = new BufferedReader(new FileReader(path));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] tokens = line.split("\t");
        id2desc.put(Integer.parseInt(tokens[1]), tokens[0]);
      }
    } finally {
      reader.close();
    }
    return id2desc;
  }

  /**
   * Drains every queue of {@code scores} into {@code path}, one {@code
   * <entailed>\t<entailing>\t<score>} line per rule, with ids replaced by their descriptions.
   */
  private void writeMergedRules(
      String path,
      TIntObjectHashMap<BoundedPriorityQueue<Pair<Integer, Double>>> scores,
      TIntObjectMap<String> id2targetElementDesc,
      TIntObjectMap<String> id2elementDesc)
      throws IOException {
    PrintWriter writer = new PrintWriter(new FileOutputStream(path));
    try {
      TIntIterator iter = scores.keySet().iterator();
      while (iter.hasNext()) {
        int entailedElementId = iter.next();
        String entailedStr = id2targetElementDesc.get(entailedElementId);
        BoundedPriorityQueue<Pair<Integer, Double>> rules = scores.get(entailedElementId);
        while (!rules.isEmpty()) {
          Pair<Integer, Double> rule = rules.poll();
          writer.println(
              entailedStr + "\t" + id2elementDesc.get(rule.key()) + "\t" + rule.value());
        }
      }
      // PrintWriter never throws on write failures; surface them instead of losing rules silently
      if (writer.checkError()) {
        throw new IOException("Failed writing merged rules to " + path);
      }
    } finally {
      writer.close();
    }
  }
  /**
   * Computes rule scores after generating an inverted index and loading normalization data.
   *
   * <p>{@code targetVectorsFile} is read line by line; each line is tab-separated with the
   * entailed element id in field 0, the feature id in field 2 and the PMI of that feature for the
   * entailed element in field 3. Lines of the same entailed element are expected to be
   * consecutive: whenever the id changes, the nominators accumulated so far are handed to {@code
   * writeEntailedElementScores}, which writes the rules and clears the accumulator.
   *
   * <p>For every entailing element sharing the feature (looked up in {@code invertedIndex}) the
   * Lin nominator grows by {@code entailedPmi + entailingPmi} and the cover nominator by {@code
   * entailingPmi}. Entailed elements whose norm is zero are skipped, and features absent from the
   * inverted index are counted and logged.
   *
   * <p>The rules are written next to {@code vectorsFile}, to a file with the same name but the
   * extension replaced by (or, if there is none, extended with) {@code .rules}.
   *
   * @param invertedIndex feature id mapped to the (entailing element id, PMI) pairs having that
   *     feature
   * @param entailedElement2NormMap norm of every entailed element
   * @param entailingElement2NormMap norm of every entailing element
   * @param targetVectorsFile feature vectors of the entailed (target) elements
   * @param vectorsFile vectors file whose path determines the name of the rules file
   * @throws NumberFormatException if an id or a PMI value in the input is not numeric
   * @throws IOException if the input cannot be read or the rules file cannot be written
   */
  private void computeRuleScores(
      TIntObjectMap<List<Pair<Integer, Float>>> invertedIndex,
      TIntDoubleHashMap entailedElement2NormMap,
      TIntDoubleHashMap entailingElement2NormMap,
      File targetVectorsFile,
      File vectorsFile)
      throws NumberFormatException, IOException {

    // strip the extension of the file name only: a dot inside a directory name must not truncate
    // the path, and a name without any dot must not make substring() fail
    String vectorFileString = vectorsFile.getAbsolutePath();
    int extensionStart = vectorFileString.lastIndexOf('.');
    boolean hasExtension = extensionStart > vectorFileString.lastIndexOf(File.separatorChar);
    File rulesFile =
        new File(
            (hasExtension ? vectorFileString.substring(0, extensionStart) : vectorFileString)
                + ".rules");

    int missingFeaturesCount = 0;
    BufferedReader reader = new BufferedReader(new FileReader(targetVectorsFile));
    try {
      PrintWriter writer = new PrintWriter(new FileOutputStream(rulesFile));
      try {
        // stays null until the first entailed element with a non-zero norm is seen
        Integer currEntailedElementId = null;
        TIntObjectMap<Pair<DoubleContainer, DoubleContainer>> entailingElements =
            new TIntObjectHashMap<Pair<DoubleContainer, DoubleContainer>>();
        int i = 0;
        String line;
        while ((line = reader.readLine()) != null) {

          String[] tokens = line.split("\t");
          int entailedElement = Integer.parseInt(tokens[0]);
          int featureId = Integer.parseInt(tokens[2]);
          double entailedPmi = Double.parseDouble(tokens[3]);

          // if there are no features for this element - then it is not similar to any other and
          // we can move on
          if (entailedElement2NormMap.get(entailedElement) == 0.0) {
            continue;
          }

          if (currEntailedElementId != null && currEntailedElementId.intValue() != entailedElement) {
            i++;
            if (i % 1000 == 0) {
              // report the element being flushed: the accumulator still holds its entailing
              // elements at this point
              System.out.println(
                  "Entailed element Id: "
                      + currEntailedElementId
                      + ". Number of entailing elements: "
                      + entailingElements.size());
              System.out.println("Number of elements gone through: " + i);
            }
            writeEntailedElementScores(
                writer,
                currEntailedElementId,
                entailingElements,
                entailedElement2NormMap,
                entailingElement2NormMap);
          }
          // record the current element before the missing-feature check below can skip the rest
          // of the iteration; otherwise the previous element would be flushed (and counted) again
          currEntailedElementId = entailedElement;

          List<Pair<Integer, Float>> elementPmiList = invertedIndex.get(featureId);
          if (elementPmiList == null) {
            missingFeaturesCount++;
            System.out.println("line: " + i + " " + tokens[2]);
            System.out.println("feature id: " + featureId);
            continue;
          }

          for (Pair<Integer, Float> elementPmiPair : elementPmiList) {
            int entailingId = elementPmiPair.key();
            double entailingPmi = elementPmiPair.value();
            double linScore = entailedPmi + entailingPmi;
            double coverScore = entailingPmi;

            Pair<DoubleContainer, DoubleContainer> scoresMap = entailingElements.get(entailingId);
            if (scoresMap == null) {
              scoresMap =
                  new Pair<DoubleContainer, DoubleContainer>(
                      new DoubleContainer(linScore), new DoubleContainer(coverScore));
              entailingElements.put(entailingId, scoresMap);
            } else {
              scoresMap.key().add(linScore);
              scoresMap.value().add(coverScore);
            }
          }
        }

        // flush the last element; with an empty or fully filtered input there is none
        if (currEntailedElementId != null) {
          writeEntailedElementScores(
              writer,
              currEntailedElementId,
              entailingElements,
              entailedElement2NormMap,
              entailingElement2NormMap);
        }

        // PrintWriter never throws on write failures; surface them instead of losing rules
        if (writer.checkError()) {
          throw new IOException("Failed writing rules to " + rulesFile.getAbsolutePath());
        }
      } finally {
        writer.close();
      }
    } finally {
      reader.close();
    }
    System.out.println("Num of missing features: " + missingFeaturesCount);
  }