예제 #1
0
  /**
   * Compares two sets of counts to see which items are interestingly over-represented in the first
   * set.
   *
   * <p>Every distinct element of {@code a} is handed to {@code compareAndAdd}. When {@code
   * threshold} is negative, elements that occur only in {@code b} are handed to it as well. The
   * collected items are returned sorted by descending score.
   *
   * @param a The first counts.
   * @param b The reference counts.
   * @param maxReturn The maximum number of items to return. Use maxReturn >= a.elementSet.size() to
   *     return all scores above the threshold.
   * @param threshold The minimum score for items to be returned. Use 0 to return all items more
   *     common in a than b. Use -Double.MAX_VALUE (not Double.MIN_VALUE !) to not use a threshold.
   * @return A list of scored items with their scores, highest score first.
   */
  public static <T> List<ScoredItem<T>> compareFrequencies(
      Multiset<T> a, Multiset<T> b, int maxReturn, double threshold) {
    int totalA = a.size();
    int totalB = b.size();

    Ordering<ScoredItem<T>> byScoreAscending =
        new Ordering<ScoredItem<T>>() {
          @Override
          public int compare(ScoredItem<T> tScoredItem, ScoredItem<T> tScoredItem1) {
            return Double.compare(tScoredItem.score, tScoredItem1.score);
          }
        };

    // The initial capacity is only a sizing hint (the queue grows on demand), so bound it by the
    // number of distinct items in a. Using maxReturn + 1 directly overflows to a negative value
    // for Integer.MAX_VALUE and allocates a needlessly huge array for any large maxReturn, which
    // is exactly what callers pass when they want "all items". A negative maxReturn still yields
    // a capacity below 1 and is rejected by PriorityQueue as before.
    int initialCapacity = Math.min(maxReturn, a.elementSet().size()) + 1;
    Queue<ScoredItem<T>> best =
        new PriorityQueue<ScoredItem<T>>(initialCapacity, byScoreAscending);

    for (T t : a.elementSet()) {
      compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t);
    }

    // if threshold >= 0 we only iterate through a because anything not there can't be as or more
    // common than in b.
    if (threshold < 0) {
      for (T t : b.elementSet()) {
        // only items missing from a need be scored
        if (a.count(t) == 0) {
          compareAndAdd(a, b, maxReturn, threshold, totalA, totalB, best, t);
        }
      }
    }

    List<ScoredItem<T>> r = new ArrayList<ScoredItem<T>>(best);
    Collections.sort(r, byScoreAscending.reverse());
    return r;
  }
  /**
   * Demonstrates counting words with an insertion-ordered multiset: prints the multiset, its
   * distinct elements, several per-word counts, the total size and the number of distinct words.
   */
  public static void main(String[] args) {
    // Split the sample sentence on single spaces to obtain the words
    final String inputText = "Hello World! Hello All! Hi World!";
    final String[] words = inputText.split(" ");

    // LinkedHashMultiset iterates its elements in first-insertion order
    final Multiset<String> wordCounts = LinkedHashMultiset.create(Arrays.asList(words));

    // Whole multiset: [Hello x 2, World! x 2, All!, Hi]
    System.out.println(wordCounts);
    // Distinct words only: [Hello, World!, All!, Hi]
    System.out.println(wordCounts.elementSet());

    // Each row is {printed label, element looked up}; expected counts: 2, 2, 1, 1, 0
    final String[][] lookups = {
      {"Hello", "Hello"},
      {"World", "World!"},
      {"All", "All!"},
      {"Hi", "Hi"},
      {"Empty", "Empty"},
    };
    for (String[] lookup : lookups) {
      System.out.println(lookup[0] + " = " + wordCounts.count(lookup[1]));
    }

    // Total number of occurrences: 6
    System.out.println(wordCounts.size());

    // Number of distinct words: 4
    System.out.println(wordCounts.elementSet().size());
  }
  /**
   * Logs how long a search over files of unknown type took, together with up to ten of the most
   * frequent file extensions among those files.
   *
   * @param otherFiles the files that were searched; their extensions are counted
   *     case-insensitively, and a missing extension is counted as the empty string
   * @param start a {@link System#currentTimeMillis()} timestamp; the reported duration is the
   *     current time minus this value
   */
  private static void logStats(Collection<PsiFile> otherFiles, long start) {
    long time = System.currentTimeMillis() - start;

    final Multiset<String> stats = HashMultiset.create();
    for (PsiFile file : otherFiles) {
      String extension =
          StringUtil.notNullize(file.getViewProvider().getVirtualFile().getExtension());
      // Lower-case with a fixed locale: the default-locale variant maps "I" to a dotless i under
      // e.g. a Turkish locale, which would split the statistics for one and the same extension.
      stats.add(extension.toLowerCase(java.util.Locale.ROOT));
    }

    List<String> extensions = ContainerUtil.newArrayList(stats.elementSet());
    Collections.sort(
        extensions,
        new Comparator<String>() {
          @Override
          public int compare(String o1, String o2) {
            // Descending by count. Counts are never negative, so the subtraction cannot overflow.
            return stats.count(o2) - stats.count(o1);
          }
        });

    StringBuilder message =
        new StringBuilder("Search in ")
            .append(otherFiles.size())
            .append(" files with unknown types took ")
            .append(time)
            .append("ms.\n")
            .append(
                "Mapping their extensions to an existing file type (e.g. Plain Text) might speed up the search.\n")
            .append("Most frequent non-indexed file extensions: ");
    int shown = Math.min(10, extensions.size());
    for (int i = 0; i < shown; i++) {
      String extension = extensions.get(i);
      message.append(extension).append("(").append(stats.count(extension)).append(") ");
    }
    LOG.info(message.toString());
  }
예제 #4
0
파일: BagOfWords.java 프로젝트: khmoran/ml
  /**
   * Fills the given feature vector with bag-of-words counts taken from a text.
   *
   * <p>The text is tokenized, every token is normalized, and tokens that normalize to the empty
   * string are dropped. For each distinct remaining term a {@code DoubleFeature} holding the
   * term's occurrence count is stored, provided its feature name is a key of the training
   * metadata. The feature name is the term itself, or the string form of the term's {@code
   * wordIndexMap} entry when {@code integerFeatureNames} is set.
   *
   * @param text the text to extract terms from
   * @param fv the feature vector that receives the features
   */
  protected void populateFV(String text, FeatureVector<E> fv) {
    Multiset<String> counts = HashMultiset.create();
    for (String rawToken : tokenizer.tokenize(text)) {
      String normalized = Util.normalize(rawToken);
      if (normalized.isEmpty()) {
        continue;
      }
      counts.add(normalized);
    }

    // Sparse representation: terms that do not occur get no entry at all.
    for (String term : counts.elementSet()) {
      String featureName = integerFeatureNames ? String.valueOf(wordIndexMap.get(term)) : term;

      // Names that are not registered in the training metadata are skipped.
      if (train.getMetadata().containsKey(featureName)) {
        fv.put(featureName, new DoubleFeature(featureName, (double) counts.count(term)));
      }
    }
  }
  /**
   * Resolves the candidate parent URIs and scores each one that turns out to be a {@code
   * Container}.
   *
   * <p>All distinct URIs are resolved in one call. A parent that resolves to a container is added
   * to the result with the score computed from its occurrence count and {@code children}. Parents
   * that cannot be resolved, or that resolve to something else, are only noted in {@code desc}.
   *
   * @param parents candidate parent URIs with their occurrence counts
   * @param children passed to {@code score} together with each parent's count
   * @param desc receives one line of text per parent describing what happened to it
   * @return the scored container candidates
   */
  private ScoredCandidates<Container> scoreContainers(
      Multiset<String> parents, int children, ResultDescription desc) {
    Builder<Container> scored = DefaultScoredCandidates.fromSource(NAME);
    ResolvedContent resolved = resolver.findByCanonicalUris(parents.elementSet());

    for (Multiset.Entry<String> entry : parents.entrySet()) {
      Maybe<Identified> lookup = resolved.get(entry.getElement());
      if (!lookup.hasValue()) {
        desc.appendText("%s: missing", entry);
        continue;
      }

      Identified content = lookup.requireValue();
      if (!(content instanceof Container)) {
        desc.appendText("%s: %s not container", entry, content.getClass().getSimpleName());
        continue;
      }

      Container container = (Container) content;
      Score containerScore = score(entry.getCount(), children);
      scored.addEquivalent(container, containerScore);
      desc.appendText(
          "%s: scored %s (%s)",
          container.getCanonicalUri(), containerScore, container.getTitle());
    }

    return scored.build();
  }
예제 #6
0
  /**
   * Walks through the basic multiset operations: adding elements, counting one element, the total
   * size, iterating distinct elements, iterating all occurrences, iterating entries with their
   * counts, and removing a fixed number of occurrences.
   */
  public static void main(String args[]) {

    // Fill the multiset; the elements are added in exactly this sequence
    Multiset<String> letters = HashMultiset.create();
    String[] samples = {"a", "b", "c", "d", "a", "b", "c", "b", "b", "b"};
    for (String sample : samples) {
      letters.add(sample);
    }

    // How often a single element occurs
    System.out.println("Occurrence of 'b' : " + letters.count("b"));

    // Number of occurrences over all elements
    System.out.println("Total Size : " + letters.size());

    // Distinct elements only
    System.out.println("Set [");
    for (String distinct : letters.elementSet()) {
      System.out.println(distinct);
    }
    System.out.println("]");

    // Every occurrence; iterating the multiset itself yields repeated elements
    System.out.println("MultiSet [");
    for (String occurrence : letters) {
      System.out.println(occurrence);
    }
    System.out.println("]");

    // Distinct elements paired with their counts
    System.out.println("MultiSet [");
    for (Multiset.Entry<String> pair : letters.entrySet()) {
      String line = "Element: " + pair.getElement() + ", Occurrence(s): " + pair.getCount();
      System.out.println(line);
    }
    System.out.println("]");

    // Drop two occurrences of "b", then show the remaining count
    letters.remove("b", 2);
    System.out.println("Occurence of 'b' : " + letters.count("b"));
  }
예제 #7
0
  /**
   * Removes a node's outgoing edges from the graph.
   *
   * <p>For every node reached by an out-edge of {@code nodeA}, all occurrences of {@code nodeA}
   * are removed from that node's in-edge multiset; afterwards the out-edge entry of {@code nodeA}
   * is dropped. Edges that point <em>to</em> {@code nodeA} are not touched by this method.
   *
   * <p>A node without an out-edge entry is ignored instead of failing with a {@code
   * NullPointerException}, and adjacent nodes without an in-edge multiset are skipped.
   *
   * @param nodeA the node whose outgoing edges are removed
   */
  public void removeNode(int nodeA) {

    Multiset<Integer> outSet = nodeOutEdges.get(nodeA);
    if (outSet == null) {
      // Unknown node: there is nothing to remove.
      return;
    }

    for (int adj : outSet.elementSet()) {
      Multiset<Integer> inSet = nodeInEdges.get(adj);
      if (inSet != null) {
        // Removing "up to Integer.MAX_VALUE" occurrences drops every parallel edge at once.
        inSet.remove(nodeA, Integer.MAX_VALUE);
      }
    }

    nodeOutEdges.remove(nodeA);
  }
예제 #8
0
  /**
   * Checks that every element of {@code actual} occurs in the comparison iterable at least as
   * often as it occurs in {@code actual}.
   *
   * @param actual the elements to check
   * @return {@code false} as soon as one element occurs more often in {@code actual} than in the
   *     comparison iterable, {@code true} otherwise
   */
  @Override
  public boolean matchesSafely(final Iterable<? super K> actual) {
    final Multiset<?> expectedCounts = HashMultiset.create(comparisonIterable);
    final Multiset<?> actualCounts = HashMultiset.create(actual);

    for (final Multiset.Entry<?> entry : actualCounts.entrySet()) {
      // An element missing from the comparison has count 0 there, which is always below its
      // (positive) count in actual, so no separate containment check is needed.
      final boolean covered = expectedCounts.count(entry.getElement()) >= entry.getCount();
      if (!covered) {
        return false;
      }
    }
    return true;
  }
예제 #9
0
  /**
   * Collects the distinct elements of {@code actual} that occur more often in {@code actual} than
   * in {@code comparisonIterable} (including elements missing from it altogether).
   *
   * @param actual the elements to check
   * @param comparisonIterable the elements to compare against
   * @return the offending elements, each listed once
   */
  private Iterable<?> notRepeated(
      Iterable<? super K> actual, Iterable<? super K> comparisonIterable) {
    final Multiset<?> expectedCounts = HashMultiset.create(comparisonIterable);
    final Multiset<?> actualCounts = HashMultiset.create(actual);

    final Set<Object> excess = newHashSet();
    for (final Multiset.Entry<?> entry : actualCounts.entrySet()) {
      final Object element = entry.getElement();
      // A missing element counts as 0 in the comparison and is therefore reported as well.
      if (expectedCounts.count(element) < entry.getCount()) {
        excess.add(element);
      }
    }
    return excess;
  }
예제 #10
0
  /**
   * Merges the given signature counts into the label map while holding {@code lock}.
   *
   * <p>Each element is converted to the adjacency array of its ordered sub-graph form and added
   * to {@code labelMap} with the element's count. If the map has grown beyond {@code capacity}
   * afterwards, it is flushed; an {@link IOException} during the flush prints the stack trace and
   * terminates the JVM with exit code -1.
   *
   * <p>The lock is released in a {@code finally} block so that an exception thrown while
   * converting or adding an element does not leave it held forever.
   *
   * @param multiset the signatures to add, with their occurrence counts
   */
  public void add(Multiset<BoolArray> multiset) {
    lock.lock();
    try {
      for (Multiset.Entry<BoolArray> entry : multiset.entrySet()) {
        BoolArray label =
            new SubGraphStructure(entry.getElement().getArray())
                .getOrderedForm()
                .getAdjacencyArray();
        labelMap.add(label, entry.getCount());
      }

      if (isVerbose()) {
        System.out.printf(
            "Added %,d new signatures. LabelMap size:%,d\n", multiset.elementSet().size(), size());
      }

      if (size() > capacity) {
        try {
          flush();
        } catch (IOException exp) {
          exp.printStackTrace();
          System.exit(-1);
        }
      }
    } finally {
      lock.unlock();
    }
  }
예제 #11
0
 /**
  * Scores a statistics query and returns the resulting counts. When the result is non-empty, the
  * query, the number of distinct keys in the result and the time taken are logged at debug level.
  *
  * @param statsQuery the query condition to score
  * @param statisticsStorage the storage handed to {@code StatisticsQueryUtils.scoreQuery}
  * @param scoringExps Set of experiments that have at least one non-zero score for
  *     statisticsQuery. This is used retrieving efos to be displayed in heatmap when no query efvs
  *     exist (c.f. atlasStatisticsQueryService.getScoringAttributesForGenes())
  * @return experiment counts corresponding for statsQuery
  */
 public static Multiset<Integer> getExperimentCounts(
     StatisticsQueryCondition statsQuery,
     StatisticsStorage statisticsStorage,
     Set<ExperimentInfo> scoringExps) {
   final long startedAt = System.currentTimeMillis();
   final Multiset<Integer> counts =
       StatisticsQueryUtils.scoreQuery(statsQuery, statisticsStorage, scoringExps);
   final long elapsedMillis = System.currentTimeMillis() - startedAt;

   // Number of distinct keys in the result (presumably genes — confirm against scoreQuery)
   final int distinctKeys = counts.elementSet().size();
   if (distinctKeys <= 0) {
     // Nothing scored: return without logging
     return counts;
   }

   final StringBuilder message = new StringBuilder("StatisticsQuery: ");
   message.append(statsQuery.prettyPrint());
   message.append(" ==> result set size: ").append(distinctKeys);
   message.append(" (duration: ").append(elapsedMillis).append(" ms)");
   log.debug(message.toString());

   return counts;
 }
 /**
  * Returns the matchings that are compatible with the given partial variable binding, one {@code
  * Object[]} of parameter values per distinct matching.
  *
  * <p>The matchings are obtained from {@code treatPatternCacher} as a multiset. Only its distinct
  * elements are returned, so a matching that occurs several times contributes a single row.
  *
  * @param MatchingVariables the partial binding handed to the pattern cacher
  * @return one array of parameter match values per distinct matching
  */
 @SuppressWarnings("unused")
 public List<Object[]> GetMatchingsFromPartial(HashMap<PVariable, Object> MatchingVariables) {
   // The leading "true ||" short-circuits: the "incremental" annotation lookup is never
   // evaluated and this branch is always taken.
   if (true
       || innerFindCall
           .getReferredQuery()
           .getAllAnnotations()
           .contains(new PAnnotation("incremental"))) {
     final Multiset<LookaheadMatching> cached =
         treatPatternCacher.GetMatchingsFromPartial(
             innerFindCall.getReferredQuery(), MatchingVariables, affectedVariables, false);

     // Flatten to List<Object[]>, taking each distinct matching exactly once
     final List<Object[]> rows = new ArrayList<Object[]>();
     for (final LookaheadMatching matching : cached.elementSet()) {
       final Object[] row = matching.getParameterMatchValuesOnlyAsArray().toArray();
       rows.add(row);
     }
     return rows;
   }
   return null;
 }
예제 #13
0
파일: BagOfWords.java 프로젝트: khmoran/ml
  /**
   * Create the Bag of Words features.
   *
   * <p>All documents are tokenized and normalized, and the occurrences of each non-empty term are
   * counted over the whole collection. When {@code bigrams} is set, each token is additionally
   * joined with its successor as {@code first_second}. Every term that occurs at least {@code
   * minOccurs} times and is at least {@code minLength} characters long is registered in the
   * training metadata with the value {@code "boolean"}, under its own name or, when {@code
   * integerFeatureNames} is set, under a running integer index that is also recorded in {@code
   * wordIndexMap}.
   *
   * @param documents the texts whose terms define the feature set
   */
  public void createFeatures(Collection<String> documents) {
    Multiset<String> terms = HashMultiset.create();

    for (String s : documents) {
      List<String> unnormalized = tokenizer.tokenize(s);

      // normalize them
      for (int i = 0; i < unnormalized.size(); i++) {
        String norm = Util.normalize(unnormalized.get(i));
        if (!norm.isEmpty()) {
          terms.add(norm);
        }

        if (bigrams && (i < unnormalized.size() - 1)) {
          String normSecond = Util.normalize(unnormalized.get(i + 1));
          if (!normSecond.isEmpty()) {
            // NOTE(review): norm may be empty here, which yields a term like "_second" —
            // confirm whether such half-empty bigrams are intended.
            terms.add(norm + "_" + normSecond);
          }
        }
      }
    }

    int nextIndex = 0;
    for (String term : terms.elementSet()) {
      if (terms.count(term) >= minOccurs // don't count infreq. words
          && term.length() >= minLength) { // or super short words
        if (!integerFeatureNames) {
          train.getMetadata().put(term, "boolean");
        } else {
          // The word index and the metadata key must be the same number. Previously the map
          // stored k while the metadata key was k + 1, so looking a term up by its stored index
          // never found the first term and hit a neighbour's key for all the others.
          int index = nextIndex++;
          wordIndexMap.put(term, index);
          train.getMetadata().put(String.valueOf(index), "boolean");
        }
      }
    }
  }
예제 #14
0
  /**
   * Predicts the label of a sample and reports how well the winner is supported.
   *
   * <p>The sample is scaled first. Depending on the configured modes, one-vs-all probabilities
   * are computed and used either directly ({@code OneVsAllMode.Best}) or as a veto and
   * tie-breaker for the subsequent voting among the one-vs-one binary models. One-class models
   * are currently ignored, so the one-class probabilities in the result are always 0.
   *
   * <p>An empty {@code VotingResult} is returned when all classes were vetoed, when no votes
   * were cast at all, when the winner's share of the votes is below {@code minVoteProportion},
   * or when (in a veto mode) the winner's one-vs-all probability is below {@code
   * oneVsAllThreshold}.
   *
   * @param x the unscaled sample to classify
   * @return the winning label together with best and second-best vote proportions and
   *     probabilities, or an empty result as described above
   */
  @NotNull
  public VotingResult<L> predictLabelWithQuality(P x) {
    final P scaledX = scalingModel.scaledCopy(x);

    L bestLabel = null;

    // One-class models are ignored for now (maybe revisit later); these two are passed through
    // unchanged.
    float bestOneClassProbability = 0;
    float secondBestOneClassProbability = 0;

    float bestOneVsAllProbability = 0;
    float secondBestOneVsAllProbability = 0;

    // stage 0: we're going to need the kernel value for x against each of the SVs, for each of the
    // kernels that was used in a subsidary binary machine
    LoadingCache<KernelFunction<P>, float[]> kValuesPerKernel =
        CacheBuilder.newBuilder()
            .build(
                new CacheLoader<KernelFunction<P>, float[]>() {
                  @Override
                  public float[] load(@NotNull KernelFunction<P> kernel) {
                    float[] kvalues = new float[allSVs.length];
                    int i = 0;
                    for (P sv : allSVs) {
                      kvalues[i] = (float) kernel.evaluate(scaledX, sv);
                      i++;
                    }
                    return kvalues;
                  }
                });

    // stage 2: one vs all

    // NOTE(review): the asMap() view of a LoadingCache does not load missing entries on get();
    // verify that computeOneVsAllProbabilities does not rely on lazily computed values.
    Map<L, Float> oneVsAllProbabilities =
        oneVsAllMode == OneVsAllMode.None
            ? null
            : computeOneVsAllProbabilities(kValuesPerKernel.asMap());

    // now oneVsAllProbabilities is populated with all of the classes that pass the threshold (maybe
    // all of them).

    // if all classes were vetoed, return
    if ((oneVsAllMode == OneVsAllMode.Veto
            || oneVsAllMode == OneVsAllMode.VetoAndBreakTies
            || oneVsAllMode == OneVsAllMode.Best)
        && oneVsAllProbabilities.isEmpty()) {
      return new VotingResult<L>();
    }

    // if using the OneVsAll Best mode, then we should have had probabilities turned on, and
    // allVsAll voting will be ignored
    if (oneVsAllMode == OneVsAllMode.Best) {
      for (Map.Entry<L, Float> entry : oneVsAllProbabilities.entrySet()) {
        float probability = entry.getValue();
        if (probability > bestOneVsAllProbability) {
          secondBestOneVsAllProbability = bestOneVsAllProbability;
          bestLabel = entry.getKey();
          bestOneVsAllProbability = probability;
        } else if (probability > secondBestOneVsAllProbability) {
          // A runner-up that is seen after the best entry must still be recorded; otherwise the
          // reported second-best value depends on the iteration order of the map.
          secondBestOneVsAllProbability = probability;
        }
      }
      return new VotingResult<L>(
          bestLabel,
          0,
          0,
          bestOneClassProbability,
          secondBestOneClassProbability,
          bestOneVsAllProbability,
          secondBestOneVsAllProbability);
    }

    // stage 3: voting

    int numLabels = oneVsOneModels.keySet().size();

    Multiset<L> votes = HashMultiset.create();

    if (allVsAllMode == AllVsAllMode.AllVsAll) {
      // vote using all models

      logger.debug(
          "Sample voting using all pairs of "
              + numLabels
              + " labels ("
              + ((numLabels * (numLabels - 1)) / 2. - numLabels)
              + " models)");

      // How AllVsAll with Veto differs from FilteredVsAll, etc.:
      // In the AllVsAll with Veto case, we may compute votes between two "inactive" (vetoed)
      // classes;
      // it may be that the winner of the voting later fails the oneVsAll filter, in which
      // case we may want to report unknown instead of reporting the best class that does pass.
      // This is what PhyloPythia does.

      for (BinaryModel<L, P> binaryModel : oneVsOneModels.values()) {
        float[] kvalues;
        try {
          kvalues = kValuesPerKernel.get(binaryModel.param.kernel);
        } catch (ExecutionException e) {
          throw new RuntimeException(e);
        }
        votes.add(binaryModel.predictLabel(kvalues, svIndexMaps.get(binaryModel)));
      }
    } else {
      // vote using only the active models one one side of the comparison, maybe on both.

      Set<L> activeClasses =
          oneVsAllProbabilities != null ? oneVsAllProbabilities.keySet() : oneVsOneModels.keySet();

      int requiredActive = allVsAllMode == AllVsAllMode.FilteredVsAll ? 1 : 2;

      int numActive = oneVsAllProbabilities != null ? oneVsAllProbabilities.size() : numLabels;
      if (requiredActive == 1) {
        logger.debug(
            "Sample voting with all "
                + numLabels
                + " vs. "
                + numActive
                + " active labels ("
                + ((numLabels * (numActive - 1)) / 2. - numActive)
                + " models)");
      } else {
        logger.debug(
            "Sample voting using pairs of only "
                + numActive
                + " active labels ("
                + ((numActive * (numActive - 1)) / 2. - numActive)
                + " models)");
      }

      // a model votes only if enough of its two labels are active
      for (BinaryModel<L, P> binaryModel : oneVsOneModels.values()) {
        int activeCount =
            (activeClasses.contains(binaryModel.getTrueLabel()) ? 1 : 0)
                + (activeClasses.contains(binaryModel.getFalseLabel()) ? 1 : 0);

        if (activeCount >= requiredActive) {
          votes.add(binaryModel.predictLabel(scaledX));
        }
      }
    }

    // stage 4: find the label with the most votes (and break ties or veto as needed)

    int bestCount = 0;
    int secondBestCount = 0;

    int countSum = 0;
    for (L label : votes.elementSet()) {
      int count = votes.count(label);
      countSum += count;

      // get the oneVsAll value for this label, if needed
      Float oneVsAll = 1f; // pass by default
      if (oneVsAllMode == OneVsAllMode.Veto || oneVsAllMode == OneVsAllMode.VetoAndBreakTies) {
        // if this is null it means this label didn't pass the threshold earlier, so it should fail
        // here too
        oneVsAll = oneVsAllProbabilities.get(label);
        oneVsAll = oneVsAll == null ? 0f : oneVsAll;
      }

      // primary sort by number of votes
      // secondary sort by one-vs-all probability, if available

      if (count > bestCount || (count == bestCount && oneVsAll > bestOneVsAllProbability)) {
        secondBestCount = bestCount;
        secondBestOneVsAllProbability = bestOneVsAllProbability;

        bestLabel = label;
        bestCount = count;
        bestOneVsAllProbability = oneVsAll;
      } else if (count > secondBestCount
          || (count == secondBestCount && oneVsAll > secondBestOneVsAllProbability)) {
        // This label does not beat the current best but does beat the current runner-up. Without
        // this branch a runner-up visited after the winner was silently dropped.
        secondBestCount = count;
        secondBestOneVsAllProbability = oneVsAll;
      }
    }

    // stage 5: check for inadequate evidence filters.

    if (countSum == 0) {
      // No model voted at all; the proportions below would be 0/0 = NaN and would slip through
      // the minVoteProportion check with a null label.
      return new VotingResult<L>();
    }

    double bestVoteProportion = (double) bestCount / (double) countSum;
    double secondBestVoteProportion = (double) secondBestCount / (double) countSum;
    if (bestVoteProportion < minVoteProportion) {
      return new VotingResult<L>();
    }

    if ((oneVsAllMode == OneVsAllMode.VetoAndBreakTies || oneVsAllMode == OneVsAllMode.Veto)
        && bestOneVsAllProbability < oneVsAllThreshold) {
      return new VotingResult<L>();
    }

    return new VotingResult<L>(
        bestLabel,
        (float) bestVoteProportion,
        (float) secondBestVoteProportion,
        bestOneClassProbability,
        secondBestOneClassProbability,
        bestOneVsAllProbability,
        secondBestOneVsAllProbability);
  }
  /**
   * Reads the configured spreadsheet sheet and turns it into an undirected network in the import
   * container.
   *
   * <p>Each row contributes the (optionally delimiter-separated) agents found in the first and,
   * if the two agent types differ, the second configured column. Rows with a missing or empty
   * agent cell are reported as warnings and skipped; reading stops at the first missing row. When
   * duplicate removal is enabled, a row is skipped if its first agent value or its second agent
   * value has been seen on an earlier row. Afterwards one node per distinct agent (with its
   * frequency and type) and one weighted edge per distinct connection are added to the container.
   *
   * @throws IOException if the file cannot be read
   * @throws InvalidFormatException if the file is not a readable workbook
   */
  public void convertToNetwork() throws IOException, InvalidFormatException {

    container = MyFileImporter.container;
    container.setEdgeDefault(EdgeDefault.UNDIRECTED);

    String firstDelimiter = Utils.getCharacter(MyFileImporter.firstConnectorDelimiter);
    String secondDelimiter = Utils.getCharacter(MyFileImporter.secondConnectorDelimiter);
    boolean oneTypeOfAgent =
        MyFileImporter.getFirstConnectedAgent().equals(MyFileImporter.getSecondConnectedAgent());

    nbColumnFirstAgent = MyFileImporter.firstConnectedAgentIndex;
    nbColumnSecondAgent = MyFileImporter.secondConnectedAgentIndex;

    // Counts the rows converted so far; it is not advanced for skipped rows.
    int lineCounter = 0;

    // The workbook is read from the stream inside create(), so the stream can be released right
    // away; previously it was never closed.
    Workbook wb;
    InputStream inp = new FileInputStream(fileName);
    try {
      wb = WorkbookFactory.create(inp);
    } finally {
      inp.close();
    }

    Sheet sheet = wb.getSheet(sheetName);
    int startingRow = MyFileImporter.headersPresent ? 1 : 0;

    Set<String> linesFirstAgent = new HashSet<String>();
    Set<String> linesSecondAgent = new HashSet<String>();
    for (int i = startingRow; i <= sheet.getLastRowNum(); i++) {

      Row row = sheet.getRow(i);
      if (row == null) {
        break;
      }

      Cell cell = row.getCell(nbColumnFirstAgent);
      if (cell == null) {
        logEmptyColumnIssue(lineCounter, MyFileImporter.getFirstConnectedAgent());
        continue;
      }

      String firstAgent = cell.getStringCellValue();
      if (firstAgent == null || firstAgent.isEmpty()) {
        logEmptyColumnIssue(lineCounter, MyFileImporter.getFirstConnectedAgent());
        continue;
      }

      if (MyFileImporter.removeDuplicates && !linesFirstAgent.add(firstAgent)) {
        continue;
      }

      String secondAgent = null;

      if (!oneTypeOfAgent) {
        cell = row.getCell(nbColumnSecondAgent);
        if (cell == null) {
          // Report the column that is actually empty (this used to name the first column).
          logEmptyColumnIssue(lineCounter, MyFileImporter.getSecondConnectedAgent());
          continue;
        }
        secondAgent = cell.getStringCellValue();
        if (secondAgent == null || secondAgent.isEmpty()) {
          logEmptyColumnIssue(lineCounter, MyFileImporter.getSecondConnectedAgent());
          continue;
        }
        // Track the second agent in its own set. Re-adding firstAgent to linesFirstAgent here
        // always reported a duplicate, so every row was skipped when duplicate removal was on.
        if (MyFileImporter.removeDuplicates && !linesSecondAgent.add(secondAgent)) {
          continue;
        }
      }
      lineCounter++;

      String[] firstAgentSplit;
      String[] secondAgentSplit;

      if (firstDelimiter != null) {
        firstAgentSplit = firstAgent.trim().split(firstDelimiter);
      } else {
        firstAgentSplit = new String[] {firstAgent};
      }
      for (String node : firstAgentSplit) {
        nodesFirst.add(node.trim());
      }

      if (!oneTypeOfAgent) {
        if (secondDelimiter != null) {
          secondAgentSplit = secondAgent.trim().split(secondDelimiter);
        } else {
          secondAgentSplit = new String[] {secondAgent};
        }
        for (String node : secondAgentSplit) {
          nodesSecond.add(node.trim());
        }
      } else {
        secondAgentSplit = null;
      }

      String[] both = ArrayUtils.addAll(firstAgentSplit, secondAgentSplit);
      // let's find all connections between all the tags for this picture
      Utils usefulTools = new Utils();
      List<String> connections = usefulTools.getListOfLinks(both, MyFileImporter.removeSelfLoops);
      edges.addAll(connections);
    }

    NodeDraft node;
    AttributeTable atNodes = container.getAttributeModel().getNodeTable();
    AttributeColumn acFrequency = atNodes.addColumn("frequency", AttributeType.INT);
    AttributeColumn acType = atNodes.addColumn("type", AttributeType.STRING);

    for (String n : nodesFirst.elementSet()) {
      node = container.factory().newNodeDraft();
      node.setId(n);
      node.setLabel(n);
      node.addAttributeValue(acFrequency, nodesFirst.count(n));
      node.addAttributeValue(acType, MyFileImporter.getFirstConnectedAgent());
      container.addNode(node);
    }

    for (String n : nodesSecond.elementSet()) {
      node = container.factory().newNodeDraft();
      node.setId(n);
      node.setLabel(n);
      node.addAttributeValue(acFrequency, nodesSecond.count(n));
      node.addAttributeValue(acType, MyFileImporter.getSecondConnectedAgent());
      container.addNode(node);
    }

    // loop for edges
    int idEdge = 0;
    EdgeDraft edge;
    for (String e : edges.elementSet()) {
      System.out.println("edge: " + e);

      // A connection is encoded as "source|target".
      String[] endpoints = e.split("\\|");
      String sourceNode = endpoints[0];
      String targetNode = endpoints[1];
      if (!MyFileImporter.innerLinksIncluded) {
        // Skip links whose two ends belong to the same agent type.
        if ((nodesFirst.contains(sourceNode) && nodesFirst.contains(targetNode))
            || (nodesSecond.contains(sourceNode) && nodesSecond.contains(targetNode))) {
          continue;
        }
      }
      edge = container.factory().newEdgeDraft();
      idEdge = idEdge + 1;
      edge.setSource(container.getNode(sourceNode));
      edge.setTarget(container.getNode(targetNode));
      edge.setWeight((float) edges.count(e));
      edge.setId(String.valueOf(idEdge));
      edge.setType(EdgeDraft.EdgeType.UNDIRECTED);
      container.addEdge(edge);
    }
  }

  /** Records a warning in the import report that a row was skipped because a column was empty. */
  private void logEmptyColumnIssue(int lineNumber, String columnName) {
    Issue issue =
        new Issue(
            "problem with line "
                + lineNumber
                + " (empty column "
                + columnName
                + "). It was skipped in the conversion",
            Issue.Level.WARNING);
    MyFileImporter.getStaticReport().logIssue(issue);
  }