/**
  * Reads identifiers from the request and adds them to the temporary bag in batches.
  *
  * <p>Every line supplied by the reader from {@code getReader(request)} is split into tokens using
  * the matcher from {@link #getMatcher()} as delimiter and the double quote as quote character.
  * The tokens are accumulated in {@code ids}. After each line, once {@code ids} holds at least
  * {@code BAG_QUERY_MAX_BATCH_SIZE} entries, they are handed to {@code addIdsToList} and
  * {@code ids} is cleared. Whatever is left over after the last line is handed over in a final
  * call. The reader is closed before that final call.
  *
  * @param type The type of thing these identifiers are.
  * @param input The creation input; supplies the issue collection and the extra value.
  * @param ids The working set of identifiers; it is filled and cleared by this method.
  * @param unmatchedIds A collector for unmatched identifiers.
  * @param tempBag The temporary bag to add results to.
  * @throws IOException If reading from, or closing, the request reader fails.
  * @throws ClassNotFoundException if the type is not valid.
  * @throws InterMineException If something goes wrong building the bag.
  * @throws ObjectStoreException If there is a problem on the database level.
  */
 protected void processIdentifiers(
     final String type,
     ListCreationInput input,
     final Set<String> ids,
     final Set<String> unmatchedIds,
     final InterMineBag tempBag)
     throws IOException, ClassNotFoundException, InterMineException, ObjectStoreException {
   final Collection<String> issues = input.getAddIssues();
   final StrMatcher delimiters = getMatcher();
   final BufferedReader reader = getReader(request);
   try {
     for (String current = reader.readLine(); current != null; current = reader.readLine()) {
       final StrTokenizer tokens =
           new StrTokenizer(current, delimiters, StrMatcher.doubleQuoteMatcher());
       while (tokens.hasNext()) {
         ids.add(tokens.nextToken());
       }
       // The batch size is only checked between lines, so a batch may exceed the limit.
       final boolean batchFull = ids.size() >= BAG_QUERY_MAX_BATCH_SIZE;
       if (batchFull) {
         addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, issues);
         ids.clear();
       }
     }
   } finally {
     if (reader != null) {
       reader.close();
     }
   }
   // Flush the identifiers that did not fill a complete batch.
   if (!ids.isEmpty()) {
     addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, issues);
   }
 }
Example #2
0
  /**
   * Splits a comma separated list of regular expressions and compiles every entry.
   *
   * <p>An entry may be wrapped in double quotes. Empty entries are skipped, and space, tab,
   * newline and carriage return are configured as ignored characters on the tokenizer.
   *
   * @param inStr pattern string to parse
   * @return the compiled patterns, in the order their expressions appear in {@code inStr}
   */
  private ArrayList<Pattern> parsePatternString(String inStr) {
    String[] expressions =
        new StrTokenizer(inStr, ',', '"')
            .setIgnoreEmptyTokens(true)
            .setIgnoredMatcher(StrMatcher.charSetMatcher(" \t\n\r"))
            .getTokenArray();

    ArrayList<Pattern> compiled = new ArrayList<Pattern>();
    for (String expression : expressions) {
      compiled.add(Pattern.compile(expression));
    }
    return compiled;
  }
Example #3
0
  /**
   * Builds an {@code ImportLine} from one raw line of the file.
   *
   * <p>The line is split on the configured delimiter, keeping empty tokens and trimming each
   * token. Leading and trailing double quote characters are then stripped from every value. A
   * value is stored under its label when labels are enabled, and under its zero-based column
   * index (as a string) otherwise.
   *
   * @param lineNumber number of the line, passed on to the {@code ImportLine}
   * @param line raw text of the line
   * @param labelsMap column index to label; only consulted when labels are enabled
   * @return the populated line
   */
  private ImportLine processLine(long lineNumber, String line, Map<Integer, String> labelsMap) {
    ImportLine parsed = new ImportLine(this, lineNumber);

    StrTokenizer splitter = new StrTokenizer(line, this.delimitor);
    splitter.setIgnoreEmptyTokens(false);
    splitter.setTrimmerMatcher(StrMatcher.trimMatcher());
    String[] cells = splitter.getTokenArray();

    int index = 0;
    for (String cell : cells) {
      String key = this.labels ? labelsMap.get(index) : String.valueOf(index);
      parsed.addColumn(key, StringUtils.strip(cell, "\""));
      index++;
    }

    return parsed;
  }
 /**
  * Get the String Matcher for parsing the list of identifiers.
  *
  * <p>Every character of the {@code list.upload.delimiters} property, plus the space character,
  * is treated as a delimiter. When the property lookup yields {@code null}, only the space is
  * used.
  *
  * @return The matcher to use.
  */
 protected StrMatcher getMatcher() {
   final String configured = getProperty("list.upload.delimiters");
   // Concatenating a null String produces "null", which would silently turn the letters
   // 'n', 'u' and 'l' into delimiters and split identifiers containing them.
   final String bagUploadDelims = (configured == null ? "" : configured) + " ";
   return StrMatcher.charSetMatcher(bagUploadDelims);
 }
Example #5
0
  /**
   * Reads the given file line by line and converts its data lines into {@code ImportLine}s.
   *
   * <p>The file is decoded with a UCS-2 little endian reader when {@code littleEndian} is set, and
   * with the platform default charset otherwise. {@code skipLines} is called first and returns
   * the line counter to continue from. When labels are enabled, the first line read afterwards
   * is taken as the header row, and its tokens become the column labels. Every following
   * non-blank line is passed to {@code processLine}. Reading stops at end of file, or, when
   * {@code endTag} is not blank, at the first line equal to {@code endTag}.
   *
   * @param fileName path of the file to read
   * @return the parsed lines, in file order
   * @throws ImportException if an I/O error occurs while reading the file; a
   *     {@code ParseFileImportException} is raised when a non-blank end tag is configured but
   *     the end of the file is reached without finding it
   */
  @Override
  protected List<ImportLine> buildLignesSpecifique(String fileName) throws ImportException {

    List<ImportLine> toReturn = new ArrayList<ImportLine>();

    long lineNumber = 0;

    try {
      FileInputStream f = new FileInputStream(fileName);
      BufferedReader bufferedReader = null;
      try {
        // the file is encoded in UCS-2 Little Endian
        if (this.littleEndian) {
          bufferedReader = new BufferedReader(new UCSReader(f, UCSReader.UCS2LE));
        } else {
          bufferedReader = new BufferedReader(new InputStreamReader(f));
        }

        lineNumber = skipLines(bufferedReader, lineNumber);

        // read the following lines until the end tag
        boolean labelsRead = !labels;
        Map<Integer, String> labelsMap = null;
        String line;

        while ((line = bufferedReader.readLine()) != null
            && (StringUtils.isBlank(endTag) || !StringUtils.equals(line, endTag))) {
          lineNumber++;

          if (!labelsRead) {
            // read the column labels from the header row
            String[] labelsString =
                new StrTokenizer(line, this.delimitor)
                    .setIgnoreEmptyTokens(false)
                    .setTrimmerMatcher(StrMatcher.trimMatcher())
                    .getTokenArray();

            labelsMap = new HashMap<Integer, String>();

            for (int i = 0; i < labelsString.length; i++) {
              labelsMap.put(i, labelsString[i]);
            }

            // Only the first line is a header. Without this flag every subsequent line would
            // be parsed as labels again and no data line would ever be processed.
            labelsRead = true;
          } else {
            if (StringUtils.isNotBlank(line)) {
              toReturn.add(processLine(lineNumber, line, labelsMap));
            }
          }
        }

        // line is null here when the end of the file was reached without meeting the end tag
        if (!StringUtils.isBlank(endTag) && !StringUtils.equals(line, endTag)) {
          throw new ParseFileImportException(
              "Impossible de trouver la balise de fin : " + endTag, this);
        }

      } finally {
        // The reader is still null when its construction failed. The stream is closed in its
        // own finally so it is released even then, or when closing the reader throws.
        try {
          if (bufferedReader != null) {
            bufferedReader.close();
          }
        } finally {
          f.close();
        }
      }
    } catch (IOException e) {
      String message = "Erreur d'E/S lors de la lecture du fichier : " + fileName;
      throw new ImportException(message, e);
    }
    return toReturn;
  }