/**
 * Reads identifiers from the request and adds them to the temporary bag in batches.
 *
 * <p>Each line obtained from the request reader is split into tokens using the matcher from
 * {@link #getMatcher()}, with double quotes acting as the quote character. Tokens accumulate in
 * {@code ids}; once a line has been tokenised and the set holds at least
 * {@code BAG_QUERY_MAX_BATCH_SIZE} entries, the set is handed to {@code addIdsToList} and then
 * cleared. Whatever remains after the last line is submitted as a final batch.
 *
 * @param type The type of thing these identifiers are.
 * @param input The creation input.
 * @param ids The identifiers; used as the working batch buffer.
 * @param unmatchedIds A collector for unmatched identifiers.
 * @param tempBag The temporary bag to add results to.
 * @throws IOException If we can't read from the request.
 * @throws ClassNotFoundException if the type is not valid.
 * @throws InterMineException If something goes wrong building the bag.
 * @throws ObjectStoreException If there is a problem on the database level.
 */
protected void processIdentifiers(
        final String type,
        ListCreationInput input,
        final Set<String> ids,
        final Set<String> unmatchedIds,
        final InterMineBag tempBag)
    throws IOException, ClassNotFoundException, InterMineException, ObjectStoreException {
    final Collection<String> addIssues = input.getAddIssues();
    final StrMatcher delimiters = getMatcher();
    final BufferedReader reader = getReader(request);
    try {
        for (String current = reader.readLine(); current != null; current = reader.readLine()) {
            final StrTokenizer tokens =
                new StrTokenizer(current, delimiters, StrMatcher.doubleQuoteMatcher());
            while (tokens.hasNext()) {
                ids.add(tokens.nextToken());
            }
            // Flush only between lines, so a batch may exceed the threshold by one line's worth.
            if (ids.size() >= BAG_QUERY_MAX_BATCH_SIZE) {
                addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, addIssues);
                ids.clear();
            }
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    // Submit the trailing, partially filled batch (it is not cleared afterwards).
    if (ids.size() > 0) {
        addIdsToList(ids, tempBag, type, input.getExtraValue(), unmatchedIds, addIssues);
    }
}
/**
 * Compiles a comma separated list of regular expressions into {@link Pattern} objects.
 *
 * <p>Each expression may be wrapped in double quotes. Space, tab, newline and carriage-return
 * characters are configured as ignored characters on the tokenizer, and empty tokens are
 * skipped.
 *
 * @param inStr pattern string to parse
 * @return list of compiled patterns, in the order they appear in {@code inStr}
 */
private ArrayList<Pattern> parsePatternString(String inStr) {
    StrTokenizer splitter = new StrTokenizer(inStr, ',', '"')
        .setIgnoreEmptyTokens(true)
        .setIgnoredMatcher(StrMatcher.charSetMatcher(" \t\n\r"));

    ArrayList<Pattern> compiled = new ArrayList<Pattern>();
    for (String regex : splitter.getTokenArray()) {
        compiled.add(Pattern.compile(regex));
    }
    return compiled;
}
/**
 * Splits one line of the file into columns and stores them in a new {@link ImportLine}.
 *
 * <p>The line is tokenised on {@code delimitor}, keeping empty tokens and trimming each token.
 * Leading and trailing double quotes are then stripped from every value. A column is keyed by
 * the label found in {@code labelsMap} at its index when {@code labels} is set, and by the
 * index itself (as a string) otherwise.
 *
 * @param lineNumber number of the line being processed, passed to the {@link ImportLine}
 * @param line raw text of the line
 * @param labelsMap column index to label mapping; only read when {@code labels} is set
 * @return the populated import line
 */
private ImportLine processLine(long lineNumber, String line, Map<Integer, String> labelsMap) {
    ImportLine parsed = new ImportLine(this, lineNumber);

    StrTokenizer splitter = new StrTokenizer(line, this.delimitor);
    splitter.setIgnoreEmptyTokens(false);
    splitter.setTrimmerMatcher(StrMatcher.trimMatcher());
    String[] cells = splitter.getTokenArray();

    int index = 0;
    for (String cell : cells) {
        String unquoted = StringUtils.strip(cell, "\"");
        String key = this.labels ? labelsMap.get(index) : String.valueOf(index);
        parsed.addColumn(key, unquoted);
        index++;
    }
    return parsed;
}
/**
 * Get the String Matcher for parsing the list of identifiers.
 *
 * <p>The matcher treats every character of the {@code list.upload.delimiters} property, plus
 * the space character, as a delimiter. When the property is not set, only the space character
 * is used.
 *
 * @return The matcher to use.
 */
protected StrMatcher getMatcher() {
    final String configured = getProperty("list.upload.delimiters");
    // NOTE(review): assumes getProperty returns null for an unset property - confirm.
    // Concatenating null directly would produce the literal "null ", which would turn
    // 'n', 'u' and 'l' into delimiters and split identifiers containing those letters.
    final String bagUploadDelims = (configured == null ? "" : configured) + " ";
    return StrMatcher.charSetMatcher(bagUploadDelims);
}
@Override protected List<ImportLine> buildLignesSpecifique(String fileName) throws ImportException { List<ImportLine> toReturn = new ArrayList<ImportLine>(); long lineNumber = 0; try { FileInputStream f = new FileInputStream(fileName); BufferedReader bufferedReader = null; try { // le fichier est encode en UCS-2 Little Endian if (this.littleEndian) { bufferedReader = new BufferedReader(new UCSReader(f, UCSReader.UCS2LE)); } else { bufferedReader = new BufferedReader(new InputStreamReader(f)); } lineNumber = skipLines(bufferedReader, lineNumber); // on lit les lignes suivantes jusqu'à baliseFin boolean labelsRead = labels ? false : true; Map<Integer, String> labelsMap = null; String line; while ((line = bufferedReader.readLine()) != null && (StringUtils.isBlank(endTag) || !StringUtils.equals(line, endTag))) { lineNumber++; if (!labelsRead) { // On lit les intitulés String[] labelsString = new StrTokenizer(line, this.delimitor) .setIgnoreEmptyTokens(false) .setTrimmerMatcher(StrMatcher.trimMatcher()) .getTokenArray(); labelsMap = new HashMap<Integer, String>(); for (int i = 0; i < labelsString.length; i++) { labelsMap.put(i, labelsString[i]); } } else { if (StringUtils.isNotBlank(line)) { toReturn.add(processLine(lineNumber, line, labelsMap)); } } } if (!StringUtils.isBlank(endTag) && !StringUtils.equals(line, endTag)) { throw new ParseFileImportException( "Impossible de trouver la balise de fin : " + endTag, this); } } finally { bufferedReader.close(); f.close(); } } catch (IOException e) { String message = "Erreur d'E/S lors de la lecture du fichier : " + fileName; throw new ImportException(message, e); } return toReturn; }