/**
 * Parses the header line of a model that has sub families into a new record.
 * <p>
 * The line is split on runs of whitespace. The first token, minus its leading
 * character, becomes the model accession. The second token is skipped
 * (presumably a label that precedes the sub family list; confirm against the
 * file format). Every remaining token is added to the record as a sub family.
 *
 * @param line Header line to parse.
 * @return The populated record, or {@code null} if the first token is missing
 *         or too short to contain a model accession.
 */
private static PirsfDatRecord parseSubFamilyLine(String line) {
    // Split on any whitespace run so repeated spaces or tabs never yield empty
    // tokens that would be registered as sub families.
    final String[] chunks = line.split("\\s+");

    // A whitespace-only line splits into an empty array; a one-character first
    // token holds only the marker and no accession.
    if (chunks.length == 0 || chunks[0].length() <= 1) {
        LOGGER.warn("No model accession found in pirsf.dat sub family line: " + line);
        return null;
    }

    // Drop the single leading marker character of the first token.
    final String modelAccession = chunks[0].substring(1);
    final PirsfDatRecord instance = new PirsfDatRecord(modelAccession);
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Found a new model accession with sub families: " + modelAccession);
    }

    // Token 1 is deliberately skipped; sub families start at token 2.
    for (int i = 2; i < chunks.length; i++) {
        final String subfamily = chunks[i];
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug(
                    "Found a new subfamily named " + subfamily + " for model accession: "
                            + instance.getModelAccession());
        }
        instance.addSubFamily(subfamily);
    }
    return instance;
}
public static Map<String, PirsfDatRecord> parse(final Resource pirsfDatFileResource) throws IOException { LOGGER.debug("Running PIRSF data file parser..."); if (pirsfDatFileResource == null) { throw new NullPointerException("Resource to the PIRSF dat file is null"); } if (!pirsfDatFileResource.exists()) { throw new IllegalStateException(pirsfDatFileResource.getFilename() + " does not exist"); } if (!pirsfDatFileResource.isReadable()) { throw new IllegalStateException(pirsfDatFileResource.getFilename() + " is not readable"); } // Result map final Map<String, PirsfDatRecord> data = new HashMap<String, PirsfDatRecord>(); BufferedReader reader = null; try { // Read input file line by line reader = new BufferedReader(new InputStreamReader(pirsfDatFileResource.getInputStream())); String line; PirsfDatRecord pirsfDatRecord = null; int row = 1; while ((line = reader.readLine()) != null) { Matcher modelStart = PIRSF_DAT_PATTERN_SUPERFAM.matcher(line); if (modelStart.find()) { // New accession without sub families final String modelAccession = line.substring(1); pirsfDatRecord = new PirsfDatRecord(modelAccession); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Found a new model accession without sub families: " + modelAccession); } // Reset row attributes row = 1; } else if (row == 2) { // Model name final String modelName = line; pirsfDatRecord.setModelName(modelName); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Set model name to " + modelName); } } else if (row == 3) { final String[] values = line.split("\\s+"); pirsfDatRecord.setValues(values); } else if (row == 4 && line.startsWith("BLAST: ")) { int index = line.indexOf(":"); if (index > -1 && line.length() >= index + 1) { line = line.substring(index + 1).trim(); } final boolean isBlastRequired = line.equalsIgnoreCase("YES"); pirsfDatRecord.setBlastRequired(isBlastRequired); data.put(pirsfDatRecord.getModelAccession(), pirsfDatRecord); } else { modelStart = PIRSF_DAT_PATTERN_SUBFAM.matcher(line); if (modelStart.find()) { 
// New accession with sub families row = 1; pirsfDatRecord = parseSubFamilyLine(line); } else { LOGGER.warn("Unexpected line in pirsf.dat: " + line); } } row++; } } finally { if (reader != null) { reader.close(); } } return data; }