/**
 * Returns the subsample with the given index.
 *
 * @param subsampleId The subsample's index. If null, no subsample can match and null is
 *     returned.
 * @return The first subsample whose index equals {@code subsampleId}, or null if no subsamples
 *     are set or none has that index.
 */
public Subsample getSubsample(Integer subsampleId) {
  if (subsamples == null || subsampleId == null) {
    return null;
  }

  for (Subsample s : subsamples) {
    // Compare by value: "==" against a boxed Integer is either a reference comparison (if the
    // getter returns Integer) or an unboxing that fails on null (if it returns int).
    if (subsampleId.equals(s.getSubsampleIndex())) {
      return s;
    }
  }

  return null;
}
/** * Adds the given subsample. In case a subsample with the same index already exists, this subample * is replaced. * * @param s */ public void setSubsample(Subsample s) { if (subsamples == null) { subsamples = new ArrayList<Subsample>(1); } // check if the subsample already exists for (int i = 0; i < subsamples.size(); i++) { // if a subsample with the same index already exists, replace it if (subsamples.get(i).getSubsampleIndex() == s.getSubsampleIndex()) { subsamples.set(i, s); return; } } // as the subsample wasn't set, add this one subsamples.add(s); }
/** * Converts the given meta-data to an mzTab formatted string. * * @return */ public String toMzTab() { StringBuilder mzTab = new StringBuilder(); if (title != null) { mzTab.append(createField("title", title)); } if (description != null) { mzTab.append(createField("description", description)); } // sample processing if (sampleProcessing != null) { for (Integer i = 1; i <= sampleProcessing.size(); i++) { mzTab.append( createField(String.format("sample_processing[%d]", i), sampleProcessing.get(i - 1))); } } // instrument if (instrument != null) { for (Integer i = 1; i <= instrument.size(); i++) { mzTab.append( createField(String.format("instrument[%d]-name", i), instrument.get(i - 1).getName())); mzTab.append( createField( String.format("instrument[%d]-source", i), instrument.get(i - 1).getSource())); mzTab.append( createField( String.format("instrument[%d]-analyzer", i), instrument.get(i - 1).getAnalyzer())); mzTab.append( createField( String.format("instrument[%d]-detector", i), instrument.get(i - 1).getDetector())); } } // software + software[1-n]-setting if (software != null) { for (Integer i = 1; i <= software.size(); i++) { mzTab.append(createField(String.format("software[%d]", i), software.get(i - 1))); // write out the settings for the specified software if (softwareSetting != null && softwareSetting.get(i) != null) { List<String> settings = softwareSetting.get(i); for (String s : settings) { mzTab.append(createField(String.format("software[%d]-setting"), s)); } } } } // false discovery rate if (falseDiscoveryRate != null) { mzTab.append(createField("false_discovery_rate", falseDiscoveryRate)); } // publication if (publication != null && publication.size() > 0) { String string = ""; for (String p : publication) { string += (string.length() > 1 ? 
"," : "") + p; } mzTab.append(createField("publication", string)); } // contact if (contact != null) { for (int i = 1; i <= contact.size(); i++) { mzTab.append( createField(String.format("contact[%d]-name", i), contact.get(i - 1).getName())); mzTab.append( createField( String.format("contact[%d]-affiliation", i), contact.get(i - 1).getAffiliation())); mzTab.append( createField(String.format("contact[%d]-email", i), contact.get(i - 1).getEmail())); } } // uri if (uri != null) { mzTab.append(createField("uri", uri)); } // mods if (mod != null) { mzTab.append(createField("mod", mod)); } // quantification method if (quantificationMethod != null) { mzTab.append(createField("quantification_method", quantificationMethod)); } // protein quant unit if (proteinQuantificationUnit != null) { mzTab.append(createField("protein-quantification_unit", proteinQuantificationUnit)); } // peptide quant unit if (peptideQuantificationUnit != null) { mzTab.append(createField("peptide-quantification_unit", peptideQuantificationUnit)); } // ms files if (msFiles != null) { List<Integer> ids = new ArrayList<Integer>(msFiles.keySet()); Collections.sort(ids); for (Integer index : ids) { mzTab.append( createField( String.format("ms_file[%d]-format", index + 1), msFiles.get(index).getFormat())); mzTab.append( createField( String.format("ms_file[%d]-location", index + 1), msFiles.get(index).getLocation())); mzTab.append( createField( String.format("ms_file[%d]-id_format", index + 1), msFiles.get(index).getIdFormat())); } } // colunits if (colunitProtein != null) { for (String column : colunitProtein.keySet()) { mzTab.append( createField( "colunit-protein", String.format("%s=%s", column, colunitProtein.get(column).toString()))); } } if (colunitPeptide != null) { for (String column : colunitPeptide.keySet()) { mzTab.append( createField( "colunit-peptide", String.format("%s=%s", column, colunitPeptide.get(column).toString()))); } } if (colunitSmallMolecule != null) { for (String column : 
colunitSmallMolecule.keySet()) { mzTab.append( createField( "colunit-small_molecule", String.format("%s=%s", column, colunitSmallMolecule.get(column).toString()))); } } // custom if (customParams != null) { for (Param p : customParams) { mzTab.append(createField("custom", p)); } } // species if (species != null) { for (int i = 1; i <= species.size(); i++) { mzTab.append(createField(String.format("species[%d]", i), species.get(i - 1))); } } // tissue if (tissue != null) { for (int i = 1; i <= tissue.size(); i++) { mzTab.append(createField(String.format("tissue[%d]", i), tissue.get(i - 1))); } } // cell_type if (cellType != null) { for (int i = 1; i <= cellType.size(); i++) { mzTab.append(createField(String.format("cell_type[%d]", i), cellType.get(i - 1))); } } // disease if (disease != null) { for (int i = 1; i <= disease.size(); i++) { mzTab.append(createField(String.format("disease[%d]", i), disease.get(i - 1))); } } // subsamples if (subsamples != null) { for (Subsample s : subsamples) { mzTab.append(s.toMzTab()); } } return mzTab.toString(); }
private void parseField(String subId, String field, String value) throws MzTabParsingException { logger.debug("parsing field: subId = " + subId + ", field = " + field + ", value = " + value); try { // simple fields with only one value if ("title".equals(field)) { title = value.trim(); } else if ("description".equals(field) && subId == null) { description = value.trim(); } else if ("false_discovery_rate".equals(field)) { falseDiscoveryRate = new ParamList(value); } else if ("uri".equals(field)) { uri = new URI(value); } else if ("mod".equals(field)) { mod = new ParamList(value); } else if ("quantification_method".equals(field)) { quantificationMethod = new Param(value); } else if ("protein-quantification_unit".equals(field)) { proteinQuantificationUnit = new Param(value); } else if ("peptide-quantification_unit".equals(field)) { peptideQuantificationUnit = new Param(value); } /** Complex fields with multiple values */ // sample processing else if (field.startsWith("sample_processing")) { int sampleProcessingIndex = Integer.parseInt( field.substring(18, field.length() - 1)); // extract the processing step number // create the array if necessary if (sampleProcessing == null) { sampleProcessing = new ArrayList<ParamList>(); } // set the param sampleProcessing.add(sampleProcessingIndex - 1, new ParamList(value)); } // instruments else if (field.startsWith("instrument")) { // get the instrument's index int instrumentIndex = Integer.parseInt(field.substring(11, field.indexOf(']', 11))); // create the instrument array if necessary if (instrument == null) { instrument = new ArrayList<Instrument>(); } // create the instrument if necessary if (instrument.get(instrumentIndex - 1) == null) { instrument.add(instrumentIndex - 1, new Instrument()); } // check which value is set if (field.endsWith("name")) { instrument.get(instrumentIndex - 1).setName(new Param(value)); } else if (field.endsWith("source")) { instrument.get(instrumentIndex - 1).setSource(new Param(value)); } else if 
(field.endsWith("analyzer")) { instrument.get(instrumentIndex - 1).setAnalyzer(new Param(value)); } else if (field.endsWith("detector")) { instrument.get(instrumentIndex - 1).setDetector(new Param(value)); } } // software else if (field.startsWith("software")) { // get the software's 1-based index int softwareIndex = Integer.parseInt(field.substring(9, field.length() - 1)); // create the software array if necessary if (software == null) { software = new ArrayList<Param>(); } // add the software software.add(softwareIndex - 1, new Param(value)); } // software[1-n]-setting else if (field.startsWith("software") && field.contains("]-setting")) { // get the software's 1-based index int softwareIndex = Integer.parseInt(field.substring(9, field.length() - 1)); // create the software map if necessary if (softwareSetting == null) { softwareSetting = new HashMap<Integer, List<String>>(); } // create the list for this software if (!softwareSetting.containsKey(softwareIndex)) { softwareSetting.put(softwareIndex, new ArrayList<String>()); } // add the setting softwareSetting.get(softwareIndex).add(value); } // publication else if (field.equals("publication")) { // split the string String[] publications = value.trim().split("\\|"); // create the publications array if necessary if (publication == null) { publication = new ArrayList<String>(publications.length); } publication.addAll(Arrays.asList(publications)); } // contact else if (field.startsWith("contact")) { // get the instrument's index int contactIndex = Integer.parseInt(field.substring(8, field.indexOf(']', 8))); // create the instrument array if necessary if (contact == null) { contact = new ArrayList<Contact>(); } // create the instrument if necessary if (contact.size() < contactIndex) { contact.add(contactIndex - 1, new Contact()); } // check which value is set if (field.endsWith("name")) { contact.get(contactIndex - 1).setName(value.trim()); } else if (field.endsWith("email")) { contact.get(contactIndex - 
1).setEmail(value.trim()); } else if (field.endsWith("affiliation")) { contact.get(contactIndex - 1).setAffiliation(value.trim()); } } // ms_file else if (field.startsWith("ms_file")) { // get the instrument's index int msFileIndex = Integer.parseInt(field.substring(8, field.indexOf(']', 8))); // create the instrument array if necessary if (msFiles == null) { msFiles = new HashMap<Integer, MsFile>(); } // create the instrument if necessary if (msFiles.size() < msFileIndex) { msFiles.put(msFileIndex - 1, new MsFile()); } // check which value is set if (field.endsWith("id_format")) { msFiles.get(msFileIndex - 1).setIdFormat(new Param(value)); } else if (field.endsWith("format")) { msFiles.get(msFileIndex - 1).setFormat(new Param(value)); } else if (field.endsWith("location")) { msFiles.get(msFileIndex - 1).setLocation(value.trim()); } } // TODO: define how -custom params are handled and react on that else if (field.equals("custom")) { if (customParams == null) { customParams = new ArrayList<Param>(); } customParams.add(new Param(value)); } // colunits else if (field.startsWith("colunit-")) { Matcher matcher = COLUNIT_PATTERN.matcher(value); if (!matcher.find()) { throw new MzTabParsingException("Invalid colunit definition: '" + value + "'."); } String column = matcher.group(1); Param unit = new Param(matcher.group(2)); if (field.equals("colunit-protein")) { if (colunitProtein == null) { colunitProtein = new HashMap<String, Param>(); } colunitProtein.put(column, unit); } else if (field.equals("colunit-peptide")) { if (colunitPeptide == null) { colunitPeptide = new HashMap<String, Param>(); } colunitPeptide.put(column, unit); } else if (field.equals("colunit-small_molecule")) { if (colunitSmallMolecule == null) { colunitSmallMolecule = new HashMap<String, Param>(); } colunitSmallMolecule.put(column, unit); } } // species, tissue, cell type, disease - on the unit level else if (subId == null && field.startsWith("species")) { // get the instrument's index int 
speciesIndex = Integer.parseInt(field.substring(8, field.length() - 1)); // create the instrument array if necessary if (species == null) { species = new ArrayList<Param>(); } species.add(speciesIndex - 1, new Param(value)); } else if (subId == null && field.startsWith("tissue")) { // get the instrument's index int tissueIndex = Integer.parseInt(field.substring(7, field.length() - 1)); // create the instrument array if necessary if (tissue == null) { tissue = new ArrayList<Param>(); } tissue.add(tissueIndex - 1, new Param(value)); } else if (subId == null && field.startsWith("cell_type")) { // get the instrument's index int cellTypeIndex = Integer.parseInt(field.substring(10, field.length() - 1)); // create the instrument array if necessary if (cellType == null) { cellType = new ArrayList<Param>(); } cellType.add(cellTypeIndex - 1, new Param(value)); } else if (subId == null && field.startsWith("disease")) { // get the instrument's index int diseaseIndex = Integer.parseInt(field.substring(8, field.length() - 1)); // create the instrument array if necessary if (disease == null) { disease = new ArrayList<Param>(); } disease.add(diseaseIndex - 1, new Param(value)); } /** Parse subsample specific data */ else if (subId != null) { // extract the index int subIndex = Integer.parseInt(subId.substring(4, subId.length() - 1)); // make sure the index is greater than 0 // (it should be a 1 based index, and the code will break if this is not the case) if (subIndex < 1) { throw new MzTabParsingException("Found sub sample index smaller than 1!"); } // make sure the subsample array exists if (subsamples == null) { subsamples = new ArrayList<Subsample>(); } // make sure this subsample already exists if (subsamples.size() < subIndex) { subsamples.add(subIndex - 1, new Subsample(this.unitId, subIndex)); } Subsample subsample = subsamples.get(subIndex - 1); // parse the field if ("description".equals(field)) { subsample.setDescription(value.trim()); } else if 
("quantification_reagent".equals(field)) { subsample.setQuantificationReagent(new Param(value)); } else if ("custom".equals(field)) { if (subsample.getCustomParams() == null) { subsample.setCustomParams(new ArrayList<Param>(1)); } subsample.getCustomParams().add(new Param(value)); } else if (field.startsWith("species")) { // get the instrument's index int speciesIndex = Integer.parseInt(field.substring(8, field.length() - 1)); // create the instrument array if necessary if (subsample.getSpecies() == null) { subsample.setSpecies(new ArrayList<Param>()); } subsample.getSpecies().add(speciesIndex - 1, new Param(value)); } else if (field.startsWith("tissue")) { // get the instrument's index int tissueIndex = Integer.parseInt(field.substring(7, field.length() - 1)); // create the instrument array if necessary if (subsample.getTissue() == null) { subsample.setTissue(new ArrayList<Param>()); } subsample.getTissue().add(tissueIndex - 1, new Param(value)); } else if (field.startsWith("cell_type")) { // get the instrument's index int cellTypeIndex = Integer.parseInt(field.substring(10, field.length() - 1)); // create the instrument array if necessary if (subsample.getCellType() == null) { subsample.setCellType(new ArrayList<Param>()); } subsample.getCellType().add(cellTypeIndex - 1, new Param(value)); } else if (field.startsWith("disease")) { // get the instrument's index int diseaseIndex = Integer.parseInt(field.substring(8, field.length() - 1)); // create the instrument array if necessary if (subsample.getDisease() == null) { subsample.setDisease(new ArrayList<Param>()); } subsample.getDisease().add(diseaseIndex - 1, new Param(value)); } } else { logger.warn("Unknown unit field encountered: " + field); } } catch (Exception e) { throw new MzTabParsingException("Failed to parse mztab metadata field: " + e.getMessage(), e); } }