/**
 * Adds a placeholder sample for {@code id} to {@code sampleDB} when no sample with that ID is
 * registered yet.
 *
 * <p>The placeholder is built with no parent IDs, {@link Affection#UNKNOWN} affection and
 * {@code Sample.UNSET_QT} as its quantitative phenotype.
 *
 * @param sampleDB database that is queried for {@code id} and receives the new sample
 * @param id       ID of the sample to add; may be {@code null}
 * @param familyID family ID assigned to the new sample
 * @param gender   gender assigned to the new sample
 * @return the sample that was created and added, or {@code null} when {@code id} is
 *         {@code null} or {@code sampleDB} already returns a sample for it
 */
private final Sample maybeAddImplicitSample(
        SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
    // Nothing to do without an ID, or when the ID is already known to the database.
    if (id == null || sampleDB.getSample(id) != null) {
        return null;
    }

    final Sample implicitSample =
            new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
    sampleDB.addSample(implicitSample);
    return implicitSample;
}
public final List<Sample> parse( Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) { final List<String> lines = new XReadLines(reader).readLines(); // What are the record offsets? final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0; final int samplePos = familyPos + 1; final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1; final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1; final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; final int nExpectedFields = MathUtils.arrayMaxInt( Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1; // go through once and determine properties int lineNo = 1; boolean isQT = false; final List<String[]> splits = new ArrayList<String[]>(lines.size()); for (final String line : lines) { if (line.startsWith(commentMarker)) continue; if (line.trim().equals("")) continue; final String[] parts = line.split("\\s+"); if (parts.length != nExpectedFields) throw new UserException.MalformedFile( reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields"); if (phenotypePos != -1) { isQT = isQT || !CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); } splits.add(parts); lineNo++; } logger.info("Phenotype is other? 
" + isQT); // now go through and parse each record lineNo = 1; final List<Sample> samples = new ArrayList<Sample>(splits.size()); for (final String[] parts : splits) { String familyID = null, individualID, paternalID = null, maternalID = null; Gender sex = Gender.UNKNOWN; String quantitativePhenotype = Sample.UNSET_QT; Affection affection = Affection.UNKNOWN; if (familyPos != -1) familyID = maybeMissing(parts[familyPos]); individualID = parts[samplePos]; if (paternalPos != -1) paternalID = maybeMissing(parts[paternalPos]); if (maternalPos != -1) maternalID = maybeMissing(parts[maternalPos]); if (sexPos != -1) { if (parts[sexPos].equals(SEX_MALE)) sex = Gender.MALE; else if (parts[sexPos].equals(SEX_FEMALE)) sex = Gender.FEMALE; else sex = Gender.UNKNOWN; } if (phenotypePos != -1) { if (isQT) { if (parts[phenotypePos].equals(MISSING_VALUE1)) affection = Affection.UNKNOWN; else { affection = Affection.OTHER; quantitativePhenotype = parts[phenotypePos]; } } else { if (parts[phenotypePos].equals(MISSING_VALUE1)) affection = Affection.UNKNOWN; else if (parts[phenotypePos].equals(MISSING_VALUE2)) affection = Affection.UNKNOWN; else if (parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED)) affection = Affection.UNAFFECTED; else if (parts[phenotypePos].equals(PHENOTYPE_AFFECTED)) affection = Affection.AFFECTED; else throw new ReviewedGATKException( "Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo); } } final Sample s = new Sample( individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype); samples.add(s); sampleDB.addSample(s); lineNo++; } for (final Sample sample : new ArrayList<Sample>(samples)) { Sample dad = maybeAddImplicitSample( sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE); if (dad != null) samples.add(dad); Sample mom = maybeAddImplicitSample( sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE); if (mom != null) samples.add(mom); } return samples; }