Exemplo n.º 1
0
 /**
  * Creates and registers a minimal sample for {@code id} if the database does not already hold one.
  *
  * <p>The new sample carries only the given ID, family ID and gender; the two constructor
  * arguments following the family ID are passed as {@code null}, affection is {@code
  * Affection.UNKNOWN} and the quantitative trait is {@code Sample.UNSET_QT}.
  *
  * @param sampleDB database that is queried for {@code id} and that receives the new sample
  * @param id ID of the sample to create; may be {@code null}, in which case nothing happens
  * @param familyID family ID to assign to the new sample
  * @param gender gender to assign to the new sample
  * @return the newly added sample, or {@code null} when {@code id} is {@code null} or a sample
  *     with that ID is already present in {@code sampleDB}
  */
 private final Sample maybeAddImplicitSample(
     SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
   // Nothing to add when no ID was supplied or the database already knows this sample.
   if (id == null || sampleDB.getSample(id) != null) {
     return null;
   }

   final Sample implicitSample =
       new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
   sampleDB.addSample(implicitSample);
   return implicitSample;
 }
Exemplo n.º 2
0
  /**
   * Parses whitespace-delimited pedigree records from {@code reader}, registers every resulting
   * sample in {@code sampleDB}, and returns them.
   *
   * <p>The column layout is derived from {@code missingFields}: columns appear in the order
   * family ID, sample ID, paternal ID, maternal ID, sex, phenotype, and each column flagged as
   * missing is dropped (both parent columns are dropped together). Lines starting with the
   * comment marker and lines that are blank after trimming are ignored.
   *
   * <p>Parsing happens in two passes. The first pass splits and validates each record and decides
   * whether the phenotype column is quantitative: it is treated as quantitative as soon as one
   * record holds a value outside {@code CATAGORICAL_TRAIT_VALUES}. The second pass builds the
   * {@code Sample} objects. Finally, parents that are referenced by ID but not yet present in
   * {@code sampleDB} are added as implicit samples (father as MALE, mother as FEMALE).
   *
   * <p>Line numbers in error messages are 1-based positions in the list returned by the line
   * reader, counting comment and blank lines, so they match the position of the offending line
   * in the input.
   *
   * @param reader source of the pedigree text
   * @param missingFields columns that are absent from every record
   * @param sampleDB database that receives every explicit and implicit sample
   * @return the explicit samples in input order, followed by any implicitly added parents
   * @throws UserException.MalformedFile if a record does not have the expected number of fields
   * @throws ReviewedGATKException if a categorical phenotype value is not recognised
   */
  public final List<Sample> parse(
      Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
    final List<String> lines = new XReadLines(reader).readLines();

    // What are the record offsets? -1 marks a column that is not present.
    final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
    final int samplePos = familyPos + 1;
    final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
    final int maternalPos =
        missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
    // Each later column sits right after the last column that is actually present.
    final int sexPos =
        missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
    final int phenotypePos =
        missingFields.contains(MissingPedField.NO_PHENOTYPE)
            ? -1
            : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
    final int nExpectedFields =
        MathUtils.arrayMaxInt(
                Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos))
            + 1;

    // go through once and determine properties
    int lineNo = 0;
    boolean isQT = false;
    final List<String[]> splits = new ArrayList<String[]>(lines.size());
    // Input line number of each entry in splits, so the second pass can report real positions.
    final List<Integer> splitLineNos = new ArrayList<Integer>(lines.size());
    for (final String line : lines) {
      // Count every line, including skipped ones, so reported numbers match the input.
      lineNo++;
      if (line.startsWith(commentMarker)) continue;
      if (line.trim().equals("")) continue;

      final String[] parts = line.split("\\s+");

      if (parts.length != nExpectedFields)
        throw new UserException.MalformedFile(
            reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

      if (phenotypePos != -1) {
        // A single non-categorical value switches the whole column to quantitative.
        isQT = isQT || !CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
      }

      splits.add(parts);
      splitLineNos.add(lineNo);
    }
    logger.info("Phenotype is other? " + isQT);

    // now go through and parse each record
    final List<Sample> samples = new ArrayList<Sample>(splits.size());
    for (int i = 0; i < splits.size(); i++) {
      final String[] parts = splits.get(i);
      final int recordLineNo = splitLineNos.get(i);

      String familyID = null, individualID, paternalID = null, maternalID = null;
      Gender sex = Gender.UNKNOWN;
      String quantitativePhenotype = Sample.UNSET_QT;
      Affection affection = Affection.UNKNOWN;

      if (familyPos != -1) familyID = maybeMissing(parts[familyPos]);
      individualID = parts[samplePos];
      if (paternalPos != -1) paternalID = maybeMissing(parts[paternalPos]);
      if (maternalPos != -1) maternalID = maybeMissing(parts[maternalPos]);

      if (sexPos != -1) {
        // Anything other than the male/female codes is treated as unknown.
        if (parts[sexPos].equals(SEX_MALE)) sex = Gender.MALE;
        else if (parts[sexPos].equals(SEX_FEMALE)) sex = Gender.FEMALE;
        else sex = Gender.UNKNOWN;
      }

      if (phenotypePos != -1) {
        if (isQT) {
          // Quantitative column: keep the raw text unless it is the missing marker.
          if (parts[phenotypePos].equals(MISSING_VALUE1)) affection = Affection.UNKNOWN;
          else {
            affection = Affection.OTHER;
            quantitativePhenotype = parts[phenotypePos];
          }
        } else {
          if (parts[phenotypePos].equals(MISSING_VALUE1)) affection = Affection.UNKNOWN;
          else if (parts[phenotypePos].equals(MISSING_VALUE2)) affection = Affection.UNKNOWN;
          else if (parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED))
            affection = Affection.UNAFFECTED;
          else if (parts[phenotypePos].equals(PHENOTYPE_AFFECTED)) affection = Affection.AFFECTED;
          else
            throw new ReviewedGATKException(
                "Unexpected phenotype type " + parts[phenotypePos] + " at line " + recordLineNo);
        }
      }

      final Sample s =
          new Sample(
              individualID,
              sampleDB,
              familyID,
              paternalID,
              maternalID,
              sex,
              affection,
              quantitativePhenotype);
      samples.add(s);
      sampleDB.addSample(s);
    }

    // Iterate over a snapshot because implicit parents are appended to samples inside the loop.
    for (final Sample sample : new ArrayList<Sample>(samples)) {
      Sample dad =
          maybeAddImplicitSample(
              sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
      if (dad != null) samples.add(dad);

      Sample mom =
          maybeAddImplicitSample(
              sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
      if (mom != null) samples.add(mom);
    }

    return samples;
  }