コード例 #1
0
 /**
  * {@inheritDoc}
  *
  * <p>Reads tab-delimited rows of the form {@code id | description} and stores one Gene item
  * (attached to the organism for {@code HUMAN_TAXON_ID}) for every row whose description is not
  * blank. Rows with fewer than two columns are logged and skipped.
  */
 @Override
 public void process(Reader reader) throws Exception {
   // Data has format:
   // id | description
   Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
   int count = 0;
   while (lineIter.hasNext()) {
     String[] line = (String[]) lineIter.next();

     // Check the column count up front instead of catching IndexOutOfBoundsException, so that
     // an index error raised while creating or storing items is not misreported as a bad line.
     if (line.length < 2) {
       LOG.info("Failed to read line: " + Arrays.asList(line));
       continue;
     }

     String entrez = line[0];
     String description = line[1];
     // Per-row progress is diagnostic output, not an error condition.
     LOG.debug("description " + count++ + " " + description);
     if (!StringUtils.isBlank(description)) {
       Item gene = createItem("Gene");
       gene.setAttribute("primaryIdentifier", entrez);
       gene.setAttribute("description", description);
       gene.setReference("organism", getOrganism(HUMAN_TAXON_ID));
       store(gene);
     }
   }
 }
コード例 #2
0
  /**
   * Populate the resolver from a tab-delimited file.
   *
   * <p>Each usable row has the form {@code ZDBID <tab> ID1,ID2,ID3}. Rows with fewer than two
   * columns, rows starting with {@code #}, and rows whose first column does not start with
   * {@code GENE_PATTERN} are skipped. For every remaining row the first column is registered as
   * its own main id and the comma-separated second column as its synonyms.
   *
   * @param f the file to read
   * @throws IOException if the file cannot be opened, read or closed
   */
  protected void createFromFile(File f) throws IOException {
    // data is in format:
    // ZDBID	ID1,ID2,ID3
    BufferedReader reader = new BufferedReader(new FileReader(f));
    try {
      Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);
      while (lineIter.hasNext()) {
        String[] line = (String[]) lineIter.next();

        if (line.length < 2 || line[0].startsWith("#") || !line[0].startsWith(GENE_PATTERN)) {
          continue;
        }

        String zfinId = line[0];
        String[] synonyms = StringUtil.split(line[1].trim(), ",");

        resolver.addMainIds(taxonId, zfinId, Collections.singleton(zfinId));
        resolver.addSynonyms(taxonId, zfinId, new HashSet<String>(Arrays.asList(synonyms)));
      }
    } finally {
      // The reader is opened here, so it is released here even when parsing fails.
      reader.close();
    }
  }
コード例 #3
0
  /**
   * Parse a topo file into a lookup map.
   *
   * <p>Only files named {@code topos_tf.tsv} or {@code topos_mirna.tsv} contribute entries; for
   * any other file name the returned map is empty. Rows with fewer than three columns (for
   * example a trailing empty line) are skipped.
   *
   * @param file a topo file.
   * @return map keyed by the second column (primaryId for the TF file, fullName for the miRNA
   *     file); each value is a single-entry map from the first column (symbol) to the third column
   *     (level for the TF file, position for the miRNA file)
   * @throws RuntimeException if the file cannot be opened or parsed
   */
  private Map<String, Map<String, String>> processTopoFile(File file) {
    Map<String, Map<String, String>> topoMap = new HashMap<String, Map<String, String>>();

    Reader reader;
    try {
      reader = new FileReader(file);
    } catch (FileNotFoundException e) {
      throw new RuntimeException(e);
    }

    try {
      Iterator<?> tsvIter = FormattedTextParser.parseTabDelimitedReader(reader);

      // Both supported files share the same column layout, so a single branch handles them.
      String fileName = file.getName();
      boolean supportedFile =
          "topos_tf.tsv".equals(fileName) || "topos_mirna.tsv".equals(fileName);

      while (tsvIter.hasNext()) {
        String[] line = (String[]) tsvIter.next();
        // Three columns are read below, so require three; the file could end with an empty line.
        if (supportedFile && line.length > 2) {
          String symbol = line[0];
          String key = line[1]; // primaryId (TF) or fullName (miRNA)
          String value = line[2]; // level (TF) or position (miRNA)

          Map<String, String> aMap = new HashMap<String, String>();
          aMap.put(symbol, value);
          topoMap.put(key, aMap);
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      try {
        reader.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing an input file fails.
      }
    }

    return topoMap;
  }
コード例 #4
0
  /**
   * Read {@code |}-delimited journal rows and store one journal item per row.
   *
   * <p>Columns are {@code primaryIdentifier | name | abbrev | publisher}. The first two columns
   * are required; abbrev and publisher are optional and are only set when present and non-empty.
   *
   * @param reader source of the journal rows
   * @throws Exception a RuntimeException if a row has fewer than two columns; a SAXException
   *     wrapping the ObjectStoreException if a journal cannot be stored
   */
  public void processJournals(Reader reader) throws Exception {

    Iterator<?> lineIter = FormattedTextParser.parseDelimitedReader(reader, '|');

    while (lineIter.hasNext()) {
      String[] line = (String[]) lineIter.next();

      if (line.length < 2) {
        // Do not index into an empty row while building the error message.
        String firstField = line.length > 0 ? line[0] : "";
        throw new RuntimeException(
            "Journal line does not have enough elements: " + line.length + " " + firstField);
      }

      String primaryIdentifier = line[0];
      String name = line[1];
      // Optional trailing columns: a short row simply leaves them unset.
      String abbrev = line.length > 2 ? line[2] : null;
      String publisher = line.length > 3 ? line[3] : null;

      Item journal = getJournal(primaryIdentifier);

      if (!StringUtils.isEmpty(name)) {
        journal.setAttribute("name", name);
      }
      if (!StringUtils.isEmpty(abbrev)) {
        journal.setAttribute("abbrev", abbrev);
      }
      if (!StringUtils.isEmpty(publisher)) {
        journal.setAttribute("publisher", publisher);
      }

      try {
        store(journal);
      } catch (ObjectStoreException e) {
        throw new SAXException(e);
      }
    }
  }
コード例 #5
0
  /**
   * Walk through the map_title.tab file and store one pathway per row.
   *
   * <p>Rows have the layout {@code pathway id | pathway name}, separated by tabs.
   *
   * @param reader a reader for the map_title.tab file
   * @throws IOException declared for the parsing of the reader
   * @throws ObjectStoreException declared for the storing of a pathway
   */
  private void processMapTitleFile(Reader reader) throws IOException, ObjectStoreException {
    for (Iterator<?> rows = FormattedTextParser.parseTabDelimitedReader(reader);
        rows.hasNext(); ) {
      // one array element per tab-separated value of the current row
      String[] columns = (String[]) rows.next();
      String id = columns[0];
      String title = columns[1];

      // getPathway hands back a new Item, or the cached one if this id was seen before
      Item pathwayItem = getPathway(id);
      pathwayItem.setAttribute("name", title);

      // the name is the only information this file contributes, so store right away
      store(pathwayItem);
    }
  }
コード例 #6
0
  /**
   * Read the tab-delimited protein half-life file and call {@code newProduct} once per row.
   *
   * <p>Rows with fewer than five columns are logged and skipped. The reader is always closed
   * before this method returns, including when processing fails.
   *
   * @param preader reader over the half-life data file; closed by this method
   * @throws Exception a BuildException if the reader cannot be parsed, or whatever
   *     {@code newProduct} or closing the reader throws
   * @throws ObjectStoreException declared by the original signature
   */
  private void processHalfLifeDataFile(Reader preader) throws Exception, ObjectStoreException {

    /*
     * Sample line
     * protein half-life       YMR028W 8.2     min     0.1     hr      25466257
     * protein half-life       YLR110C 10.0    min     0.2     hr      25466257
     */
    System.out.println("Processing Protien HalfLife Data file....");

    try {
      Iterator<?> tsvIter;
      try {
        tsvIter = FormattedTextParser.parseTabDelimitedReader(preader);
      } catch (Exception e) {
        throw new BuildException("cannot parse file: " + getCurrentFile(), e);
      }

      while (tsvIter.hasNext()) {

        String[] line = (String[]) tsvIter.next();

        // Five columns are read below, so the message states the same minimum as the check.
        if (line.length < 5) {
          LOG.error("Couldn't process line. Expected at least 5 cols, but was " + line.length);
          continue;
        }

        String experiment = line[0].trim();
        String protein = line[1].trim();
        String value = line[2].trim();
        String units = line[3].trim();
        // NOTE(review): in the sample line above the PMID appears after the hours value and
        // unit, which would make it a later column than index 4 - confirm against the real file.
        String pmid = line[4].trim();

        newProduct(experiment, protein, value, units, pmid);
      }
    } finally {
      // Close on every path; previously the reader stayed open when a row failed.
      preader.close();
    }
  }
コード例 #7
0
  /**
   * {@inheritDoc}
   *
   * <p>Stores one chromosome, then one Gene or NcRNA item per non-header row of the tab-delimited
   * input, together with its synonyms, its chromosome location (when start and end match
   * {@code DIGIT_REGEX}) and, for genes, a protein. Rows of any other type are ignored. Every row
   * is read up to column index 11, so a shorter row fails with an
   * ArrayIndexOutOfBoundsException.
   */
  public void process(Reader reader) throws Exception {

    // Create a chromosome
    Item chromosome = createItem("Chromosome");
    chromosome.setAttribute("primaryIdentifier", CHROMOSOME_PID);
    store(chromosome);

    Iterator<?> lineIter = FormattedTextParser.parseTabDelimitedReader(reader);

    while (lineIter.hasNext()) {
      String[] line = (String[]) lineIter.next();

      // skip the header line
      if (line[0].equals(HEADER_LINE)) {
        continue;
      }

      String ecogeneId = line[0];
      String geneName = line[1];
      String eCK = line[2];
      String swissProtId = line[3];
      String wisconsinGenBankId = line[4];
      String genBankProteinId = line[5];
      String genoBaseId = line[6];
      String type = line[7];
      String strand = line[8];
      String start = line[9];
      String end = line[10];
      String synonym = line[11];

      Set<String> symSet = new TreeSet<String>();

      if (!eCK.equals(NULL_STRING)) {
        symSet.add(eCK);
      }

      if (!genoBaseId.equals(NULL_STRING)) {
        symSet.addAll(Arrays.asList(StringUtil.split(genoBaseId, "; ")));
      }

      if (!synonym.equals(NONE_STRING)) {
        symSet.addAll(Arrays.asList(synonym.split(", ")));
      }

      boolean isGene = type.equals(TYPE_GENE);
      if (!isGene && !type.equals(TYPE_RNA)) {
        // neither a gene nor an RNA row: nothing to store
        continue;
      }

      Item feature =
          createAnnotatedFeature(
              isGene ? "Gene" : "NcRNA",
              chromosome,
              ecogeneId,
              wisconsinGenBankId,
              geneName,
              symSet);

      // Only genes carry a protein.
      if (isGene && !swissProtId.equals(NULL_STRING)) {

        if (proteinMap.containsKey(swissProtId)) {
          // Reference a protein to a gene (a gene has proteins
          // collection)
          feature.addToCollection("proteins", proteinMap.get(swissProtId));
        } else {
          Item protein = createItem("Protein");
          protein.setAttribute("primaryAccession", swissProtId);
          // NCBI Protein id, remove "g"
          protein.setAttribute("secondaryIdentifier", genBankProteinId.substring(1));
          feature.addToCollection("proteins", protein);
          store(protein);
          proteinMap.put(swissProtId, protein);
        }
      }

      storeChromosomeLocation(feature, chromosome, strand, start, end);

      store(feature);
    }
  }

  /**
   * Create (but do not store) a Gene or NcRNA item with its chromosome and organism references
   * and its identifier, name and symbol attributes, and create a synonym for every entry of
   * {@code symSet}.
   */
  private Item createAnnotatedFeature(
      String className,
      Item chromosome,
      String ecogeneId,
      String wisconsinGenBankId,
      String geneName,
      Set<String> symSet)
      throws Exception {
    Item feature = createItem(className);
    feature.setReference("chromosome", chromosome);
    feature.setReference("organism", getOrganism(ECOLI_TAXON));
    feature.setAttribute("primaryIdentifier", ecogeneId);
    feature.setAttribute("secondaryIdentifier", wisconsinGenBankId);
    feature.setAttribute("name", geneName);
    feature.setAttribute("symbol", geneName);

    for (String sym : symSet) {
      createSynonym(feature, sym, true);
    }
    return feature;
  }

  /**
   * Create and store a Location linking {@code feature} to {@code chromosome}, and reference it
   * from the feature; does nothing unless both start and end match {@code DIGIT_REGEX}.
   */
  private void storeChromosomeLocation(
      Item feature, Item chromosome, String strand, String start, String end) throws Exception {
    if (!start.matches(DIGIT_REGEX) || !end.matches(DIGIT_REGEX)) {
      return;
    }

    Item location = createItem("Location");
    location.setAttribute("start", start);
    location.setAttribute("end", end);
    location.setReference("feature", feature);
    location.setReference("locatedOn", chromosome);

    if (strand.equals(CLOCKWISE)) {
      location.setAttribute("strand", "+1");
    } else if (strand.equals(COUNTER_CLOCKWISE)) {
      location.setAttribute("strand", "-1");
    } else {
      location.setAttribute("strand", "0");
    }

    feature.setReference("chromosomeLocation", location);

    store(location);
  }
コード例 #8
0
  /**
   * Process the edge data file.
   *
   * <p>Each row with at least two columns is read as {@code source <tab> target}. A node is known
   * if its identifier is a key of {@code tfMap} or, failing that, of {@code miRNAMap}. For a known
   * source, a gene and a network property are created and the property is stored; this happens
   * even when the target then turns out to be unknown. When the target is known too, the same is
   * done for the target and a regulation of the matching interaction type is stored between the
   * two genes. Rows with an unknown source are skipped entirely.
   *
   * @param file the edge data file
   * @param tfMap a customized map with TF information
   * @param miRNAMap a customized map with miRNA information
   * @throws RuntimeException if the file cannot be opened or parsed, or an item cannot be stored
   */
  private void processEdgeFile(
      File file,
      Map<String, Map<String, String>> tfMap,
      Map<String, Map<String, String>> miRNAMap) {

    Reader reader;
    try {
      reader = new FileReader(file);
    } catch (FileNotFoundException e) {
      throw new RuntimeException(e);
    }

    try {
      Iterator<?> tsvIter = FormattedTextParser.parseTabDelimitedReader(reader);

      while (tsvIter.hasNext()) {
        String[] line = (String[]) tsvIter.next();
        if (line.length <= 1) {
          continue;
        }
        String sourceIdentifier = line[0];
        String targetIdentifier = line[1];

        // TF membership takes precedence over miRNA membership.
        boolean sourceIsTf = tfMap.containsKey(sourceIdentifier);
        if (!sourceIsTf && !miRNAMap.containsKey(sourceIdentifier)) {
          continue;
        }

        // The source node is stored before the target is looked up.
        String sourceGenePid =
            storeNetworkNode(sourceIdentifier, sourceIsTf ? tfMap : miRNAMap, sourceIsTf);

        boolean targetIsTf = tfMap.containsKey(targetIdentifier);
        if (!targetIsTf && !miRNAMap.containsKey(targetIdentifier)) {
          continue;
        }

        // Create regulation for both genes (before the target node, to keep creation order)
        Item regulation;
        if (sourceIsTf) {
          regulation =
              targetIsTf
                  ? createRegulation(INTERACTION_TYPE_TF_TF)
                  : createRegulation(INTERACTION_TYPE_TF_MIRNA);
        } else {
          regulation =
              targetIsTf
                  ? createRegulation(INTERACTION_TYPE_MIRNA_TF)
                  : createRegulation(INTERACTION_TYPE_MIRNA_MIRNA);
        }

        String targetGenePid =
            storeNetworkNode(targetIdentifier, targetIsTf ? tfMap : miRNAMap, targetIsTf);

        regulation.setReference("source", geneItems.get(sourceGenePid));
        regulation.setReference("target", geneItems.get(targetGenePid));
        store(regulation);
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    } catch (ObjectStoreException e) {
      throw new RuntimeException(e);
    } finally {
      try {
        reader.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing an input file fails.
      }
    }
  }

  /**
   * Create the gene for one edge endpoint and store its network property; returns the value
   * returned by {@code createGene}. TF nodes pass their identifier to {@code createGene} and get
   * a {@code TOPO_TYPE_LEVEL} property, miRNA nodes pass {@code null} and get a
   * {@code TOPO_TYPE_POSITION} property.
   */
  private String storeNetworkNode(
      String identifier, Map<String, Map<String, String>> topoMap, boolean isTf)
      throws ObjectStoreException {
    Map<String, String> topoEntry = topoMap.get(identifier);
    String symbol = topoEntry.keySet().iterator().next();
    String topoValue = topoEntry.get(symbol);
    String genePid = createGene(isTf ? identifier : null, symbol);

    Item networkProperty =
        isTf
            ? createNetworkProperty(TOPO_TYPE_LEVEL, topoValue)
            : createNetworkProperty(TOPO_TYPE_POSITION, topoValue);
    networkProperty.setReference("node", geneItems.get(genePid));
    store(networkProperty);
    return genePid;
  }
コード例 #9
0
  /**
   * Process the csv file
   *
   * <p>Rows are tab-delimited: gene CG identifier, stage number, optional image, optional term.
   * Rows whose first column does not start with {@code CG}, whose gene cannot be resolved, or
   * whose stage is missing, not a number, or not a valid index into {@code STAGE_LABELS} are
   * skipped without creating a result. After all rows are read, every result without any
   * mRNAExpressionTerms is marked as not expressed, and all images and results are stored.
   *
   * @param reader the Reader
   * @see DataConverter#process
   * @throws Exception if something goes wrong
   */
  @Override
  public void process(Reader reader) throws Exception {
    if (rslv == null) {
      rslv = IdResolverService.getFlyIdResolver();
    }

    Iterator<String[]> it = FormattedTextParser.parseTabDelimitedReader(reader);

    while (it.hasNext()) {

      String[] lineBits = it.next();
      String geneCG = lineBits[0];

      if (!geneCG.startsWith("CG")) {
        // ignore clones for now
        continue;
      }

      // Try to create/fetch gene, if null the IdResolver failed so do nothing for this row
      Item gene = getGene(geneCG);
      if (gene == null) {
        continue;
      }

      if (lineBits.length < 2) {
        // bad line in file (no stage column), just keep going
        continue;
      }
      String stage = lineBits[1];

      // Validate the stage before creating the result, so that a bad line does not leave a
      // result without a stageRange behind to be stored at the end.
      int stageNumber;
      try {
        stageNumber = Integer.parseInt(stage);
      } catch (NumberFormatException e) {
        // bad line in file, just keep going
        continue;
      }
      if (stageNumber < 0 || stageNumber >= STAGE_LABELS.length) {
        // stage has no label: treat like any other bad line
        continue;
      }

      String resultKey = geneCG + stage;
      Item result = getResult(resultKey, gene.getIdentifier(), pub.getIdentifier(), stage);
      result.setAttribute("stageRange", STAGE_LABELS[stageNumber] + " (BDGP in situ)");

      if (lineBits.length > 2) {
        String image = lineBits[2];
        if (StringUtils.isNotEmpty(image)) {
          setImage(result, URL + image);
        }
      }
      if (lineBits.length > 3) {
        String term = lineBits[3];
        Item termItem = getTerm(term);
        if (termItem != null) {
          result.addToCollection("mRNAExpressionTerms", termItem);
        }
        if ("no staining".equals(term)) {
          result.setAttribute("expressed", "false");
        }
      }
    }

    for (Item result : results.values()) {
      if (!result.hasCollection("mRNAExpressionTerms")
          || result.getCollection("mRNAExpressionTerms").getRefIds().isEmpty()) {
        result.setAttribute("expressed", "false");
      }
    }

    storeAll(imgs);
    storeAll(results);
  }