/**
 * Writes the structure for each of the given PDB IDs to a block-compressed (BZip2)
 * Hadoop SequenceFile, skipping entries that cannot be parsed.
 */
public static void toSequenceFile(String fileName, Collection<String> pdbIds, boolean verbose)
      throws IOException {

    int failure = 0;
    int success = 0;
    int chains = 0;

    try (SequenceFile.Writer writer =
        SequenceFile.createWriter(
            new Configuration(),
            SequenceFile.Writer.file(new Path(fileName)),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(IntArrayWritable.class),
            SequenceFile.Writer.compression(
                SequenceFile.CompressionType.BLOCK, new BZip2Codec()))) {
      for (String pdbId : pdbIds) {
        if (verbose) {
          System.out.println(pdbId);
        }

        Structure s = null;
        try {
          s = StructureIO.getStructure(pdbId);
          success++;
        } catch (Exception e) {
          // some files can't be read. Let's just skip those!
          e.printStackTrace();
          failure++;
          continue;
        }

        if (s == null) {
          System.err.println("structure null: " + pdbId);
          continue;
        }

        if (s.getChains().isEmpty()) {
          continue;
        }

        chains += append(writer, pdbId, s);
      }
    }

    if (verbose) {
      System.out.println("Total structures: " + pdbIds.size());
      System.out.println("Success: " + success);
      System.out.println("Failure: " + failure);
      System.out.println("Chains: " + chains);
    }
  }
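
  // The snippet above relies on an append(writer, pdbId, s) helper and an IntArrayWritable
  // value type that are not shown here. Below is a minimal, purely hypothetical sketch of
  // what such a pair could look like, assuming each chain's one-letter residue sequence
  // (Chain.getAtomSequence()) is stored character-by-character as integer codes and keyed
  // by the PDB ID plus a chain index; both the key scheme and the encoding are assumptions,
  // not the original implementation. Uses org.apache.hadoop.io.ArrayWritable/IntWritable/Text
  // and org.biojava.nbio.structure.Chain/Structure.
  public static class IntArrayWritable extends ArrayWritable {

    public IntArrayWritable() {
      super(IntWritable.class);
    }

    public IntArrayWritable(IntWritable[] values) {
      super(IntWritable.class, values);
    }
  }

  private static int append(SequenceFile.Writer writer, String pdbId, Structure s)
      throws IOException {
    int count = 0;
    for (Chain chain : s.getChains()) {
      String seq = chain.getAtomSequence(); // one-letter residue sequence
      if (seq == null || seq.isEmpty()) {
        continue;
      }
      // Stand-in encoding: store each residue character as an integer code.
      IntWritable[] codes = new IntWritable[seq.length()];
      for (int i = 0; i < seq.length(); i++) {
        codes[i] = new IntWritable(seq.charAt(i));
      }
      writer.append(new Text(pdbId + "." + count), new IntArrayWritable(codes));
      count++;
    }
    return count;
  }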
Example #2
  // Writes one Text record into a local SequenceFile, then consumes it through the
  // Camel hdfs2 component and checks that exactly one message arrives.
  @Test
  public void testReadString() throws Exception {
    if (SKIP) {
      return;
    }

    //        final Path file = new Path("hdfs://localhost:9000/tmp/test/test-hdfs-file");
    final Path file =
        new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    // Now set the classes for the filesystems. This is normally done using
    // java.util.ServiceLoader, which doesn't work inside OSGi.
    conf.setClass("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class, FileSystem.class);
    conf.setClass(
        "fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class, FileSystem.class);
    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            conf,
            SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(NullWritable.class),
            SequenceFile.Writer.valueClass(Text.class));
    NullWritable keyWritable = NullWritable.get();
    Text valueWritable = new Text();
    String value = "CIAO!";
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    context.addRoutes(
        new RouteBuilder() {
          public void configure() {
            //
            // from("hdfs2://localhost:9000/tmp/test/test-hdfs-file?fileSystemType=HDFS&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
            from("hdfs2:///"
                    + file.toUri()
                    + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                .to("mock:result");
          }
        });
    context.start();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
  }
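
  // For a quick check without the Camel route, the file written above can be read back
  // directly with the Hadoop API. This is a minimal sketch; the helper name
  // dumpSequenceFile is made up, and it assumes the same local Configuration and the
  // same NullWritable/Text key and value classes used by the writer in the test.
  static void dumpSequenceFile(org.apache.hadoop.conf.Configuration conf, Path file)
      throws Exception {
    // Open the file and print every Text value it contains.
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
      NullWritable key = NullWritable.get();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(value);
      }
    }
  }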