Example #1
 private void writeString(final ByteBuf buf, final AsciiString s) {
   final int encodedLength = Huffman.encodedLength(s);
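   // Huffman-encode only when strictly shorter than the raw octets.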
   if (encodedLength < s.length()) {
     Hpack.writeHuffmanString(buf, s, encodedLength);
   } else {
     Hpack.writeRawString(buf, s);
   }
 }
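
A minimal usage sketch (hypothetical call site, not from the original source; Unpooled and AsciiString.of are Netty utilities):

   final ByteBuf buf = Unpooled.buffer();
   // Typical lowercase header text compresses under the HPACK Huffman code.
   writeString(buf, AsciiString.of("www.example.com"));
   // Strings dominated by characters with long Huffman codes take the raw branch.
   writeString(buf, AsciiString.of("{}^|~"));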
Example #2
  @Test
  public void testHuffmanDecode() throws Exception {
    final String expected = "https://www.example.com";

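    // Huffman-coded octets for the expected string, as given in RFC 7541,
    // Appendix C.6.1 (HPACK).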
    final byte[] encoded =
        TestUtil.bytes(
            0x9d, 0x29, 0xad, 0x17, 0x18, 0x63, 0xc7, 0x8f, 0x0b, 0x97, 0xc8, 0xe9, 0xae, 0x82,
            0xae, 0x43, 0xd3);

    final ByteBuf in = Unpooled.wrappedBuffer(encoded);
    final ByteBuf out = Unpooled.buffer();

    Huffman.decode(in, out);

    final String decoded = out.toString(US_ASCII);
    assertThat(decoded, is(expected));
  }
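
A companion check in the same style (a hypothetical extra test reusing only the helpers already shown); the octets are the RFC 7541, Appendix C.4.1 Huffman coding of "www.example.com":

  @Test
  public void testHuffmanDecodeRequestExample() throws Exception {
    final String expected = "www.example.com";

    // Octets from RFC 7541, Appendix C.4.1.
    final byte[] encoded =
        TestUtil.bytes(0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0, 0xab, 0x90, 0xf4, 0xff);

    final ByteBuf in = Unpooled.wrappedBuffer(encoded);
    final ByteBuf out = Unpooled.buffer();

    Huffman.decode(in, out);

    assertThat(out.toString(US_ASCII), is(expected));
  }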
Example #3
  // Train word2vec on the given corpus
  public void train(JavaRDD<String> corpusRDD) throws Exception {
    log.info("Start training ...");

    // SparkContext
    final JavaSparkContext sc = new JavaSparkContext(corpusRDD.context());

    // Pre-defined variables
    Map<String, Object> tokenizerVarMap = getTokenizerVarMap();
    Map<String, Object> word2vecVarMap = getWord2vecVarMap();

    // Variables filled in during training
    final JavaRDD<List<VocabWord>> vocabWordListRDD;
    final VocabCache vocabCache;

    // Start Training //
    //////////////////////////////////////
    log.info("Tokenization and building VocabCache ...");
    // Process every sentence and build a VocabCache, which is then fed into a LookupCache
    Broadcast<Map<String, Object>> broadcastTokenizerVarMap = sc.broadcast(tokenizerVarMap);
    TextPipeline pipeline =
        new TextPipeline(corpusRDD.repartition(numPartitions), broadcastTokenizerVarMap);
    pipeline.buildVocabCache();
    pipeline.buildVocabWordListRDD();

    // Get total word count and put into word2vec variable map
    word2vecVarMap.put("totalWordCount", pipeline.getTotalWordCount() / numPartitions);

    // The vocab word list RDD (already cached by the pipeline)
    vocabWordListRDD = pipeline.getVocabWordListRDD();

    // Get the broadcast VocabCache and its local value
    Broadcast<VocabCache> vocabCacheBroadcast = pipeline.getBroadCastVocabCache();
    vocabCache = vocabCacheBroadcast.getValue();

    //////////////////////////////////////
    log.info("Building Huffman Tree ...");
    // Building the Huffman tree updates the code and point of every VocabWord in the vocabCache
    Huffman huffman = new Huffman(vocabCache.vocabWords());
    huffman.build();
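    // After build(), each VocabWord carries the Huffman code and path (point)
    // that the training step below relies on.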

    /////////////////////////////////////
    log.info("Training word2vec sentences ...");

    word2vecVarMap.put("vecNum", vocabCache.numWords());

    // syn0 vectors keyed by (index, sense k): rows [0, numWords) per sense hold the
    // word vectors; rows [numWords, 2 * numWords - 1) hold the Huffman inner-node
    // (hierarchical-softmax) vectors.
    Map<Pair<Integer, Integer>, INDArray> s0 = new HashMap<>();
    for (int k = 0; k < K; k++) {
      for (int i = 0; i < vocabCache.numWords(); i++) {
        s0.put(new Pair<>(i, k), getRandomSyn0Vec(vectorLength));
      }
    }
    for (int i = vocabCache.numWords(); i < vocabCache.numWords() * 2 - 1; i++) {
      s0.put(new Pair<>(i, 0), Nd4j.zeros(1, vectorLength));
    }

    for (int i = 0; i < iterations; i++) {
      log.info("iteration: " + i);

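      // Anneal the learning rate linearly from alpha down to minAlpha across iterations;
      // each iteration trains inside its own [start, end) alpha window.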
      word2vecVarMap.put("alpha", alpha - (alpha - minAlpha) / iterations * i);
      word2vecVarMap.put("minAlpha", alpha - (alpha - minAlpha) / iterations * (i + 1));

      // Emits the per-(index, sense) syn0 updates for each partition.
      FlatMapFunction firstIterationFunction =
          new FirstIterationFunction(word2vecVarMap, expTable, sc.broadcast(s0));

      class MapPairFunction
          implements PairFunction<Map.Entry<Pair<Integer, Integer>, INDArray>,
              Pair<Integer, Integer>, INDArray> {
        public Tuple2<Pair<Integer, Integer>, INDArray> call(
            Map.Entry<Pair<Integer, Integer>, INDArray> pair) {
          return new Tuple2<>(pair.getKey(), pair.getValue());
        }
      }

      class Sum implements Function2<INDArray, INDArray, INDArray> {
        public INDArray call(INDArray a, INDArray b) {
          return a.add(b);
        }
      }

      @SuppressWarnings("unchecked")
      JavaPairRDD<Pair<Integer, Integer>, INDArray> indexSyn0UpdateEntryRDD =
          vocabWordListRDD
              .mapPartitions(firstIterationFunction)
              .mapToPair(new MapPairFunction())
              .cache();
      // countByKey returns Map<K, Object> in older Spark Java APIs; the values are Longs.
      Map<Pair<Integer, Integer>, Object> count = indexSyn0UpdateEntryRDD.countByKey();
      // Sum all updates that share the same (index, sense) key.
      indexSyn0UpdateEntryRDD = indexSyn0UpdateEntryRDD.reduceByKey(new Sum());

      // Get all the syn0 updates into a list in driver
      List<Tuple2<Pair<Integer, Integer>, INDArray>> syn0UpdateEntries =
          indexSyn0UpdateEntryRDD.collect();

      // Update syn0: average the accumulated updates by dividing each summed
      // vector by its occurrence count.
      s0 = new HashMap<>();
      for (Tuple2<Pair<Integer, Integer>, INDArray> syn0UpdateEntry : syn0UpdateEntries) {
        int cc = ((Number) count.get(syn0UpdateEntry._1)).intValue();
        if (cc > 0) {
          INDArray tmp = Nd4j.zeros(1, vectorLength).addi(syn0UpdateEntry._2).divi(cc);
          s0.put(syn0UpdateEntry._1, tmp);
        }
      }
    }

    // Assemble the final syn0 matrix: row (k * numWords + i) holds sense k of word i.
    syn0 = Nd4j.zeros(vocabCache.numWords() * K, vectorLength);
    for (Map.Entry<Pair<Integer, Integer>, INDArray> ss : s0.entrySet()) {
      if (ss.getKey().getFirst() < vocabCache.numWords()) {
        syn0.getRow(ss.getKey().getSecond() * vocabCache.numWords() + ss.getKey().getFirst())
            .addi(ss.getValue());
      }
    }

    vocab = vocabCache;
    // L2-normalize each row (word vector); norm2(1) gives the per-row norms.
    syn0.diviColumnVector(syn0.norm2(1));

    // Write the vectors in word2vec text format: "word(sense) v1 v2 ... vD" per line.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(path), false))) {
      for (int i = 0; i < syn0.rows(); i++) {
        String word = vocab.wordAtIndex(i % vocab.numWords());
        if (word == null) {
          continue;
        }
        word = word + "(" + i / vocab.numWords() + ")";
        StringBuilder sb = new StringBuilder();
        sb.append(word.replaceAll(" ", "_"));
        sb.append(" ");
        INDArray wordVector = syn0.getRow(i);
        for (int j = 0; j < wordVector.length(); j++) {
          sb.append(wordVector.getDouble(j));
          if (j < wordVector.length() - 1) {
            sb.append(" ");
          }
        }
        sb.append("\n");
        writer.write(sb.toString());
      }
      writer.flush();
    }
  }