コード例 #1
0
  /**
   * Loads word vectors from {@code words.txt} on the classpath, configures a Barnes-Hut t-SNE
   * model, and asks it to write two-dimensional coordinates for every vocabulary word to a CSV
   * file under {@code target/archive-tmp}.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the word file cannot be loaded, the output directory cannot be created,
   *     or the t-SNE run fails
   */
  public static void main(String[] args) throws Exception {
    int iterations = 100;
    // Use double precision for ND4J buffers.
    Nd4j.dtype = DataBuffer.Type.DOUBLE;
    Nd4j.factory().setDType(DataBuffer.Type.DOUBLE);
    List<String> cacheList = new ArrayList<>();

    log.info("Load & Vectorize data....");
    File wordFile = new ClassPathResource("words.txt").getFile();
    Pair<InMemoryLookupTable, VocabCache> vectors = WordVectorSerializer.loadTxt(wordFile);
    VocabCache cache = vectors.getSecond();
    INDArray weights = vectors.getFirst().getSyn0();

    // Collect the vocabulary words in index order; they are passed to plot() as labels.
    for (int i = 0; i < cache.numWords(); i++) {
      cacheList.add(cache.wordAtIndex(i));
    }

    log.info("Build model....");
    BarnesHutTsne tsne =
        new BarnesHutTsne.Builder()
            .setMaxIter(iterations)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .usePca(false)
            .build();

    log.info("Store TSNE Coordinates for Plotting....");
    String outputFile = "target/archive-tmp/tsne-standard-coords.csv";
    File outputDir = new File(outputFile).getParentFile();
    // mkdirs() returns false both on failure and when the directory already exists, so only
    // treat it as an error when the directory is still missing afterwards.
    if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
      // Fully qualified so this block does not depend on an extra import.
      throw new java.io.IOException("Could not create output directory: " + outputDir);
    }
    tsne.plot(weights, 2, cacheList, outputFile);
  }
コード例 #2
0
  /**
   * Loads word vectors from {@code words.txt} on the classpath, configures a Barnes-Hut t-SNE
   * model, and asks it to write two-dimensional coordinates for every vocabulary word to a CSV
   * file under {@code target/archive-tmp}.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if the word file cannot be loaded, the output directory cannot be created,
   *     or the t-SNE run fails
   */
  public static void main(String[] args) throws Exception {
    // STEP 1: Initialization
    int iterations = 100;
    // Use double precision as the data type for this context.
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE);
    // Holds every vocabulary word, in index order.
    List<String> cacheList = new ArrayList<>();

    // STEP 2: Turn text input into a list of words
    log.info("Load & Vectorize data....");
    File wordFile = new ClassPathResource("words.txt").getFile(); // Locate the file
    // Load the lookup table (vectors) and the vocabulary cache (words).
    Pair<InMemoryLookupTable, VocabCache> vectors = WordVectorSerializer.loadTxt(wordFile);
    VocabCache cache = vectors.getSecond();
    // Separate the weights of the unique words into their own array.
    INDArray weights = vectors.getFirst().getSyn0();

    // Separate the word strings into their own list.
    for (int i = 0; i < cache.numWords(); i++) {
      cacheList.add(cache.wordAtIndex(i));
    }

    // STEP 3: Build a Barnes-Hut t-SNE model to use later
    log.info("Build model....");
    BarnesHutTsne tsne =
        new BarnesHutTsne.Builder()
            .setMaxIter(iterations)
            .theta(0.5)
            .normalize(false)
            .learningRate(500)
            .useAdaGrad(false)
            .build();

    // STEP 4: Establish the t-SNE values and save them to a file
    log.info("Store TSNE Coordinates for Plotting....");
    String outputFile = "target/archive-tmp/tsne-standard-coords.csv";
    File outputDir = new File(outputFile).getParentFile();
    // mkdirs() returns false both on failure and when the directory already exists, so only
    // treat it as an error when the directory is still missing afterwards.
    if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
      // Fully qualified so this block does not depend on an extra import.
      throw new java.io.IOException("Could not create output directory: " + outputDir);
    }
    // Uses the word-vector weights as the input matrix, targets two dimensions, labels the
    // points with the word strings, and writes to outputFile.
    // NOTE(review): plot() may be deprecated in newer DL4J releases - confirm against the
    // library version in use before replacing this call.
    tsne.plot(weights, 2, cacheList, outputFile);
  }