示例#1
0
  /**
   * Command-line entry point for the clustering pipeline.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>set the logger to {@code INFO} and publish a fresh Hadoop {@code Configuration} and the
   *       {@code Config} singleton through the static fields {@code s_HadoopConf} and
   *       {@code s_Config};</li>
   *   <li>derive the working directory as {@code baseDir + dataDir + "mahout/"} and delete its
   *       {@code clusters/} sub-directory;</li>
   *   <li>call {@code createTFIDF} unless the configuration asks to reuse the existing TF-IDF
   *       output;</li>
   *   <li>run {@code HierarchicalKMeansClusterer} in sequential mode and write the resulting tree
   *       with {@code saveAsTree} and {@code saveAsXml}.</li>
   * </ol>
   *
   * @param args command-line arguments; not read by this method
   * @throws Exception if any of the steps above fails
   */
  public static void main(String args[]) throws Exception {
    log.setLevel(Level.INFO);

    // Shared state read by the other static helpers of this class.
    final Configuration hadoopConf = new Configuration();
    s_HadoopConf = hadoopConf;
    s_Config = Config.getInstance();

    // Every intermediate artefact lives below <baseDir><dataDir>mahout/.
    final String workDir = s_Config.getBaseDir() + s_Config.getDataDir() + "mahout/";

    final Path documents = new Path(s_Config.getTextPath());
    final Path sequenceFiles = new Path(workDir, "sequence/");
    final Path tokens = new Path(workDir, "tokens");
    final Path termFrequencies = new Path(workDir, "termfreq/");
    final String vectorFolder = "Vectors";
    final Path tfIdf = new Path(workDir, "tfidf/");

    // Remove the clusters directory before clustering starts.
    final Path clusters = new Path(workDir, "clusters/");
    HadoopUtil.delete(hadoopConf, clusters);

    final boolean reuseTfIdf = s_Config.getReuseTFIDF();
    if (!reuseTfIdf) {
      createTFIDF(
          hadoopConf, documents, sequenceFiles, tokens, termFrequencies, vectorFolder, tfIdf);
    }

    final boolean sequential = true;
    final HierarchicalKMeansClusterer clusterer = new HierarchicalKMeansClusterer();
    final SetTree<ClusterDescriptor> clusterTree = clusterer.run(s_HadoopConf, sequential);

    saveAsTree(clusterTree);
    saveAsXml(clusterTree);
  }
示例#2
0
  /**
   * Marshals the cluster tree to XML with JAXB and writes it, pretty-printed, to the file named by
   * {@code s_Config.getOutputXmlFilename()}.
   *
   * <p>A {@link JAXBException} is treated as non-fatal: its stack trace is printed and the method
   * returns normally. The output file has already been opened by then, so it may be left empty or
   * incomplete.
   *
   * @param v_Tree the cluster tree to serialise
   * @throws FileNotFoundException if the output file cannot be opened for writing
   */
  private static void saveAsXml(SetTree<ClusterDescriptor> v_Tree) throws FileNotFoundException {
    PrintStream v_PS = new PrintStream(s_Config.getOutputXmlFilename());
    try {
      JAXBContext jaxbContext = JAXBContext.newInstance(SetTree.class);
      Marshaller jaxbMarshaller = jaxbContext.createMarshaller();

      // output pretty printed
      jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);

      jaxbMarshaller.marshal(v_Tree, v_PS);
    } catch (JAXBException e) {
      // Best effort: report the failure but do not abort the caller.
      e.printStackTrace();
    } finally {
      // Release the file handle on success and on every failure path.
      v_PS.close();
    }
  }
示例#3
0
 /**
  * Writes a plain-text rendering of the cluster tree to the file named by
  * {@code s_Config.getOutputTreeFilename()}.
  *
  * <p>The file starts with a header line containing the value of
  * {@code s_Config.getCanopyRanges()}, followed by a blank line and then the output of
  * {@code Util.prettyPrint}.
  *
  * @param v_Tree the cluster tree to print
  * @throws FileNotFoundException if the output file cannot be opened for writing
  */
 private static void saveAsTree(SetTree<ClusterDescriptor> v_Tree) throws FileNotFoundException {
   PrintStream v_PS = new PrintStream(s_Config.getOutputTreeFilename());
   try {
     v_PS.println("Canopy T2 values: " + s_Config.getCanopyRanges());
     v_PS.println();
     Util.prettyPrint(v_Tree, v_PS);
   } finally {
     // Release the file handle even if pretty-printing throws.
     v_PS.close();
   }
 }