/**
 * Command-line entry point for the clustering run.
 *
 * <p>Sets the logger to INFO, creates a Hadoop {@code Configuration} and stores it
 * (together with the {@code Config} singleton) in the class-level fields, then:
 * <ol>
 *   <li>resolves the working directories under {@code <baseDir><dataDir>mahout/};</li>
 *   <li>deletes the {@code clusters/} directory under that prefix;</li>
 *   <li>calls {@code createTFIDF} unless the configuration asks to reuse existing TF-IDF data;</li>
 *   <li>runs {@code HierarchicalKMeansClusterer} and writes the resulting tree via
 *       {@code saveAsTree} and {@code saveAsXml}.</li>
 * </ol>
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if any of the steps above fails
 */
public static void main(String args[]) throws Exception {
    log.setLevel(Level.INFO);

    final Configuration hadoopConf = new Configuration();
    s_HadoopConf = hadoopConf;
    s_Config = Config.getInstance();

    // All working directories except the document directory hang off this prefix.
    final String workRoot = s_Config.getBaseDir() + s_Config.getDataDir() + "mahout/";

    final Path documentDir = new Path(s_Config.getTextPath());
    final Path sequenceDir = new Path(workRoot, "sequence/");
    final Path tokensDir = new Path(workRoot, "tokens");
    final Path termFreqDir = new Path(workRoot, "termfreq/");
    final String vectorFolder = "Vectors";
    final Path tfIdfDir = new Path(workRoot, "tfidf/");
    final boolean runSequentially = true;

    // Remove the clusters/ directory before clustering starts.
    HadoopUtil.delete(hadoopConf, new Path(workRoot, "clusters/"));

    final boolean reuseExistingTfIdf = s_Config.getReuseTFIDF();
    if (!reuseExistingTfIdf) {
        createTFIDF(
                hadoopConf,
                documentDir,
                sequenceDir,
                tokensDir,
                termFreqDir,
                vectorFolder,
                tfIdfDir);
    }

    final SetTree<ClusterDescriptor> clusterTree =
            new HierarchicalKMeansClusterer().run(s_HadoopConf, runSequentially);

    saveAsTree(clusterTree);
    saveAsXml(clusterTree);
}
private static void saveAsXml(SetTree<ClusterDescriptor> v_Tree) throws FileNotFoundException { try { PrintStream v_PS = new PrintStream(s_Config.getOutputXmlFilename()); JAXBContext jaxbContext = JAXBContext.newInstance(SetTree.class); Marshaller jaxbMarshaller = jaxbContext.createMarshaller(); // output pretty printed jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true); jaxbMarshaller.marshal(v_Tree, v_PS); } catch (JAXBException e) { e.printStackTrace(); } }
/**
 * Writes a plain-text rendering of the cluster tree to the file named by
 * {@code s_Config.getOutputTreeFilename()}.
 *
 * <p>The file starts with a line giving the configured canopy ranges, followed by
 * a blank line and the output of {@code Util.prettyPrint}. The output stream is
 * always closed before this method returns, even if printing throws.
 *
 * @param v_Tree the cluster tree to print
 * @throws FileNotFoundException if the output file cannot be opened for writing
 */
private static void saveAsTree(SetTree<ClusterDescriptor> v_Tree) throws FileNotFoundException {
    PrintStream v_PS = new PrintStream(s_Config.getOutputTreeFilename());
    // try/finally rather than try-with-resources: avoids assuming a Java 7+ source level.
    try {
        v_PS.println("Canopy T2 values: " + s_Config.getCanopyRanges());
        v_PS.println();
        Util.prettyPrint(v_Tree, v_PS);
    } finally {
        // Release the file handle on every path; previously the stream was never closed.
        v_PS.close();
    }
}