private static void runOverHTMLfiles() { System.out.println("Start ML generation"); // read files.txt String files = HTMLhandler.readURLTxtasString("http://localhost:8080/ensenTensorielWeb/ml/files.txt"); String out2 = "id,tct,cb,cn,cbl,cnl,ca,cp,qt,qr,mr,ar,arl,mrl,pt,cas,as,len,tt,se,ss,hl,sss,csc,cs,nch,d,php,fph,lph,selected\n"; // concepts Sentences.csv String out3 = "id,qt,qr,qmr,mr,ar,cns,arl,mrl,pt,as,len,tt,se,ss,hl,sss,cs,nch,d,php,fph,lph,selected\n"; // concepts Sentences.csv String Html1 = "<html> <head> <meta http-equiv=\"content-type\" content=\"text/html; charset=windows-1250\"> <meta name=\"generator\" content=\"PSPad editor, www.pspad.com\"> <script src='jquery.1.9.1.js'></script> <script src='jquery-ui.js'></script> <link rel='stylesheet' href='jquery-ui.css' type='text/css' charset=''utf-8' /> <link rel='stylesheet' href='jquery.ui.dialog.css' type='text/css' charset='utf-8' /><title></title> </head> <body>"; Html1 += " <script type='text/javascript'>"; Html1 += "$(function() { $( '#dialog' ).dialog({ autoOpen: false, height: 700, width:750, modal: true, position:['middle',20], }); $( '#opener' ).click(function() { save(); $( '#dialog' ).dialog( 'open' ); }); });"; Html1 += "var all1='';var all2='';"; Html1 += "function save(){ all1='';all2=''; $('input:checkbox:checked').each(function() { all2+= $(this).val()+' '; }); var res2 = all2.split(' '); document.getElementById('res2').value = (res2.sort()+''); $('input:radio:checked').each(function() { if($(this).val()!='') all1+= $(this).val()+' '; }); var res1 = all1.split(' '); document.getElementById('res1').value = res1.sort(); }"; Html1 += " </script>"; Html1 += " <div id='dialog' title='Results'> <p>Please copy this to <b>\"main-sentences-evaluation.txt\"</b>:<textarea name='res1' id='res1' cols='35' rows='10'></textarea> </p>"; Html1 += " <p>Please copy this to <b>\"concepts-sentences-evaluation.txt\"</b>:<textarea name='res2' id='res2' cols='35' rows='10'></textarea> </p></div>"; String Html2 = " </body> </html>"; /*int phCounter=0; int conceptCounter=0;*/ String currQ = ""; Query query = null; int index = 0; int i = 0; String[] lines = files.split("\n"); for (int lineN = 0; lineN < lines.length; lineN++) { /*if (index == 2) break;*/ String line = lines[lineN]; if (line.contains("Q:")) // query { currQ = line.split(":")[1]; System.out.println(currQ); query = new Query(currQ); query.id = index; i = 0; index++; } else { // file String id = line.split(",")[0]; String title = line.split(",")[1]; String url = "http://localhost:8080/ensenTensorielWeb/ml/" + id + ".html"; System.out.println("Start id: " + id + ", title: " + title); DocumentAnalyzer analyzer = new DocumentAnalyzer(); String out = ""; // for HTML out += Html1; out += "<h1>Q" + query.id + " - " + currQ + "<a href='#' id='opener'>click here to save</a></h1></hr> <h2>Q" + query.id + "D" + i + " (<a target='blank' href='" + url + "'> " + title + "</a>)</h2>"; analyzer.run(url, query, title, i); System.out.println("Q" + analyzer.Doci.q.id + ", doc" + analyzer.Doci.Rank + " analyzed"); String[] results = generateConcepts(analyzer, id); if (results != null) { out += results[0]; out2 += results[1]; out3 += results[2]; out += Html2; Printer.printToFile("ml/dataset/" + id + "-sentences.html", out); out = ""; } System.out.println("finished id: " + id + ", title: " + title); System.out.println(); System.out.println(); System.out.println(); i++; } } Printer.printToFile("ml/dataset/conceptSentences.csv", out2); Printer.printToFile("ml/dataset/mainSentences.csv", out3); }
static void MLDataset() { // String qs = "Karl Marx, French revolution,Bermuda Triangle,Pineal Gland,Gray // wolf,PlayStation,Eurovision Song Contest,Flu,Gucci,the godfather"; String qs = "Karl Marx, French revolution"; int index = 0; String out2 = "id,tct,cb,cn,cbl,cnl,ca,cp,qt,qr,mr,ar,arl,mrl,pt,cas,as,len,tt,se,ss,hl,sss,csc,cs,selected\n"; // concepts Sentences.csv String out3 = "id,qt,qr,qmr,mr,ar,cns,arl,mrl,pt,as,len,tt,se,ss,hl,sss,cs,selected\n"; // concepts // Sentences.csv for (String q : qs.split(",")) { Query query = new Query(q); query.id = index++; Searcher S = new Searcher(); List<Result> Results = S.search(q, 5); ArrayList<Document> documents = new ArrayList<Document>(); DocumentAnalyzer analyzer = new DocumentAnalyzer(); int i = 0; String out = ""; // for HTML String Html1 = "<html> <head> <meta http-equiv=\"content-type\" content=\"text/html; charset=windows-1250\"> <meta name=\"generator\" content=\"PSPad editor, www.pspad.com\"> <script src='jquery.1.9.1.js'></script> <script src='jquery-ui.js'></script> <link rel='stylesheet' href='jquery-ui.css' type='text/css' charset=''utf-8' /> <link rel='stylesheet' href='jquery.ui.dialog.css' type='text/css' charset='utf-8' /><title></title> </head> <body>"; Html1 += " <script type='text/javascript'>"; Html1 += "$(function() { $( '#dialog' ).dialog({ autoOpen: false, height: 700, width:750, modal: true, position:['middle',20], }); $( '#opener' ).click(function() { save(); $( '#dialog' ).dialog( 'open' ); }); });"; Html1 += "var all1='';var all2='';"; Html1 += "function save(){ all1='';all2=''; $('input:checkbox:checked').each(function() { all2+= $(this).val()+' '; }); var res2 = all2.split(' '); document.getElementById('res2').value = (res2.sort()+''); $('input:radio:checked').each(function() { if($(this).val()!='') all1+= $(this).val()+' '; }); var res1 = all1.split(' '); document.getElementById('res1').value = res1.sort(); }"; Html1 += " </script>"; Html1 += " <div id='dialog' title='Results'> <p>Please copy this to <b>\"main-sentences-evaluation.txt\"</b>:<textarea name='res1' id='res1' cols='35' rows='10'></textarea> </p>"; Html1 += " <p>Please copy this to <b>\"concepts-sentences-evaluation.txt\"</b>:<textarea name='res2' id='res2' cols='35' rows='10'></textarea> </p></div>"; String Html2 = " </body> </html>"; for (Result res : Results) { out += Html1; out += "<h1>Q" + query.id + " - " + q + "<a href='#' id='opener'>click here to save</a></h1></hr> <h2>Q" + query.id + "D" + i + " (<a target='blank' href='" + res.getLink() + "'> " + res.getTitle() + "</a>)</h2>"; analyzer.run(res, i, query); String id = "Q" + query.id + "D" + analyzer.Doci.Rank; String[] results = generateConcepts(analyzer, id); if (results != null) { out += results[0]; out2 += results[1]; out3 += results[2]; out += Html2; Printer.printToFile("ML/Q" + query.id + "D" + i + ".html", out); out = ""; } i++; } } Printer.printToFile("ML/conceptSentences.csv", out2); Printer.printToFile("ML/mainSentences.csv", out3); }