Exemplo n.º 1
0
  /**
   * Generates the ML dataset from the documents listed in {@code files.txt}.
   *
   * <p>The listing is fetched from the local web application and processed line by line:
   *
   * <ul>
   *   <li>a line containing {@code "Q:"} starts a new query; the query text is the second
   *       {@code ':'}-separated field of that line;
   *   <li>any other non-blank line is read as {@code id,title,...} and names a document of the
   *       most recently started query. The document is fetched from {@code ml/<id>.html} and
   *       analyzed with a fresh {@code DocumentAnalyzer}.
   * </ul>
   *
   * <p>For every document for which {@code generateConcepts} returns a non-null result, an HTML
   * evaluation page is handed to {@code Printer.printToFile} under {@code
   * ml/dataset/<id>-sentences.html}, and the CSV rows are appended to the two CSV buffers that
   * are written once at the end ({@code conceptSentences.csv} and {@code mainSentences.csv}).
   *
   * <p>Blank lines, malformed lines and document lines that appear before any valid query line
   * are reported on standard output and skipped instead of aborting the whole run.
   */
  private static void runOverHTMLfiles() {
    System.out.println("Start ML generation");

    // read files.txt
    String files =
        HTMLhandler.readURLTxtasString("http://localhost:8080/ensenTensorielWeb/ml/files.txt");
    if (files == null) {
      // NOTE(review): whether readURLTxtasString can return null is not visible here; the guard
      // only prevents a NullPointerException if it does.
      System.out.println("files.txt could not be read, nothing to generate");
      return;
    }

    // Header row of conceptSentences.csv.
    StringBuilder conceptCsv =
        new StringBuilder(
            "id,tct,cb,cn,cbl,cnl,ca,cp,qt,qr,mr,ar,arl,mrl,pt,cas,as,len,tt,se,ss,hl,sss,csc,cs,nch,d,php,fph,lph,selected\n");
    // Header row of mainSentences.csv.
    StringBuilder mainCsv =
        new StringBuilder(
            "id,qt,qr,qmr,mr,ar,cns,arl,mrl,pt,as,len,tt,se,ss,hl,sss,cs,nch,d,php,fph,lph,selected\n");

    // Static head of every evaluation page: jQuery UI dialog plus the save() script that collects
    // the checked radio buttons / checkboxes into the two text areas.
    final String pageHead =
        "<html>  <head>  <meta http-equiv=\"content-type\" content=\"text/html; charset=windows-1250\">  <meta name=\"generator\" content=\"PSPad editor, www.pspad.com\">  <script src='jquery.1.9.1.js'></script>   <script src='jquery-ui.js'></script>   <link rel='stylesheet' href='jquery-ui.css' type='text/css' charset=''utf-8' />   <link rel='stylesheet' href='jquery.ui.dialog.css' type='text/css' charset='utf-8' /><title></title>  </head>  <body>"
            + " <script type='text/javascript'>"
            + "$(function() {             $( '#dialog' ).dialog({                 autoOpen: false, height: 700,               width:750,               modal: true,               position:['middle',20],            });            $( '#opener' ).click(function() {             save();              $( '#dialog' ).dialog( 'open' );            });          });"
            + "var all1='';var all2='';"
            + "function save(){ all1='';all2='';     $('input:checkbox:checked').each(function()  {  all2+= $(this).val()+' '; }); var res2 = all2.split(' '); document.getElementById('res2').value = (res2.sort()+''); $('input:radio:checked').each(function()  {  if($(this).val()!='') all1+= $(this).val()+' '; }); var res1 = all1.split(' '); document.getElementById('res1').value = res1.sort(); }"
            + " </script>"
            + "  <div id='dialog' title='Results'>  <p>Please copy this to <b>\"main-sentences-evaluation.txt\"</b>:<textarea name='res1' id='res1' cols='35' rows='10'></textarea> </p>"
            + "   <p>Please copy this to <b>\"concepts-sentences-evaluation.txt\"</b>:<textarea name='res2' id='res2' cols='35' rows='10'></textarea> </p></div>";
    final String pageTail = "  </body> </html>";

    String currQ = "";
    Query query = null; // stays null until the first valid query line has been seen
    int index = 0; // running query id
    int i = 0; // rank of the document within the current query

    // "\\r?\\n" also strips the carriage return of CRLF listings, which would otherwise end up
    // in titles and query strings.
    for (String line : files.split("\\r?\\n")) {
      if (line.trim().isEmpty()) {
        continue;
      }

      if (line.contains("Q:")) { // query
        String[] queryParts = line.split(":");
        if (queryParts.length < 2) {
          // Drop the current query so that following documents are not attributed to the
          // previous one.
          System.out.println("Skipping malformed query line: " + line);
          query = null;
          currQ = "";
          continue;
        }
        currQ = queryParts[1];
        System.out.println(currQ);
        query = new Query(currQ);
        query.id = index;
        i = 0;
        index++;
        continue;
      }

      // file
      String[] fields = line.split(",");
      if (fields.length < 2) {
        System.out.println("Skipping malformed file line: " + line);
        continue;
      }
      if (query == null) {
        System.out.println("Skipping file line without a preceding query: " + line);
        continue;
      }

      String id = fields[0];
      String title = fields[1];
      String url = "http://localhost:8080/ensenTensorielWeb/ml/" + id + ".html";
      System.out.println("Start id: " + id + ", title: " + title);

      String heading =
          "<h1>Q"
              + query.id
              + " - "
              + currQ
              + "<a href='#' id='opener'>click here to save</a></h1></hr> <h2>Q"
              + query.id
              + "D"
              + i
              + " (<a target='blank' href='"
              + url
              + "'> "
              + title
              + "</a>)</h2>";

      DocumentAnalyzer analyzer = new DocumentAnalyzer();
      analyzer.run(url, query, title, i);
      System.out.println("Q" + analyzer.Doci.q.id + ", doc" + analyzer.Doci.Rank + " analyzed");

      // results[0] = HTML body, results[1] = concept-sentence rows, results[2] = main-sentence rows
      String[] results = generateConcepts(analyzer, id);
      if (results != null) {
        conceptCsv.append(results[1]);
        mainCsv.append(results[2]);
        Printer.printToFile(
            "ml/dataset/" + id + "-sentences.html", pageHead + heading + results[0] + pageTail);
      }

      System.out.println("finished id: " + id + ", title: " + title);
      System.out.println();
      System.out.println();
      System.out.println();
      i++;
    }

    Printer.printToFile("ml/dataset/conceptSentences.csv", conceptCsv.toString());
    Printer.printToFile("ml/dataset/mainSentences.csv", mainCsv.toString());
  }
Exemplo n.º 2
0
  /**
   * Generates an ML dataset from live search results for a fixed list of queries.
   *
   * <p>Each comma-separated entry of the query list is searched with a new {@code Searcher}
   * (at most 5 results requested). Every result is analyzed, and when {@code generateConcepts}
   * returns a non-null result an HTML evaluation page is handed to {@code Printer.printToFile}
   * as {@code ML/Q<query>D<rank>.html}. The CSV rows of all documents are collected and written
   * once at the end to {@code ML/conceptSentences.csv} and {@code ML/mainSentences.csv}.
   *
   * <p>NOTE(review): the CSV headers here list fewer columns than the ones used in {@code
   * runOverHTMLfiles}, although both take their rows from {@code generateConcepts} — confirm
   * which header matches the rows actually produced.
   */
  static void MLDataset() {
    // Longer alternative query list (currently disabled):
    // "Karl Marx, French revolution,Bermuda Triangle,Pineal Gland,Gray wolf,PlayStation,
    //  Eurovision Song Contest,Flu,Gucci,the godfather"
    String qs = "Karl Marx, French revolution";

    // Header row of conceptSentences.csv.
    StringBuilder conceptCsv =
        new StringBuilder(
            "id,tct,cb,cn,cbl,cnl,ca,cp,qt,qr,mr,ar,arl,mrl,pt,cas,as,len,tt,se,ss,hl,sss,csc,cs,selected\n");
    // Header row of mainSentences.csv.
    StringBuilder mainCsv =
        new StringBuilder("id,qt,qr,qmr,mr,ar,cns,arl,mrl,pt,as,len,tt,se,ss,hl,sss,cs,selected\n");

    // Static head of every evaluation page; it does not depend on the query, so it is built once.
    final String pageHead =
        "<html>  <head>  <meta http-equiv=\"content-type\" content=\"text/html; charset=windows-1250\">  <meta name=\"generator\" content=\"PSPad editor, www.pspad.com\">  <script src='jquery.1.9.1.js'></script>   <script src='jquery-ui.js'></script>   <link rel='stylesheet' href='jquery-ui.css' type='text/css' charset=''utf-8' />   <link rel='stylesheet' href='jquery.ui.dialog.css' type='text/css' charset='utf-8' /><title></title>  </head>  <body>"
            + " <script type='text/javascript'>"
            + "$(function() {             $( '#dialog' ).dialog({                 autoOpen: false, height: 700,               width:750,               modal: true,               position:['middle',20],            });            $( '#opener' ).click(function() {             save();              $( '#dialog' ).dialog( 'open' );            });          });"
            + "var all1='';var all2='';"
            + "function save(){ all1='';all2='';     $('input:checkbox:checked').each(function()  {  all2+= $(this).val()+' '; }); var res2 = all2.split(' '); document.getElementById('res2').value = (res2.sort()+''); $('input:radio:checked').each(function()  {  if($(this).val()!='') all1+= $(this).val()+' '; }); var res1 = all1.split(' '); document.getElementById('res1').value = res1.sort(); }"
            + " </script>"
            + "  <div id='dialog' title='Results'>  <p>Please copy this to <b>\"main-sentences-evaluation.txt\"</b>:<textarea name='res1' id='res1' cols='35' rows='10'></textarea> </p>"
            + "   <p>Please copy this to <b>\"concepts-sentences-evaluation.txt\"</b>:<textarea name='res2' id='res2' cols='35' rows='10'></textarea> </p></div>";
    final String pageTail = "  </body> </html>";

    int index = 0; // running query id
    for (String rawQuery : qs.split(",")) {
      // Entries are separated by ", " in places; trim so the query does not start with a space.
      String q = rawQuery.trim();
      Query query = new Query(q);
      query.id = index++;

      Searcher S = new Searcher();
      List<Result> hits = S.search(q, 5);
      DocumentAnalyzer analyzer = new DocumentAnalyzer();

      int i = 0; // rank of the result within this query
      for (Result res : hits) {
        // The page is assembled from scratch for every result, so a document for which
        // generateConcepts returns null cannot leak its heading into the next page.
        String heading =
            "<h1>Q"
                + query.id
                + " - "
                + q
                + "<a href='#' id='opener'>click here to save</a></h1></hr> <h2>Q"
                + query.id
                + "D"
                + i
                + " (<a target='blank' href='"
                + res.getLink()
                + "'> "
                + res.getTitle()
                + "</a>)</h2>";

        analyzer.run(res, i, query);
        String id = "Q" + query.id + "D" + analyzer.Doci.Rank;

        // generated[0] = HTML body, generated[1] = concept-sentence rows,
        // generated[2] = main-sentence rows
        String[] generated = generateConcepts(analyzer, id);
        if (generated != null) {
          conceptCsv.append(generated[1]);
          mainCsv.append(generated[2]);
          Printer.printToFile(
              "ML/Q" + query.id + "D" + i + ".html", pageHead + heading + generated[0] + pageTail);
        }
        i++;
      }
    }

    Printer.printToFile("ML/conceptSentences.csv", conceptCsv.toString());
    Printer.printToFile("ML/mainSentences.csv", mainCsv.toString());
  }