示例#1
0
文件: Worker.java 项目: m1/Parker
  /**
   * Downloads the page at {@code url}, walks over every anchor that carries an {@code href} and
   * records each link: the per-host counter in {@code tempStats} is bumped and the parsed URL is
   * appended to {@code tempQueue}.
   *
   * @param url page to fetch and scan for links
   * @param verbose when true, prints the current thread name, the size of {@code tempQueue} and
   *     the source url once the scan is finished
   * @throws Exception if fetching or parsing fails
   */
  public Worker(String url, boolean verbose) throws Exception {
    Document page = Jsoup.connect(url).get();
    // only anchors that actually have an href are selected
    for (Element anchor : page.select("a[href]")) {
      // the "abs:" prefix requests the absolute form of the link
      String absoluteHref = anchor.attr("abs:href");
      if (absoluteHref.isEmpty()) {
        continue;
      }
      Parser parsedLink = new Parser(absoluteHref);
      String hostName = parsedLink.getHost();
      if (!tempStats.containsKey(hostName)) {
        // first link seen for this host
        tempStats.put(hostName, 1);
      } else {
        // host already known: increase its link count by one
        int seenSoFar = tempStats.get(hostName);
        tempStats.put(hostName, seenSoFar + 1);
      }
      // queue the parsed url
      tempQueue.add(parsedLink.getURL());
    }
    if (verbose) {
      String threadName = Thread.currentThread().getName();
      System.out.println(threadName + " : " + tempQueue.size() + " links from " + url);
    }
  }
 /**
  * Derives the AU name from the start page: when the page at {@code m_registryUrl} contains a
  * {@code <title>} element, the text of the first one is used.
  *
  * <p>The {@code recomputeRegName} flag is cleared once the page has been parsed successfully.
  *
  * @return the title of the first title tag, or null when not started, when no cached url or
  *     title tag is available, or when the page cannot be parsed
  */
 String recomputeRegName() {
   // This can be invoked (several times, mostly from logging) before enough
   // mechanism has started to make it possible to resolve the CuUrl below.
   if (!isStarted()) {
     return null;
   }
   try {
     CachedUrl startPage = makeCachedUrl(m_registryUrl);
     if (startPage == null) {
       return null;
     }
     String pageAddress = CuUrl.fromCu(startPage).toString();
     Parser htmlParser = new Parser(pageAddress);
     NodeList titleList =
         htmlParser.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));
     Node[] titleNodes = titleList.toNodeArray();
     recomputeRegName = false;
     if (titleNodes.length == 0) {
       return null;
     }
     // Only the first title found is considered
     TitleTag firstTitle = (TitleTag) titleNodes[0];
     return (firstTitle == null) ? null : firstTitle.getTitle();
   } catch (MalformedURLException e) {
     log.warning("recomputeRegName", e);
     return null;
   } catch (ParserException e) {
     // A missing file is reported without the stack trace
     if (e.getThrowable() instanceof FileNotFoundException) {
       log.warning("recomputeRegName: " + e.getThrowable().toString());
     } else {
       log.warning("recomputeRegName", e);
     }
     return null;
   }
 }
示例#3
0
 /**
  * Loads the location list from {@code maps.xml} in the location directory, stores it in {@code
  * mInfoList} (an empty list when the parser returns null), refreshes the adapter and finally
  * deletes the xml file. Any failure is logged and otherwise ignored.
  */
 private void parseMapsXml() {
   try {
     String mapsPath = LocationLoader.getLocationDir(NavigineApp.AppContext, null) + "/maps.xml";
     List<LocationInfo> parsed = Parser.parseMapsXml(NavigineApp.AppContext, mapsPath);
     if (parsed == null) {
       // nothing parsed: fall back to an empty list
       mInfoList = new ArrayList<LocationInfo>();
     } else {
       mInfoList = parsed;
     }
     mAdapter.updateList();
     // the file is removed once its content has been taken over
     File mapsFile = new File(mapsPath);
     mapsFile.delete();
   } catch (Throwable e) {
     Log.e(TAG, Log.getStackTraceString(e));
   }
 }
示例#4
0
  /**
   * Reads a zip code, asks the weather proxy for the matching weather data and prints the
   * city/state/temperature string extracted from it.
   *
   * @param args command line arguments (unused)
   * @throws java.lang.Exception declared for callers; failures inside the body are caught and
   *     printed
   */
  @SuppressWarnings("deprecation")
  public static void main(String[] args) throws java.lang.Exception {
    try {
      String zip = ReadInput();
      String rawWeather = new WeatherProxy().getWeatherForZip(zip);
      System.out.println(Parser.GetCityStateTemperature(rawWeather));
    } catch (NumberFormatException e) {
      // the zip code could not be interpreted as a number
      System.out.println("invalid zip code format");
    } catch (Exception e) {
      // every other failure is simply printed
      System.out.println(e);
    }
  }
示例#5
0
  /**
   * Entry point of the socket client.
   *
   * <p>For every line of {@code input.txt} the sentence is parsed, {@code srl-identifier.py} is
   * run on the resulting tree, the feature files are sent line by line to the two local servers
   * (ports 19999 and 19990) and finally the output of {@code concept-formulator.py} is printed.
   *
   * <p>All files, sockets and process streams are closed on every path, subprocess output is read
   * before waiting for the process to exit, and failures are reported instead of being dropped.
   *
   * @param args command line arguments (unused)
   */
  public static void main(String[] args) {
    // Host and ports of the identifier and classifier servers
    String host = "localhost";
    int port = 19999;
    int port2 = 19990;
    Parser parser = new Parser();
    System.out.println("SocketClient initialized");

    BufferedReader inputReader = null;
    try {
      inputReader = new BufferedReader(new FileReader("input.txt"));
      String sent;
      while ((sent = inputReader.readLine()) != null) {
        // each sentence is terminated with a carriage return before it is parsed
        String sentence = sent + (char) 13;
        System.out.println("Parsing....");

        Tree tree = parser.parse(sentence);
        System.out.println(tree);

        System.out.println("Extracting features...");
        // NOTE(review): Runtime.exec(String) splits the command on whitespace and does not
        // interpret the quotes; kept unchanged because the script may rely on the current
        // argument layout - confirm before switching to ProcessBuilder.
        String srlIdentifier = "python srl-identifier.py " + '"' + tree + '"';
        Process pp = Runtime.getRuntime().exec(srlIdentifier);
        discardOutputAndWait(pp);

        // presumably the *.test files read here were just written by the script - verify
        sendFeaturesToServers(host, port, port2);

        Process p = Runtime.getRuntime().exec("python concept-formulator.py");
        BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream()));
        try {
          // read the output first: waiting before reading can block once the pipe is full
          String line2;
          while ((line2 = br.readLine()) != null) {
            System.out.println(line2);
          }
        } finally {
          closeResource(br);
        }
        p.waitFor();
      }
    } catch (InterruptedException e) {
      // keep the interrupt visible to whoever runs this thread
      Thread.currentThread().interrupt();
      System.err.println("SocketClient interrupted: " + e);
    } catch (Exception e) {
      // getMessage() may be null, so compare from the constant side
      if ("python: not found".equals(e.getMessage())) {
        System.out.println("No python interpreter found.");
      } else {
        System.err.println("SocketClient failed: " + e);
      }
    } finally {
      closeResource(inputReader);
    }
  }

  /** Reads and drops everything the process prints to stdout, then waits for it to exit. */
  private static void discardOutputAndWait(Process process)
      throws java.io.IOException, InterruptedException {
    BufferedReader out = new BufferedReader(new InputStreamReader(process.getInputStream()));
    try {
      while (out.readLine() != null) {
        // output is intentionally ignored; reading only keeps the pipe from filling up
      }
    } finally {
      closeResource(out);
    }
    process.waitFor();
  }

  /**
   * Sends each line of identifier.test / classifier.test to the two servers and writes the
   * answers to identifier-output.txt and classifier-output.txt (prefixed by the pred.test line).
   */
  private static void sendFeaturesToServers(String host, int port, int port2)
      throws java.io.IOException {
    BufferedReader reader = null;
    BufferedReader classifier = null;
    BufferedReader preds = null;
    PrintWriter identifierOutput = null;
    PrintWriter classifierOutput = null;
    try {
      reader = new BufferedReader(new FileReader("identifier.test"));
      classifier = new BufferedReader(new FileReader("classifier.test"));
      identifierOutput = new PrintWriter("identifier-output.txt");
      classifierOutput = new PrintWriter("classifier-output.txt");
      preds = new BufferedReader(new FileReader("pred.test"));

      // the address does not change between lines, so it is resolved once
      InetAddress address = InetAddress.getByName(host);
      String line;
      while ((line = reader.readLine()) != null) {
        String pred = preds.readLine();
        String features = line + (char) 13;
        String classifierFeature = classifier.readLine() + (char) 13;

        Socket connection = null;
        Socket connection2 = null;
        try {
          // both connections are opened up front, as before
          connection = new Socket(address, port);
          connection2 = new Socket(address, port2);
          identifierOutput.println(exchangeLine(connection, features));
          classifierOutput.println(pred + ' ' + exchangeLine(connection2, classifierFeature));
        } finally {
          closeSocket(connection);
          closeSocket(connection2);
        }
      }
    } finally {
      // closing the writers also flushes what was collected so far
      closeResource(identifierOutput);
      closeResource(classifierOutput);
      closeResource(preds);
      closeResource(classifier);
      closeResource(reader);
    }
  }

  /** Writes the message as US-ASCII over the socket and returns the first line of the reply. */
  private static String exchangeLine(Socket connection, String message)
      throws java.io.IOException {
    OutputStreamWriter osw =
        new OutputStreamWriter(new BufferedOutputStream(connection.getOutputStream()), "US-ASCII");
    BufferedReader fromServer =
        new BufferedReader(new InputStreamReader(connection.getInputStream()));
    osw.write(message);
    osw.flush();
    return fromServer.readLine();
  }

  /** Closes a stream, reader or writer; null is accepted and close failures are ignored. */
  private static void closeResource(java.io.Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (java.io.IOException ignored) {
      // nothing useful can be done when closing fails
    }
  }

  /** Closes a socket; null is accepted and close failures are ignored. */
  private static void closeSocket(Socket socket) {
    if (socket == null) {
      return;
    }
    try {
      socket.close();
    } catch (java.io.IOException ignored) {
      // nothing useful can be done when closing fails
    }
  }
示例#6
0
 /**
  * Processes one page of posts: every post that is not yet in {@code Main.post_post_hash} gets
  * its pictures collected (from photo posts and from the iframes of photoset posts) and is passed
  * to {@code Main.handlePost}; already known posts go to {@code handleNonDownloadPost}.
  *
  * <p>A remark that does not match the {@code lores} pattern is skipped instead of aborting the
  * whole page, and the photoset loop never reads past the end of the image array.
  *
  * @param address address of the page to process
  * @return false when the page contains no posts, true otherwise (also when an error was caught
  *     and reported)
  */
 private static boolean handleURL(String address) {
   Main.status(String.format("Processing page \"%s\".", address));
   try {
     Node[] post_nodes = getPosts(address).toNodeArray();
     if (post_nodes.length == 0) {
       return false;
     }
     for (Node post_node : post_nodes) {
       if (!(post_node instanceof TagNode)) {
         continue;
       }
       TagNode post = (TagNode) post_node;
       // the numeric id follows a 5 character prefix in the "id" attribute
       Post new_post = new Post(Long.parseLong(post.getAttribute("id").substring(5)));
       if (!Main.post_post_hash.containsKey(new_post)) {
         NodeList photo_posts = getPhotoPosts(post.getChildren());
         NodeList remarks = getRemarks(photo_posts);
         for (Node node : remarks.toNodeArray()) {
           Matcher matcher = lores.matcher(node.getText());
           if (!matcher.find()) {
             // no media url in this remark: an empty url would make the thumbnail
             // computation throw and abort the remaining posts of the page
             continue;
           }
           String media_url = matcher.group();
           // drop the 17 leading characters and the trailing character of the match
           media_url = media_url.substring(17, media_url.length() - 1);
           URL thumb_url = new URL(thumbnailAddress(media_url));
           new_post.pictures.add(new Picture(new URL(media_url), thumb_url));
         }
         NodeList photoset_posts = getPhotosetPosts(post.getChildren());
         NodeList iframes = getIFrames(photoset_posts);
         for (Node node : iframes.toNodeArray()) {
           if (node instanceof TagNode) {
             String iframe_url = ((TagNode) node).getAttribute("src");
             Parser parser2 = new Parser(iframe_url);
             NodeList a_list = parser2.extractAllNodesThatMatch(new TagNameFilter("a"));
             Node[] a_array = a_list.toNodeArray();
             Node[] img_array =
                 a_list.extractAllNodesThatMatch(new TagNameFilter("img"), true).toNodeArray();
             // the two arrays may differ in length; stay inside both
             int count = Math.min(a_array.length, img_array.length);
             for (int i = 0; i < count; i++) {
               String media_url = ((TagNode) img_array[i]).getAttribute("src");
               URL thumb_url = new URL(thumbnailAddress(media_url));
               new_post.pictures.add(new Picture(new URL(media_url), thumb_url));
             }
           }
         }
         Main.handlePost(new_post);
       } else {
         new_post = post_post_hash.get(new_post);
         handleNonDownloadPost(new_post);
       }
     }
   } catch (Exception ex) {
     ex.printStackTrace();
     Main.status("Error handling post.");
   }
   return true;
 }

 /** Replaces the part between the last "_" and the last "." of the media url with "_75sq". */
 private static String thumbnailAddress(String media_url) {
   String size_suffix =
       media_url.substring(media_url.lastIndexOf("_"), media_url.lastIndexOf("."));
   return media_url.replace(size_suffix, "_75sq");
 }
示例#7
0
 /**
  * Serves one client connection.
  *
  * <p>The client must first send a preferences block ({@code >>begin prefs<<}); a parser is then
  * created and configured, "ready" is sent, and every following line is either a new preferences
  * block, the {@code >>done<<} marker, or input that is parsed and answered with the result
  * followed by {@code >>end<<}.
  *
  * <p>{@code status} is set to 1 after a regular shutdown and to 0 when an exception occurred or
  * the client disconnected before sending anything. Streams and socket are released on every
  * path, and the preferences file is deleted whenever the parser was handed it.
  */
 public void run() {
   Parser parser;
   Hashtable preferences;
   // becomes true once the parser has been asked to use prefsFile
   boolean prefsFileInUse = false;
   try {
     // setup the input stream from the client to server
     inputFromClient =
         new BufferedReader(new InputStreamReader(new DataInputStream(socket.getInputStream())));
     // setup the output stream from the server to client
     outputToClient = new DataOutputStream(socket.getOutputStream());
     // get the preferences
     inputString = inputFromClient.readLine();
     if (inputString == null) {
       // the client closed the connection without sending a line
       System.out.println("error: expected prefs");
       status = 0;
       return;
     }
     if (inputString.startsWith(">>begin prefs<<")) {
       preferences = getPreferences(inputFromClient);
     } else {
       System.out.println("error: expected prefs");
       return;
     }
     // create new parser
     parser = new Parser(preferences, outputToClient);
     // configure the parser; presumably this writes prefsFile - verify against configParser
     prefsFileInUse = true;
     if (parser.configParser(prefsFile) != 1) {
       System.out.println("error: preferences could not be written. Using defaults");
     }
     // send a "ready" signal
     outputToClient.writeBytes("ready\n");
     inputString = inputFromClient.readLine();
     while (inputString != null) {
       // check to see if termination condition is set
       if (terminate) {
         outputString = ">>TERMINATE<<\n";
         outputToClient.writeBytes(outputString + ">>end<<\n");
         // clear flag
         terminate = false;
       }
       if (inputString.compareTo(">>done<<") == 0) {
         // the client is finished; leave the loop and shut down
         break;
       }
       // check to see if the user is sending new preferences
       else if (inputString.startsWith(">>begin prefs<<")) {
         preferences = getPreferences(inputFromClient);
         parser.setPrefs(preferences);
         outputToClient.writeBytes("ready\n");
       } else {
         outputString = parser.parse(inputString + "\n");
         outputToClient.writeBytes(outputString + ">>end<<\n");
         outputToClient.flush();
       }
       inputString = inputFromClient.readLine();
     }
     // then terminate the connection; a failure here is still reported through status
     inputFromClient.close();
     outputToClient.close();
     socket.close();
     status = 1;
   } catch (Exception e) {
     status = 0;
     e.printStackTrace();
   } finally {
     // covers the early return and the exception paths; closing again is harmless
     releaseConnection();
     if (prefsFileInUse) {
       // delete the preferences file that was being used for this connection
       try {
         if ((new File(prefsFile)).delete()) {
           System.out.println("temp file: " + prefsFile + " deleted");
         } else {
           System.out.println("temp file: " + prefsFile + " could not be deleted");
         }
       } catch (Exception e) {
         e.printStackTrace();
       }
     }
   }
 }

 /** Closes both client streams and the socket, ignoring nulls and close failures. */
 private void releaseConnection() {
   try {
     if (inputFromClient != null) inputFromClient.close();
   } catch (Exception ignored) {
     // best effort: the connection is being dropped anyway
   }
   try {
     if (outputToClient != null) outputToClient.close();
   } catch (Exception ignored) {
     // best effort: the connection is being dropped anyway
   }
   try {
     if (socket != null) socket.close();
   } catch (Exception ignored) {
     // best effort: the connection is being dropped anyway
   }
 }
示例#8
0
  /**
   * Binds the annotated variables: path segment templates, the request body, query parameters,
   * form parameters, header parameters and cookie parameters, in that order.
   *
   * @param http http context
   * @param arg argument array
   * @throws QueryException query exception
   * @throws IOException I/O exception
   */
  void bind(final HTTPContext http, final Expr[] arg) throws QueryException, IOException {
    // path segments: a segment matching the template names the variable to bind
    for (int s = 0; s < path.size; s++) {
      final Matcher template = TEMPLATE.matcher(path.segment[s]);
      if (template.find()) {
        final QNm varName = new QNm(token(template.group(1)), context);
        bind(varName, arg, new Atm(http.segment(s)));
      }
    }

    // request body: cached once, then bound in the format given by the content type
    final String contentType = http.contentType();
    IOContent cached = null;

    if (requestBody != null) {
      cached = cache(http, null);
      try {
        cached.name(http.method + IO.XMLSUFFIX);
        bind(requestBody, arg, Parser.item(cached, context.context.prop, contentType));
      } catch (final IOException ex) {
        error(INPUT_CONV, ex);
      }
    }

    // query parameters
    final Map<String, String[]> parameters = http.params();
    for (final RestXqParam rxp : queryParams) {
      bind(rxp, arg, parameters.get(rxp.key));
    }

    // form parameters
    if (!formParams.isEmpty()) {
      if (MimeTypes.APP_FORM.equals(contentType)) {
        // parameters encoded in a form body are added to the parameter map first
        cached = cache(http, cached);
        addParams(cached.toString(), parameters);
      }
      for (final RestXqParam rxp : formParams) {
        bind(rxp, arg, parameters.get(rxp.key));
      }
    }

    // header parameters: every header value is split at commas
    for (final RestXqParam rxp : headerParams) {
      final StringList values = new StringList();
      final Enumeration<?> headers = http.req.getHeaders(rxp.key);
      while (headers.hasMoreElements()) {
        final String[] parts = headers.nextElement().toString().split(", *");
        for (final String part : parts) {
          values.add(part);
        }
      }
      bind(rxp, arg, values.toArray());
    }

    // cookie parameters: the last cookie with a matching name wins
    final Cookie[] cookies = http.req.getCookies();
    for (final RestXqParam rxp : cookieParams) {
      String value = null;
      if (cookies != null) {
        for (final Cookie cookie : cookies) {
          if (rxp.key.equals(cookie.getName())) {
            value = cookie.getValue();
          }
        }
      }
      if (value != null) {
        bind(rxp, arg, value);
      } else {
        bind(rxp, arg);
      }
    }
  }