예제 #1
0
  /**
   * Derives the day of week, day of month and hour of day from a timestamp string.
   *
   * <p>Field 0 of {@code input} is cast to {@code String} and read at fixed character offsets:
   * year at 0-3, month at 5-6, day at 8-9, hour at 11-12, minute at 14-15 and second at 17-18.
   * The characters between those fields are ignored. The pieces are re-assembled and parsed with
   * a {@link SimpleDateFormat}, then interpreted through a default {@link Calendar}.
   *
   * <p>This method never propagates a failure; problems are reported inside the returned bag:
   *
   * <ul>
   *   <li>{@code "petting #1" + message} when slicing or parsing the timestamp fails,
   *   <li>{@code "petting #2"} when {@code input.isNull()} is true,
   *   <li>{@code "petting #3" + message} for any other exception (also logged to stderr).
   * </ul>
   *
   * @param input tuple whose first field holds the timestamp text
   * @return a bag holding exactly one single-field tuple: either
   *     {@code "dayOfWeek:dayOfMonth:hour"} (values as returned by {@link Calendar#get}) or one
   *     of the marker strings listed above
   * @throws IOException declared by the signature; the body itself catches every exception
   */
  public DataBag exec(Tuple input) throws IOException {
    try {
      if (input.isNull()) {
        DataBag nullInputBag = mBagFactory.newDefaultBag();
        Tuple marker = TupleFactory.getInstance().newTuple(1);
        marker.set(0, "petting #2");
        nullInputBag.add(marker);
        return nullInputBag;
      }

      // The result is a bag with a single one-field tuple.
      DataBag outputBag = mBagFactory.newDefaultBag();
      String inputTime = (String) input.get(0);
      try {
        DateFormat formatter = new SimpleDateFormat("MM/dd/yyyy kk:mm:ss");
        Date date =
            formatter.parse(
                String.format(
                    "%s/%s/%s %s:%s:%s",
                    inputTime.substring(5, 7),
                    inputTime.substring(8, 10),
                    inputTime.substring(0, 4),
                    inputTime.substring(11, 13),
                    inputTime.substring(14, 16),
                    // Seconds are two digits wide like every other field; the end index is
                    // clamped so an 18-character input with a single seconds digit still parses.
                    inputTime.substring(17, Math.min(19, inputTime.length()))));
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        int dayOfWeek = calendar.get(Calendar.DAY_OF_WEEK);
        int dayOfMonth = calendar.get(Calendar.DAY_OF_MONTH);
        int hour = calendar.get(Calendar.HOUR_OF_DAY);

        Tuple result = TupleFactory.getInstance().newTuple(1);
        result.set(0, String.format("%d:%d:%d", dayOfWeek, dayOfMonth, hour));
        outputBag.add(result);
      } catch (Exception e) {
        // Malformed or too-short timestamps (and a null field) end up here.
        Tuple marker = TupleFactory.getInstance().newTuple(1);
        marker.set(0, "petting #1" + e.getMessage());
        outputBag.add(marker);
      }
      return outputBag;
    } catch (Exception e) {
      // Anything outside the parsing step, e.g. a null tuple or a non-String first field.
      System.err.println("Error with ?? ..");
      DataBag failureBag = mBagFactory.newDefaultBag();
      Tuple marker = TupleFactory.getInstance().newTuple(1);
      marker.set(0, "petting #3" + e.getMessage());
      failureBag.add(marker);
      return failureBag;
    }
  }
  /**
   * Returns the text content of an HTML document with comments and tags stripped.
   *
   * <p>Field 0 of {@code input} is cast to {@code String}. When the text contains an opening
   * {@code <body ...>} tag followed by a {@code </body} marker, only the content between the end
   * of the opening tag and that marker is kept; otherwise the whole string is used. HTML comments
   * are removed first and then every remaining {@code <...>} tag; both may span several lines.
   *
   * @param input tuple whose first field holds the HTML text
   * @return the stripped text, or an empty string when the tuple is null, {@code input.isNull()}
   *     is true, or the first field is null or empty
   * @throws IOException if reading the field from the tuple fails
   */
  @Override
  public String exec(Tuple input) throws IOException {
    if (input == null || input.isNull()) {
      return "";
    }
    String html = (String) input.get(0);
    if (html == null || html.isEmpty()) {
      return "";
    }

    // Locate the body content. Each step is attempted only if the previous one succeeded, so a
    // missing "<body" can no longer turn into a bogus positive offset.
    int bodyOpen = html.indexOf("<body");
    // Skip to the '>' that closes the opening tag so its attributes do not leak into the text.
    int openTagEnd = bodyOpen < 0 ? -1 : html.indexOf('>', bodyOpen);
    int bodyClose = openTagEnd < 0 ? -1 : html.indexOf("</body", openTagEnd + 1);
    String text = bodyClose < 0 ? html : html.substring(openTagEnd + 1, bodyClose);

    // Comments go first: the generic tag pattern would otherwise cut a comment at its first '>'
    // and leave the remainder behind. DOTALL lets both patterns match across line breaks.
    Pattern comments = Pattern.compile("<!--.*?-->", Pattern.DOTALL);
    text = comments.matcher(text).replaceAll("");
    Pattern tags = Pattern.compile("<.+?>", Pattern.DOTALL);
    Matcher tagMatcher = tags.matcher(text);
    return tagMatcher.replaceAll("");
  }
예제 #3
0
  /**
   * Tokenizes the first field of the input tuple and returns the longer tokens as a bag.
   *
   * <p>The field's {@code toString()} text is fed through a {@code PTBTokenizer} producing
   * {@code CoreLabel}s. A token is kept only when its {@code value()} is more than two
   * characters long; for each kept token a tuple built from its {@code word()} is added to the
   * bag, and the token is also printed to stderr.
   *
   * @param input tuple whose first field holds the text to tokenize
   * @return a bag with one tuple per kept token, or {@code null} when the tuple is null, has no
   *     fields, or its first field is null
   * @throws IOException if reading the field from the tuple fails
   */
  public DataBag exec(Tuple input) throws IOException {
    boolean nothingToTokenize = input == null || input.size() < 1 || input.isNull(0);
    if (nothingToTokenize) {
      return null;
    }

    DataBag keptTokens = bagFactory.newDefaultBag();

    String text = input.get(0).toString();
    PTBTokenizer tokenizer =
        new PTBTokenizer(new StringReader(text), new CoreLabelTokenFactory(), "");

    while (tokenizer.hasNext()) {
      CoreLabel token = (CoreLabel) tokenizer.next();
      if (token.value().length() <= 2) {
        continue; // skip tokens of one or two characters
      }
      // NOTE(review): per-token stderr output looks like leftover debugging - confirm before
      // removing, since it is part of the current observable behavior.
      System.err.println(token.toString());
      keptTokens.add(tupleFactory.newTuple(token.word()));
    }
    return keptTokens;
  }
예제 #4
0
 /**
  * Reports whether the field at the given position is null, as answered by {@code t}.
  *
  * @param fieldNum position of the field to check
  * @return whatever {@code t.isNull(fieldNum)} returns
  * @throws ExecException if the delegate call fails
  */
 @Override
 public boolean isNull(int fieldNum) throws ExecException {
   final boolean fieldIsNull = t.isNull(fieldNum);
   return fieldIsNull;
 }