コード例 #1
0
ファイル: NGram.java プロジェクト: patterncat/sina-services
 /**
  * Replaces the instance's data with the list of word n-grams built from its tokens.
  *
  * <p>The data is split on whitespace when it is a {@code String}, used as-is when it is a
  * {@code List}, and treated as having no tokens otherwise. For every size in
  * {@code gramSizes}, in order, every contiguous run of that many tokens is joined with single
  * spaces, interned and appended to the result. Sizes that are non-positive or larger than the
  * token count are skipped.
  *
  * @param inst instance whose data is read and then overwritten with the n-gram list
  */
 @Override
 public void addThruPipe(Instance inst) {
   Object data = inst.getData();
   List<String> tokens = Collections.emptyList();
   if (data instanceof String) {
     String[] parts = ((String) data).split("\\s+");
     // split() keeps a leading empty token when the text starts with whitespace and returns
     // [""] for an empty string; drop it so no n-gram is built from a non-existent word.
     int first = (parts.length > 0 && parts[0].isEmpty()) ? 1 : 0;
     tokens = Arrays.asList(parts).subList(first, parts.length);
   } else if (data instanceof List) {
     // The element type cannot be verified at runtime; a non-String element fails on use.
     @SuppressWarnings("unchecked")
     List<String> given = (List<String>) data;
     tokens = given;
   }

   ArrayList<String> list = new ArrayList<String>();
   // One builder is reused for every n-gram; no synchronization is needed for a local.
   StringBuilder buf = new StringBuilder();
   for (int len : gramSizes) {
     if (len <= 0 || len > tokens.size()) {
       continue;
     }
     for (int start = 0; start + len <= tokens.size(); start++) {
       buf.setLength(0);
       buf.append(tokens.get(start));
       for (int k = 1; k < len; k++) {
         buf.append(' ').append(tokens.get(start + k));
       }
       list.add(buf.toString().intern());
     }
   }
   inst.setData(list);
 }
コード例 #2
0
  /**
   * Builds and initialises the inference lattice for one instance.
   *
   * <p>A {@code Node} is created for every (position, slot) pair, with {@code ysize} slots per
   * position (presumably one per label). For each template {@code i} whose base index
   * {@code data[l][i]} is neither {@code -1} nor {@code >= weights.length}, weights are folded
   * into the node: an order-0 template adds to the node's {@code score}, an order-1 template
   * adds to every entry of the node's {@code trans} array. Templates of any other order are
   * ignored.
   *
   * @param carrier sample instance; its data is cast to {@code int[][]} and its
   *     {@code length()} gives the number of lattice positions
   * @return the inference lattice, indexed as {@code [position][slot]}
   */
  protected Node[][] initialLattice(Instance carrier) {
    final int[][] featureBase = (int[][]) carrier.getData();
    final int positions = carrier.length();
    final Node[][] lattice = new Node[positions][];

    for (int pos = 0; pos < positions; pos++) {
      final Node[] column = new Node[ysize];
      lattice[pos] = column;

      for (int slot = 0; slot < ysize; slot++) {
        final Node node = new Node(ysize);
        column[slot] = node;

        for (int t = 0; t < orders.length; t++) {
          final int base = featureBase[pos][t];
          // Original author's workaround (TODO: xpqiu 2013.2.1): skip missing features (-1)
          // and base indices that start beyond the end of the weight array.
          final boolean usable = base != -1 && base < weights.length;
          if (!usable) {
            continue;
          }

          if (orders[t] == 0) {
            node.score += weights[base + slot];
          } else if (orders[t] == 1) {
            // The weight for trans(slot, prev) sits at base + slot + prev * ysize
            // in the flat weight array.
            for (int prev = 0; prev < ysize; prev++) {
              node.trans[prev] += weights[base + slot + prev * ysize];
            }
          }
        }
      }
    }

    return lattice;
  }
コード例 #3
0
 /**
  * Sets one binary feature on the instance's sparse vector for every template in
  * {@code group} that matches the instance's source string.
  *
  * <p>A template counts as matching when {@code matches} returns a value strictly greater
  * than zero. The feature name is {@code "template:"} followed by the template's
  * {@code comment}; its index is obtained from {@code features.lookupIndex}.
  *
  * @param inst instance whose source is cast to {@code String} and whose data is cast to
  *     {@code BinarySparseVector}; the vector is updated in place
  * @throws Exception propagated unchanged from any call made here
  */
 @Override
 public void addThruPipe(Instance inst) throws Exception {
   String str = (String) inst.getSource();
   BinarySparseVector sv = (BinarySparseVector) inst.getData();
   for (int i = 0; i < group.size(); i++) {
     RETemplate qt = group.get(i);
     // Only a strictly positive match weight turns the template's feature on.
     if (qt.matches(str) > 0) {
       int id = features.lookupIndex("template:" + qt.comment);
       sv.put(id);
     }
   }
 }
コード例 #4
0
  /**
   * Counts the cliques on which the predicted sequence disagrees with the reference sequence,
   * calling {@code diffClique} for each of them.
   *
   * <p>Stores the instance data, the reference labels and the predicted labels in the fields
   * {@code data}, {@code golds} and {@code preds} before comparing.
   *
   * @param inst instance supplying the data and, when {@code targets} is null, the reference
   *     labels
   * @param weights passed through to {@code diffClique}
   * @param targets reference labels as {@code int[]}, or null to use the instance's target
   * @param predicts predicted labels as {@code int[]}
   * @return the number of cliques that differ between the predicted and reference sequences
   */
  @Override
  protected int diff(Instance inst, float[] weights, Object targets, Object predicts) {
    data = (int[][]) inst.getData();
    golds = (targets == null) ? (int[]) inst.getTarget() : (int[]) targets;
    preds = (int[]) predicts;

    int mismatched = 0;

    // Position 0 has no predecessor, so only its own label is compared.
    if (golds[0] != preds[0]) {
      mismatched += 1;
      diffClique(weights, 0);
    }

    // Every later clique covers positions p-1 and p; it differs if either label differs.
    for (int p = 1; p < data.length; p++) {
      boolean cliqueAgrees = golds[p - 1] == preds[p - 1] && golds[p] == preds[p];
      if (!cliqueAgrees) {
        mismatched += 1;
        diffClique(weights, p);
      }
    }

    return mismatched;
  }
コード例 #5
0
ファイル: DictLabel.java プロジェクト: HarveyTvT/SocialPlus
  /**
   * Builds a per-position dictionary-label matrix for the instance and stores it with
   * {@code instance.setDicData}.
   *
   * <p>Only the first row of the instance data ({@code data[0]}) is read. The result has one
   * row per element of that row and {@code labels.size()} columns. At each position the next
   * {@code indexLen} elements are looked up in {@code dict}; each returned candidate is handed
   * to {@code label} or {@code check}, which receive {@code dicData} and presumably fill it in
   * (their bodies are not visible here). Finally, every row for which {@code hasWay} returns
   * true has all of its entries incremented by one.
   *
   * @param instance instance whose data is cast to {@code String[][]}
   * @throws Exception propagated unchanged from any call made here
   */
  public void addThruPipe(Instance instance) throws Exception {
    String[][] data = (String[][]) instance.getData();

    int length = data[0].length;
    int[][] dicData = new int[length][labels.size()];

    int indexLen = dict.getIndexLen();
    for (int i = 0; i < length; i++) {
      // Only look up positions that still have at least indexLen elements ahead of them.
      if (i + indexLen <= length) {
        WordInfo s = getNextN(data[0], i, indexLen);
        int[] index = dict.getIndex(s.word);
        if (index != null) {
          for (int k = 0; k < index.length; k++) {
            int n = index[k];
            if (n == indexLen) { // Special case of the check function below, only for speed-up
              label(i, s.len, dicData);
              if (!mutiple) {
                // NOTE(review): the outer loop's i++ still runs after this break, so the
                // position advances by s.len + 1 — confirm this is intended.
                i = i + s.len;
                break;
              }
            }
            // When mutiple is set, execution also reaches here after the special case above.
            int len = check(i, n, length, data[0], dicData);
            if (len > 0 && !mutiple) {
              // NOTE(review): same extra +1 from the outer loop's i++ as above — confirm.
              i = i + len;
              break;
            }
          }
        }
      }
    }

    // Increment every entry of each row that hasWay() accepts.
    for (int i = 0; i < length; i++)
      if (hasWay(dicData[i])) for (int j = 0; j < dicData[i].length; j++) dicData[i][j]++;

    instance.setDicData(dicData);
  }