コード例 #1
0
  /**
   * Copies a polarity-balanced subset of the tweets in {@code file1} to {@code file2}.
   *
   * <p>The per-polarity tweet counts come from {@code TwitterCorpusStat.sentimentFreq}. The
   * positive and negative classes are cut down to a common size, and that size is lowered further
   * when positive + negative would otherwise exceed the neutral count. The neutral class is then
   * cut down to positive + negative. A class that already has its target size is copied in full.
   * Which tweets of an oversized class survive is decided by {@code randomIndices}, seeded with the
   * current time, so the output differs between runs.
   *
   * <p>Tweets whose polarity is none of positive, negative or neutral are not written.
   *
   * @param file1 path of the corpus file to read
   * @param fieldDelim1 field delimiter of the input file
   * @param fields1 field layout of the input file, passed through to the reader
   * @param file2 path of the corpus file to write
   * @param fieldDelim2 field delimiter of the output file
   * @param fields2 field layout of the output file, passed through to the writer
   * @param append passed through to the writer's constructor
   * @throws IOException if counting, reading, writing or closing fails
   * @throws IllegalStateException if the computed class sizes are inconsistent
   */
  private void balanceSentiment(
      String file1,
      String fieldDelim1,
      int[] fields1,
      String file2,
      String fieldDelim2,
      int[] fields2,
      boolean append)
      throws IOException {
    Map<Integer, Integer> sentiFreq = TwitterCorpusStat.sentimentFreq(file1, fieldDelim1, fields1);

    // Slot 0 = positive, 1 = negative, 2 = neutral. A polarity that is absent from the map counts
    // as zero instead of failing on unboxing null.
    Integer positiveCount = sentiFreq.get(OMTweet.POLARITY_POSITIVE);
    Integer negativeCount = sentiFreq.get(OMTweet.POLARITY_NEGATIVE);
    Integer neutralCount = sentiFreq.get(OMTweet.POLARITY_NEUTRAL);
    int[] counts = new int[3];
    counts[0] = (positiveCount != null) ? positiveCount : 0;
    counts[1] = (negativeCount != null) ? negativeCount : 0;
    counts[2] = (neutralCount != null) ? neutralCount : 0;

    // keep[c] == null means "copy every tweet of class c"; otherwise it holds the positions
    // (within class c, in file order) of the tweets to copy.
    int[][] keep = new int[3][];

    // If twice the smaller subjective class still exceeds the neutral class, both subjective
    // classes must lose `trim` extra tweets. Round up so that positive + negative can never end
    // up one above the neutral count when the surplus is odd.
    int trim = 0;
    int balancedSubjective = Math.min(counts[0], counts[1]) * 2;
    if (balancedSubjective > counts[2]) {
      trim = (balancedSubjective - counts[2] + 1) / 2;
    }

    if (counts[0] != counts[1] || trim > 0) {
      int larger = (counts[0] < counts[1]) ? 1 : 0;
      int smaller = 1 - larger;
      int target = counts[smaller] - trim;

      keep[larger] = randomIndices(counts[larger], target, System.currentTimeMillis());
      counts[larger] = target;
      if (trim > 0) {
        keep[smaller] = randomIndices(counts[smaller], target, System.currentTimeMillis());
        counts[smaller] = target;
      }
    }

    int subjective = counts[0] + counts[1];
    if (subjective < counts[2]) {
      keep[2] = randomIndices(counts[2], subjective, System.currentTimeMillis());
      counts[2] = subjective;
    } else if (subjective > counts[2]) {
      // Not expected after the rounding above; kept as a guard against inconsistent counts.
      throw new IllegalStateException(
          "subjective tweets (" + subjective + ") exceed neutral tweets (" + counts[2] + ")");
    }

    int[] seen = new int[3]; // tweets of each class read so far
    int[] next = new int[3]; // position in keep[c] of the next index to match

    OMTwitterCorpusFileReader reader = new OMTwitterCorpusFileReader(file1, fieldDelim1, fields1);
    try {
      OMTwitterCorpusFileWriter writer =
          new OMTwitterCorpusFileWriter(file2, fieldDelim2, fields2, append);
      try {
        while (reader.hasNext()) {
          OMTweet tweet = reader.next();

          int senti;
          switch (tweet.getPolarity()) {
            case OMTweet.POLARITY_POSITIVE:
              senti = 0;
              break;
            case OMTweet.POLARITY_NEGATIVE:
              senti = 1;
              break;
            case OMTweet.POLARITY_NEUTRAL:
              senti = 2;
              break;
            default:
              // Unknown polarity: skip it so it is not counted under another class, which would
              // shift that class's sampled positions.
              continue;
          }

          if (keep[senti] == null) {
            writer.write(tweet);
          } else if (next[senti] < keep[senti].length && keep[senti][next[senti]] == seen[senti]) {
            // NOTE(review): this single forward pass assumes randomIndices returns its indices
            // in ascending order -- confirm against its implementation.
            writer.write(tweet);
            next[senti]++;
          }

          seen[senti]++;
        }
      } finally {
        writer.close();
      }
    } finally {
      reader.close();
    }
  }
コード例 #2
0
  /**
   * Prints the evaluation report to standard output, closes {@code evalCorpusReader} and then
   * calls {@code super.destroy()}.
   *
   * <p>The report has four parts: one row per label in {@code map} (sorted by label name) with
   * precision, recall and F-measure taken from {@code stat}; a table of answer and classified
   * entity counts; one row per sentiment class from {@code senti}; and a two-row summary that
   * merges sentiment classes 0 and 1 and sets them against class 2. In both {@code stat} and
   * {@code senti}, column 0 is the precision denominator, column 1 the recall denominator and
   * column 2 the shared numerator. A metric that cannot be computed is printed as -1.
   *
   * <p>Any exception raised while printing is reported with {@code printStackTrace()} and does not
   * prevent the reader from being closed or {@code super.destroy()} from running.
   */
  @Override
  public void destroy() {
    try {
      // Sort the label -> index entries by label name so rows are printed in key order.
      ArrayList<Entry<String, Integer>> labels =
          new ArrayList<Entry<String, Integer>>(map.entrySet());
      Collections.sort(
          labels,
          new Comparator<Entry<String, Integer>>() {
            @Override
            public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
              return o1.getKey().compareTo(o2.getKey());
            }
          });

      // ---- per-label scores ----
      System.out.println("# NER");
      for (Entry<String, Integer> label : labels) {
        int idx = label.getValue();
        double prec = ratioOrMinusOne(stat[idx][2], stat[idx][0]);
        double recall = ratioOrMinusOne(stat[idx][2], stat[idx][1]);
        double f = fMeasureOrMinusOne(prec, recall);
        System.out.format(
            "%02d\t%s\t%d\t%.4f\t%.4f\t%.4f\n", idx, label.getKey(), stat[idx][1], prec, recall, f);
      }
      System.out.println();

      // ---- per-entity-type counts ----
      System.out.format("%10s\t%12s\t%12s\t%12s\n", "Index", "Type", "Answer", "Classified");
      // NOTE(review): the stride of 3 and the idx / 3 lookups presumably rely on every entity
      // type contributing three labels that are adjacent in sorted order -- confirm.
      for (int i = 0; i < labels.size(); i += 3) {
        Entry<String, Integer> label = labels.get(i);
        int idx = label.getValue();
        if (idx != labelNoneIdx) {
          String type = label.getKey();
          // Strip the trailing "_suffix"; a key without an underscore is printed unchanged
          // rather than aborting the rest of the report.
          int cut = type.lastIndexOf('_');
          if (cut >= 0) {
            type = type.substring(0, cut);
          }
          System.out.format(
              "%010d\t%12s\t%12d\t%12d\n",
              idx / 3, type, answerEntityCnt[idx / 3], classifiedEntityCnt[idx / 3]);
        }
      }
      System.out.println();

      // ---- per-sentiment scores ----
      System.out.println("# senti");
      for (int i = 0; i < 3; i++) {
        printSentiRow(i, sentiStr(i), senti[i][2], senti[i][0], senti[i][1]);
      }

      // ---- classes 0 and 1 merged, against class 2 ----
      System.out.println("# senti: sbj & neu");
      printSentiRow(
          0,
          sentiStr(0) + "/" + sentiStr(1),
          senti[0][2] + senti[1][2],
          senti[0][0] + senti[1][0],
          senti[0][1] + senti[1][1]);
      printSentiRow(2, sentiStr(2), senti[2][2], senti[2][0], senti[2][1]);
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // Close the reader even when the report failed part-way through.
      try {
        if (evalCorpusReader != null) {
          evalCorpusReader.close();
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    super.destroy();
  }

  /** Returns {@code numerator / denominator} as a double, or -1 when the denominator is zero. */
  private static double ratioOrMinusOne(long numerator, long denominator) {
    return (denominator != 0) ? (double) numerator / (double) denominator : -1;
  }

  /** Returns the harmonic mean of the two scores, or -1 when their sum is not positive. */
  private static double fMeasureOrMinusOne(double prec, double recall) {
    return (prec + recall > 0) ? (2 * prec * recall / (prec + recall)) : -1;
  }

  /**
   * Prints one sentiment row as {@code correct/classified = precision}, {@code correct/answer =
   * recall} and the F-measure.
   */
  private static void printSentiRow(
      int idx, String label, long correct, long classified, long answer) {
    double prec = ratioOrMinusOne(correct, classified);
    double recall = ratioOrMinusOne(correct, answer);
    double f = fMeasureOrMinusOne(prec, recall);
    System.out.format(
        "%02d %15s %3d/%3d =%6.4f  %3d/%3d =%6.4f %7.4f\n",
        idx, label, correct, classified, prec, correct, answer, recall, f);
  }