Ejemplo n.º 1
0
 /**
  * Looks up the dictionary entry whose {@code entryBytes} equal {@code utfBytes}, considering
  * only entries whose hash code equals {@code hashCode}.
  *
  * <p>{@code hashCodes} is binary-searched, so it must be sorted ascending; {@code entries} is
  * indexed with the same positions. Because the binary search may land anywhere inside a run of
  * equal hash codes, the neighbours on both sides of the hit are examined as well.
  *
  * @param utfBytes bytes compared against each candidate's {@code entryBytes}
  * @param hashCode hash code used to locate the candidates
  * @param charLength not read by this method
  * @return the matching entry; {@code null} when no entry has this hash code; {@code UNKNOWN}
  *     when the hash code is present but none of its entries has equal bytes
  */
 public DictionaryEntry lookup(byte utfBytes[], int hashCode, int charLength) {
   final int hit = Arrays.binarySearch(hashCodes, hashCode);
   if (hit < 0) {
     return null;
   }

   final DictionaryEntry direct = entries[hit];
   if (Arrays.equals(direct.entryBytes, utfBytes)) {
     return direct;
   }

   // Collisions below the hit.
   for (int left = hit - 1; left >= 0 && hashCodes[left] == hashCode; left--) {
     final DictionaryEntry candidate = entries[left];
     if (Arrays.equals(candidate.entryBytes, utfBytes)) {
       return candidate;
     }
   }

   // Collisions above the hit.
   for (int right = hit + 1; right < entries.length && hashCodes[right] == hashCode; right++) {
     final DictionaryEntry candidate = entries[right];
     if (Arrays.equals(candidate.entryBytes, utfBytes)) {
       return candidate;
     }
   }

   return UNKNOWN;
 }
Ejemplo n.º 2
0
  /**
   * Copies every non-null, non-excluded field of {@code from} into the same-named field of
   * {@code to}, converting each value to the target field's generic type.
   *
   * <p>Copied from the copy method in StructUtil (to avoid dragging that code in); it may belong
   * in struct itself.
   *
   * <p>Fields missing in {@code to} are skipped. A failed conversion or assignment is reported on
   * standard output and the copy continues with the next field.
   *
   * @param from source object whose fields are read
   * @param to target object whose fields are written
   * @param excludes names of fields that must not be copied; the array is not modified
   * @return {@code to}
   * @throws Exception if enumerating or reading the source fields fails
   */
  public static <T extends struct> T xcopy(struct from, T to, String... excludes) throws Exception {
    // binarySearch needs sorted input; sort a private copy so a caller-supplied array is not
    // reordered as a side effect.
    final String[] sortedExcludes = excludes.clone();
    Arrays.sort(sortedExcludes);

    for (Field f : from.fields()) {
      if (Arrays.binarySearch(sortedExcludes, f.getName()) >= 0) {
        continue;
      }

      Object o = f.get(from);
      if (o == null) {
        continue;
      }

      Field tof = to.getField(f.getName());
      if (tof != null) {
        try {
          tof.set(to, Converter.cnv(tof.getGenericType(), o));
        } catch (Exception e) {
          // Best effort: report the field that could not be converted and keep going.
          System.out.println(
              "Failed to convert "
                  + f.getName()
                  + " from "
                  + from.getClass()
                  + " to "
                  + to.getClass()
                  + " value "
                  + o
                  + " exception "
                  + e);
        }
      }
    }

    return to;
  }
  /**
   * Returns an int to int hash map with old row numbers mapped to new row numbers, which defines
   * a sorted order for this column.
   *
   * <p>Each row's value is located in the array returned by {@code getValuesInRange} with a
   * binary search (so that array is presumably sorted - verify against its implementation). Equal
   * values resolve to the same index, so a flag array tracks which indices were already handed
   * out and {@code getNewRow} supplies an alternative for duplicates.
   *
   * @param validRows row numbers from which the sorted order is retrieved.
   */
  public VIntIntHashMap getNewOrder(int[] validRows) {

    final Object[] orderedValues = getValuesInRange(validRows[0], validRows[validRows.length - 1]);
    final Object[] valuesByRow = getValues(validRows);

    // reordered[k] holds the row number that ends up at sorted position k.
    final int[] reordered = new int[validRows.length];

    // taken[k] == true means sorted position k was already assigned to some row.
    final boolean[] taken = new boolean[validRows.length];

    for (int i = 0; i < validRows.length; i++) {
      final Object value = valuesByRow[i];

      // position of this row's value among the sorted values
      int target = Arrays.binarySearch(orderedValues, value);

      // identical values yield the same position: pick an alternative one if it is in use
      if (taken[target]) {
        target = getNewRow(value, orderedValues, target, taken);
      }

      // claim the position for this row
      taken[target] = true;
      reordered[target] = validRows[i];
    }

    return VHashService.getMappedOrder(validRows, reordered);
  }
Ejemplo n.º 4
0
 /**
  * Binary search of a key in the first {@code rows} elements of a sorted array. If the key
  * occurs several times, the index of its last occurrence is returned.
  *
  * @param a array to search into
  * @param e key to search for
  * @return index of the last hit, or the negative value {@code -(insertion point) - 1} produced
  *     by {@link Arrays#binarySearch(int[], int, int, int)} when the key is absent
  */
 private int sortedLastIndexOf(final int[] a, final int e) {
   final int hit = Arrays.binarySearch(a, 0, rows, e);
   if (hit < 0) {
     return hit;
   }
   // Advance over the run of equal keys that follows the hit.
   int last = hit;
   while (last + 1 < rows && a[last + 1] == e) {
     last++;
   }
   return last;
 }
Ejemplo n.º 5
0
  /**
   * Reads virtual keys until one of them is contained in {@code allowed}, and returns it.
   *
   * <p>Keys that are not in the allowed set are discarded. The method does not return until an
   * allowed character has been read.
   *
   * @param allowed the characters that are accepted; the array is not modified
   * @return the first character read that is a member of {@code allowed}
   * @throws IOException if reading a key fails
   */
  public final int readCharacter(final char[] allowed) throws IOException {
    // binarySearch needs sorted input; sort a private copy so the caller's array keeps its order.
    final char[] sortedAllowed = allowed.clone();
    Arrays.sort(sortedAllowed);

    // if we restrict to a limited set and the current character
    // is not in the set, then try again.
    char c;
    do {
      c = (char) readVirtualKey();
    } while (Arrays.binarySearch(sortedAllowed, c) < 0);

    return c;
  }
Ejemplo n.º 6
0
  /**
   * Inserts a new record.
   *
   * <p>When no interval is stored yet and the new IDs simply continue after {@code baseid}, only
   * {@code baseid} is advanced. Otherwise the insertion position is located in {@code pres} by
   * binary search, an existing interval is split if {@code pre} falls inside it, the entries from
   * that position on are passed to {@code increment}, and a new interval is added.
   *
   * @param pre record PRE
   * @param id record ID
   * @param c number of inserted records
   */
  public void insert(final int pre, final int id, final int c) {
    if (rows == 0 && pre == id && id == baseid + 1) {
      // no mapping and we append at the end => nothing to do
      baseid += c;
      return;
    }

    // defaults, used when the table is empty or the new interval becomes the first entry
    int pos = 0;
    int inc = c;
    int oid = pre;

    if (rows > 0) {
      pos = Arrays.binarySearch(pres, 0, rows, pre);
      if (pos < 0) {
        // not found: convert the encoded result into the insertion point
        pos = -pos - 1;
        if (pos != 0) {
          // check if inserting into an existing id interval
          final int prev = pos - 1;
          // number of ids covered by the preceding interval
          final int prevcnt = nids[prev] - fids[prev] + 1;
          final int prevpre = pres[prev];

          if (pre < prevpre + prevcnt) {
            // split the id interval
            final int split = pre - prevpre;
            final int fid = fids[prev] + split;

            // add a new next interval
            // (must happen before nids[prev]/incs[prev] are modified below, as it reads them)
            add(pos, pre, fid, nids[prev], incs[prev], oids[prev]);

            // shrink the previous interval
            nids[prev] = fid - 1;
            incs[prev] -= prevcnt - split;

            oid = oids[prev];
            inc += incs[prev];
          } else {
            // pre lies behind the preceding interval: derive oid from its accumulated increment
            oid = pre - incs[prev];
            inc += incs[prev];
          }
        }
      } else if (pos > 0) {
        // exact hit on an existing interval start that is not the first entry
        oid = oids[pos];
        inc += incs[pos - 1];
      }

      // NOTE(review): presumably shifts the entries from pos onwards by c - confirm in increment()
      increment(pos, c);
    }

    // add the new interval
    add(pos, pre, id, id + c - 1, inc, oid);
  }
Ejemplo n.º 7
0
    /**
     * {@inheritDoc}
     *
     * <p>Membership is tested by binary search on a sorted private copy of {@code array}; the
     * caller's array is left unmodified.
     */
    public boolean retainAll(int[] array) {
      boolean changed = false;
      // Sort a copy: reordering the argument would be a side effect visible to the caller.
      final int[] sorted = array.clone();
      Arrays.sort(sorted);
      int[] values = _values;
      byte[] states = _states;

      // Walk the slots backwards and drop every occupied slot whose value is not in the argument.
      for (int i = values.length; i-- > 0; ) {
        if (states[i] == FULL && (Arrays.binarySearch(sorted, values[i]) < 0)) {
          removeAt(i);
          changed = true;
        }
      }
      return changed;
    }
Ejemplo n.º 8
0
 /**
  * Removes every value of this set that does not occur in {@code array}.
  *
  * <p>Membership is tested by binary search on a sorted private copy of {@code array}; the
  * caller's array is left unmodified.
  *
  * @param array the values to keep
  * @return {@code true} if at least one value was removed
  */
 public boolean retainAll(final int[] array) {
   boolean changed = false;
   // Sort a copy: reordering the argument would be a side effect visible to the caller.
   final int[] sorted = array.clone();
   Arrays.sort(sorted);
   final int[] set = this._set;
   final byte[] states = this._states;
   int i = set.length;
   while (i-- > 0) {
     // only slots whose state is 1 are considered
     if (states[i] == 1 && Arrays.binarySearch(sorted, set[i]) < 0) {
       this.remove(set[i]);
       changed = true;
     }
   }
   return changed;
 }
Ejemplo n.º 9
0
    /**
     * {@inheritDoc}
     *
     * <p>Membership is tested by binary search on a sorted private copy of {@code array}; the
     * caller's array is left unmodified.
     */
    public boolean retainAll(long[] array) {
      boolean changed = false;
      // Sort a copy: reordering the argument would be a side effect visible to the caller.
      final long[] sorted = array.clone();
      Arrays.sort(sorted);
      long[] set = _set;
      byte[] states = _states;

      // Walk the slots backwards and drop every occupied slot whose value is not in the argument.
      for (int i = set.length; i-- > 0; ) {
        if (states[i] == FULL && (Arrays.binarySearch(sorted, set[i]) < 0)) {
          removeAt(i);
          changed = true;
        }
      }
      return changed;
    }
    /**
     * {@inheritDoc}
     *
     * <p>Membership is tested by binary search on a sorted private copy of {@code array}; the
     * caller's array is left unmodified.
     */
    @Override
    public boolean retainAll(char[] array) {
      boolean changed = false;
      // Sort a copy: reordering the argument would be a side effect visible to the caller.
      final char[] sorted = array.clone();
      Arrays.sort(sorted);
      TCharOffheapArray values = _values;
      TByteOffheapArray states = _states;

      // Walk the slots backwards and drop every occupied slot whose value is not in the argument.
      for (int i = capacity(); i-- > 0; ) {
        if (states.get(i) == FULL && (Arrays.binarySearch(sorted, values.get(i)) < 0)) {
          removeAt(i);
          changed = true;
        }
      }
      return changed;
    }
Ejemplo n.º 11
0
 /**
  * Returns every device in the device list whose product ID is one of {@code productIDs}.
  * Product IDs can be obtained from product names with
  * {@link #productNameToID( String[] productName ) productNameToID()}, like so:
  *
  * <pre>USBDevice[] devices = deviceManager.getDeviceByProductID(
  *  deviceManager.productNameToID( new String[] { "USB-AO16-16A", "USB-AO16-16" } ) );</pre>
  *
  * <p>The argument is validated first and is never modified; a sorted copy is used for the
  * lookups.
  *
  * @param productIDs an array containing one or more product IDs to search for.
  * @return An array of all the devices found, in device-list order. If no devices were found
  *     matching the specified set of product IDs, the array will be empty (i.e. contain zero
  *     items).
  * @throws IllegalArgumentException if {@code productIDs} is null or empty, or if any ID lies
  *     outside {@code MIN_PRODUCT_ID}..{@code MAX_PRODUCT_ID}
  */
 public USBDevice[] getDeviceByProductID(int[] productIDs) {
   if (productIDs == null || productIDs.length < 1) {
     throw new IllegalArgumentException("Invalid product ID array");
   }
   for (int candidate : productIDs) {
     if (candidate < MIN_PRODUCT_ID || candidate > MAX_PRODUCT_ID) {
       throw new IllegalArgumentException("Invalid product ID: " + candidate);
     }
   }

   // binarySearch needs sorted input; work on a copy to leave the argument alone
   final int[] wanted = productIDs.clone();
   Arrays.sort(wanted);

   final Vector<USBDevice> matches = new Vector<USBDevice>();
   for (int position = 0; position < deviceList.size(); position++) {
     final boolean requested =
         Arrays.binarySearch(wanted, deviceList.get(position).getProductID()) >= 0;
     if (requested) {
       matches.add(deviceList.get(position));
     }
   }
   return matches.toArray(new USBDevice[0]);
 } // getDeviceByProductID()
Ejemplo n.º 12
0
  /**
   * Returns the symbol that the given address falls into.
   *
   * <p>The symbol table is binary-searched with a {@code SymbolComparator}, so it is expected to
   * be sorted consistently with that comparator. An exact hit returns that symbol; otherwise the
   * symbol immediately before the insertion point is returned.
   *
   * @param vma the address to look up
   * @return the matching or closest preceding symbol, or {@code null} if there is no symbol
   *     table or the address sorts before the first symbol
   */
  public Symbol getSymbol(final long vma) {
    if (this.symbols == null) {
      return null;
    }

    // @@@ If this works, move it to a single instance in this class.
    final SymbolComparator symbol_comparator = new SymbolComparator();

    final int ndx = Arrays.binarySearch(this.symbols, vma, symbol_comparator);
    // Index 0 is a valid hit; testing "> 0" here would send it down the not-found path and
    // read symbols[-2].
    if (ndx >= 0) {
      return this.symbols[ndx];
    }

    // Not found: binarySearch returned -(insertion point) - 1.
    final int insertionPoint = -ndx - 1;
    if (insertionPoint == 0) {
      // the address sorts before every symbol (also covers an empty table)
      return null;
    }
    return this.symbols[insertionPoint - 1];
  }
 /**
  * Copies the doc-files directory of the package containing the current annotation type, unless
  * that is unnecessary.
  *
  * <p>Nothing is copied when the containing package is itself among the configured packages, or
  * when the doc files of this package were already copied for another class; copying again
  * would only produce duplicates.
  */
 private void copyDocFiles() {
   PackageDoc pkg = annotationTypeDoc.containingPackage();

   boolean packageIsDocumented =
       configuration.packages != null && Arrays.binarySearch(configuration.packages, pkg) >= 0;
   if (packageIsDocumented || containingPackagesSeen.contains(pkg.name())) {
     return;
   }

   String packageDir =
       Util.getPackageSourcePath(configuration, annotationTypeDoc.containingPackage())
           + DirectoryManager.getDirectoryPath(annotationTypeDoc.containingPackage())
           + File.separator;
   Util.copyDocFiles(configuration, packageDir, DocletConstants.DOC_FILES_DIR_NAME, true);

   // remember the package so later classes from it do not trigger another copy
   containingPackagesSeen.add(pkg.name());
 }
Ejemplo n.º 14
0
 /**
  * Reads {@code n} values and {@code m} queries; for each query prints the smallest absolute
  * difference between the query and any of the values, followed by a space.
  *
  * <p>The values are sorted so each query can be answered by binary search: an exact hit prints
  * 0, otherwise the elements around the insertion point are compared. {@code is(index)} decides
  * whether an index may be inspected (presumably a bounds check - confirm in its definition).
  */
 public void solve() {
   n = ni();
   int m = ni();
   int a[] = new int[n];
   for (int k = 0; k < n; k++) {
     a[k] = ni();
   }
   Arrays.sort(a);

   // offsets around the insertion point, in the order they are examined
   final int[] offsets = {0, -1, 1};

   for (int q = 0; q < m; q++) {
     int t = ni();
     int found = Arrays.binarySearch(a, t);
     if (found >= 0) {
       pw.print(0 + " ");
       continue;
     }

     int insertion = (-found) - 1;
     int best = Integer.MAX_VALUE;
     for (int offset : offsets) {
       int idx = insertion + offset;
       if (is(idx)) {
         best = Math.min(best, Math.abs(t - a[idx]));
       }
     }
     pw.print(best + " ");
   }
 }
Ejemplo n.º 15
0
 /**
  * Returns the tree node stored for the given fold, attaching its children first when needed.
  *
  * <p>The fold is located in {@code m_Folds} by binary search and the node at the same position
  * of {@code m_Nodes} is used; the fold is expected to be present. If that node has a best test
  * and is at the bottom level, the child split that contains the fold is looked up and the
  * subtree of each of its children for this fold is set as a child of the node.
  *
  * @param fold the fold whose tree is requested
  * @return the node for {@code fold}
  */
 public final ClusNode getTree(int fold) {
   final ClusNode node = m_Nodes[Arrays.binarySearch(m_Folds, fold)];
   if (!(node.hasBestTest() && node.atBottomLevel())) {
     return node;
   }

   // first child split that contains this fold
   OptXValSplit owner = null;
   final int nb = getNbChildren();
   for (int c = 0; c < nb && owner == null; c++) {
     final OptXValSplit candidate = (OptXValSplit) getChild(c);
     if (candidate.contains(fold)) {
       owner = candidate;
     }
   }

   final int arity = node.updateArity();
   for (int branch = 0; branch < arity; branch++) {
     final OptXValNode subnode = (OptXValNode) owner.getChild(branch);
     node.setChild(subnode.getTree(fold), branch);
   }
   return node;
 }
Ejemplo n.º 16
0
  /**
   * Reads a count followed by that many integers (one per line, first token) from standard
   * input and counts the index triples {@code (i, j, k)} / {@code (a, b, c)} for which {@code
   * A[i] * A[j] + A[k] == (A[a] + A[b]) * A[c]}.
   *
   * <p>All right-hand-side values are precomputed and sorted; each left-hand-side value is then
   * located by binary search and the length of its run of equal values is added to the total.
   * The sorted array, a progress line per left-hand-side triple, and the final count are printed.
   *
   * @param args not used
   * @throws IOException if reading standard input fails
   */
  public static void main(String[] args) throws IOException {

    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    final int N = parseInt(br.readLine());
    final int[] A = new int[N];
    for (int line = 0; line < N; line++) {
      StringTokenizer st = new StringTokenizer(br.readLine());
      A[line] = parseInt(st.nextToken());
    }

    // every value of (x + y) * z
    final int total = N * N * N;
    final int[] fe = new int[total];
    int filled = 0;
    for (int x : A) {
      for (int y : A) {
        for (int z : A) {
          fe[filled++] = (x + y) * z;
        }
      }
    }

    Arrays.sort(fe);
    System.out.println(Arrays.toString(fe));

    int ret = 0;
    for (int x : A) {
      for (int y : A) {
        for (int z : A) {
          final int abc = x * y + z;
          final int hit = Arrays.binarySearch(fe, abc);
          if (hit >= 0) {
            // step back to the first element of the run of equal values ...
            int first = hit;
            while (first > 0 && fe[first - 1] == abc) {
              first--;
            }
            // ... then count the whole run
            for (int p = first; p < total && fe[p] == abc; p++) {
              ret++;
            }
          }
          System.out.println(abc + " " + ret);
        }
      }
    }

    System.out.println(ret);

    br.close();
  }
Ejemplo n.º 17
0
    /**
     * Loads options from a reader into this instance. Will read from the stream until it hits a
     * section header, ie a '[' character, and resets the reader to point to this character.
     *
     * <p>Each line is trimmed and classified as a blank line, a comment line (first character is
     * a comment delimiter) or an option line. An option line is split at the first option
     * delimiter; if none is found it is split at the last whitespace of the first whitespace run,
     * and if there is no such whitespace the whole line becomes an option with an empty value.
     * Reading also stops when the reader reports end of stream.
     *
     * @param reader where to read from
     * @throws IOException at an I/O problem
     */
    public void load(BufferedReader reader) throws IOException {
      while (reader.ready()) {
        // remember the line start so a section header can be handed back to the caller
        // NOTE(review): reset() can fail if a line is longer than NAME_MAXLENGTH - confirm limit
        reader.mark(NAME_MAXLENGTH);
        String rawLine = reader.readLine();
        if (rawLine == null) {
          // ready() only promises a non-blocking read; at end of stream readLine() still returns
          // null (e.g. for string-backed readers), which must not be dereferenced.
          return;
        }
        String line = rawLine.trim();

        // Check for section header
        if (line.length() > 0 && line.charAt(0) == HEADER_START) {
          reader.reset();
          return;
        }

        int delimIndex = -1;
        // blank line
        if (line.equals("")) {
          this.addBlankLine();
        }
        // comment line
        else if ((delimIndex = Arrays.binarySearch(this.commentDelimsSorted, line.charAt(0)))
            >= 0) {
          addComment(line.substring(1), this.commentDelimsSorted[delimIndex]);
        }
        // option line
        else {
          // from here on delimIndex is the position of the option delimiter within the line
          delimIndex = -1;
          int delimNum = -1;
          int lastSpaceIndex = -1;
          for (int i = 0, l = line.length(); i < l && delimIndex < 0; i++) {
            delimNum = Arrays.binarySearch(this.optionDelimsSorted, line.charAt(i));
            if (delimNum >= 0) {
              delimIndex = i;
            } else {
              boolean isSpace =
                  Arrays.binarySearch(this.OPTION_DELIMS_WHITESPACE, line.charAt(i)) >= 0;
              if (!isSpace && lastSpaceIndex >= 0) {
                // a non-delimiter character after whitespace: the name has ended, stop scanning
                break;
              } else if (isSpace) {
                lastSpaceIndex = i;
              }
            }
          }
          // delimiter at start of line
          if (delimIndex == 0) {
            // XXX what's a man got to do?
          }
          // no delimiter found
          else if (delimIndex < 0) {
            if (lastSpaceIndex < 0) {
              this.set(line, "");
            } else {
              this.set(line.substring(0, lastSpaceIndex), line.substring(lastSpaceIndex + 1));
            }
          }
          // delimiter found
          else {
            this.set(
                line.substring(0, delimIndex),
                line.substring(delimIndex + 1),
                line.charAt(delimIndex));
          }
        }
      }
    }
Ejemplo n.º 18
0
  /**
   * Reads CEL files. <code>readCel</code> reads the intensity signals from the tumor sample and the
   * normal sample. The copy number at a particular locus is calculated using the ratio of the tumor
   * intensity at that locus and the corresponding normal intensity times 2.
   *
   * <p>Per probe set, the intensities of the non-outlier perfect-match cells are averaged
   * separately for the even-numbered groups (stored in {@code pmA}) and the odd-numbered groups
   * (stored in {@code pmB}). The per-probe-set data is then sorted and matched against the
   * annotated probe set IDs to fill the copy number, intensity, allele and outlier arrays of this
   * object, after which {@code globalNormalization} is invoked.
   *
   * <p>If any of the three files cannot be read, a message is printed and the method returns
   * without modifying this object.
   *
   * @param normalCelFileName Affymetrix CEL file of the paired normal sample
   * @param tumorCelFileName Affymetrix CEL file of the tumor sample
   * @param cdfFileName Affymetrix library file (CDF file) for the platform on which the samples are
   *     generated
   */
  private void readCel(String normalCelFileName, String tumorCelFileName, String cdfFileName) {

    FusionCELData celNormal;
    FusionCELData celTumor;
    FusionCDFData cdf;

    celNormal = new FusionCELData();
    celNormal.setFileName(normalCelFileName);
    if (celNormal.read() == false) {
      System.out.println("Failed to read the CEL file.");
      return;
    }

    celTumor = new FusionCELData();
    celTumor.setFileName(tumorCelFileName);
    if (celTumor.read() == false) {
      System.out.println("Failed to read the CEL file.");
      return;
    }

    cdf = new FusionCDFData();
    cdf.setFileName(cdfFileName);
    if (cdf.read() == false) {
      System.out.println("Failed to read the CDF file.");
      return;
    }

    int nsets = cdf.getHeader().getNumProbeSets();

    // one accumulator per probe set and sample; both arrays are filled in the same order
    ProbeSetIntensityData[] probeSetDataNormal = new ProbeSetIntensityData[nsets];
    ProbeSetIntensityData[] probeSetDataTumor = new ProbeSetIntensityData[nsets];
    // SNP array structure:
    //      1.  Each probeset contains several groups (the number of groups varies among different
    // probeset)
    //      2.  Each group contains several cells (the number of cells also varies among different
    // groups)
    for (int iset = 0; iset < nsets; iset++) {

      // NOTE(review): probeSetName is not used below; the name is fetched again when the IDs
      // are assigned
      String probeSetName = cdf.getProbeSetName(iset); // get the probeset name
      FusionCDFProbeSetInformation set = new FusionCDFProbeSetInformation();
      cdf.getProbeSetInformation(iset, set);
      int ngroups = set.getNumGroups();

      // number of cells that contributed to each perfect-match sum
      int numPmANormal = 0; // Pm : perfect match
      int numPmBNormal = 0;

      int numPmATumor = 0;
      int numPmBTumor = 0;

      // NOTE(review): the mismatch counters are declared but never updated or read in this
      // method
      int numMmANormal = 0; // Mm: Mis-Match
      int numMmBNormal = 0;

      int numMmATumor = 0;
      int numMmBTumor = 0;

      probeSetDataNormal[iset] = new ProbeSetIntensityData();
      probeSetDataTumor[iset] = new ProbeSetIntensityData();

      probeSetDataNormal[iset].probeSetType = set.getProbeSetType();
      probeSetDataTumor[iset].probeSetType = set.getProbeSetType();

      probeSetDataNormal[iset].probeSetID = cdf.getProbeSetName(iset) + "";
      probeSetDataTumor[iset].probeSetID = cdf.getProbeSetName(iset) + "";

      for (int igroup = 0; igroup < ngroups; igroup++) {

        FusionCDFProbeGroupInformation group = new FusionCDFProbeGroupInformation();
        set.getGroup(igroup, group);
        int ncells = group.getNumCells();

        for (int icell = 0; icell < ncells; icell++) {

          FusionCDFProbeInformation probe = new FusionCDFProbeInformation();
          group.getCell(icell, probe);

          try {

            char pBase = probe.getPBase();
            char tBase = probe.getTBase();
            //                       only If the match is perfect, the intensity of this cell
            // contributes
            // 213 == 'a' + 't' and 202 == 'c' + 'g' (lower-case character codes)
            if ((((pBase + tBase) == 213) || ((pBase + tBase) == 202))) {
              //            Perfect match is the match that with pBase:tBase = a:t or c:g
              // even-numbered groups accumulate into pmA, odd-numbered groups into pmB
              if ((igroup % 2) == 0) {

                if (!celNormal.isOutlier(probe.getX(), probe.getY())) {
                  probeSetDataNormal[iset].pmA +=
                      celNormal.getIntensity(probe.getX(), probe.getY());
                  numPmANormal++;
                }

                if (!celTumor.isOutlier(probe.getX(), probe.getY())) {
                  probeSetDataTumor[iset].pmA += celTumor.getIntensity(probe.getX(), probe.getY());
                  numPmATumor++;
                }

              } else {

                if (!celNormal.isOutlier(probe.getX(), probe.getY())) {
                  probeSetDataNormal[iset].pmB +=
                      celNormal.getIntensity(probe.getX(), probe.getY());
                  numPmBNormal++;
                }

                if (!celTumor.isOutlier(probe.getX(), probe.getY())) {
                  probeSetDataTumor[iset].pmB += celTumor.getIntensity(probe.getX(), probe.getY());
                  numPmBTumor++;
                }
              }
            }
          } catch (Exception e) {
            // NOTE(review): any failure while reading a cell is silently ignored; the cell then
            // simply does not contribute
          }
        }
      }
      // using the average of intensity of the perfect match cells as the intensity for certain
      // probeset
      // (sums with no contributing cell are left as they are, avoiding a division by zero)

      if (numPmANormal != 0) {
        probeSetDataNormal[iset].pmA = probeSetDataNormal[iset].pmA / numPmANormal;
      }
      if (numPmBNormal != 0) {
        probeSetDataNormal[iset].pmB = probeSetDataNormal[iset].pmB / numPmBNormal;
      }
      if (numPmATumor != 0) {
        probeSetDataTumor[iset].pmA = probeSetDataTumor[iset].pmA / numPmATumor;
      }
      if (numPmBTumor != 0) {
        probeSetDataTumor[iset].pmB = probeSetDataTumor[iset].pmB / numPmBTumor;
      }
    }

    try {
      genotypeCalling(probeSetDataNormal); //  to decide whether certain probe is AB type
    } catch (Exception e) {
      // NOTE(review): a failure of genotypeCalling is silently ignored
    }

    // Sorted with the elements' natural ordering so they can be binary-searched below.
    // NOTE(review): the index found in the normal array is also used for the tumor array, which
    // assumes both arrays end up in the same order (presumably ordered by probeSetID) - confirm
    // against ProbeSetIntensityData.compareTo
    Arrays.sort(probeSetDataNormal);
    Arrays.sort(probeSetDataTumor);

    // indices (into the annotated probe set arrays) of all accepted probe sets / accepted SNP
    // probe sets
    ArrayList<Integer> indexFoundList = new ArrayList<Integer>(probeSetDataNormal.length);
    ArrayList<Integer> indexFoundListSNP = new ArrayList<Integer>(probeSetDataNormal.length);

    for (int i = 0; i < numAnnotatedProbeSet; i++) {
      int indexFound =
          Arrays.binarySearch(probeSetDataNormal, new ProbeSetIntensityData(probeSetID[i]));

      // annotated probe sets that are absent from the CDF are skipped
      if (indexFound >= 0) {

        isGenotypeAB[i] = probeSetDataNormal[indexFound].isGenotypeAB;
        probeSetType[i] = probeSetDataNormal[indexFound].probeSetType;

        // copy number = 2 * tumor intensity / normal intensity
        copyNumber[i] =
            2
                * (probeSetDataTumor[indexFound].pmA + probeSetDataTumor[indexFound].pmB)
                / (probeSetDataNormal[indexFound].pmA + probeSetDataNormal[indexFound].pmB);

        intensityNormal[i] =
            probeSetDataNormal[indexFound].pmA + probeSetDataNormal[indexFound].pmB;

        intensityTumor[i] = probeSetDataTumor[indexFound].pmA + probeSetDataTumor[indexFound].pmB;

        //                For 250k affymetrix chip, all the probes are SNP probes
        if (probeSetType[i] == FusionGeneChipProbeSetType.GenotypingProbeSetType) {
          alleleA[i] = probeSetDataTumor[indexFound].pmA / probeSetDataNormal[indexFound].pmA;
          alleleB[i] = probeSetDataTumor[indexFound].pmB / probeSetDataNormal[indexFound].pmB;

          // accept the probe set only if both allele ratios and the copy number lie within
          // the range bounded by 1E-10 and 30
          if (alleleA[i] > 1E-10
              && alleleA[i] <= 30
              && alleleB[i] >= 1E-10
              && alleleB[i] <= 30
              && copyNumber[i] > 1E-10
              && copyNumber[i] <= 30) {
            isOutlier[i] = false;
            indexFoundList.add(i);
            indexFoundListSNP.add(i);
          }
        }
        //              For SNP6.0, there are half SNP probes and half CN probes
        if (probeSetType[i] == FusionGeneChipProbeSetType.CopyNumberProbeSetType) {
          if (copyNumber[i] > 1E-10 && copyNumber[i] <= 30) {
            isOutlier[i] = false;
            indexFoundList.add(i);
          }
        }
      }
    }

    // Disabled experiment, kept for reference:
    /* Median filtering on the intensity data before calculating the copy numbers*/
    // Filters f1 = new Filters(intensityNormal);
    // f1.medianFilter(3);
    // Filters f2 = new Filters(intensityTumor);
    // f2.medianFilter(3);

    // for (int j = 0; j < indexFoundList.size(); j ++) {

    //     int i = indexFoundList.get(j);

    //     copyNumber[i] = intensityTumor[i] / intensityNormal[i] * 2;

    //     if (!(copyNumber[i] > 1E-10 && copyNumber[i] <=30)) {
    //         copyNumber[i] = 2.0;
    //         isOutlier[i] = true;
    //     }

    // }

    globalNormalization(indexFoundList, indexFoundListSNP);
  }
  /**
   * Builds the term-to-vertex ({@code .tvc}) and vertex-to-term ({@code .vtc}) mapping files from
   * the anchor CSV file.
   *
   * <p>The serialized valid-ID and redirect tables are loaded first (the program exits with
   * status 1 if either cannot be read). The CSV file is then scanned four times: to collect the
   * distinct terms, to attach vertices to the terms, to collect the distinct vertices, and to
   * attach terms to the vertices. Rows whose ID is neither valid nor a redirect are ignored.
   * Every CSV scanner and every output stream is closed, also when a pass fails.
   *
   * @param args optionally one argument, the configuration file to parse; without it a built-in
   *     default path is used
   * @throws IOException if the CSV file cannot be opened or a mapping file cannot be written
   */
  public static void main(String[] args) throws IOException {

    if (args.length == 1) {
      Configuration.parseConfigurationFile(args[0]);
    } else {
      Configuration.parseConfigurationFile(
          "/scratch/weale/data/config/enwiki/CreateTitleWordMapping.xml");
    }

    System.out.println("Creating File: link-" + Configuration.stemming);

    System.out.println("Initializing Valid ID List.");
    IDVertexTranslation vid =
        readSerializedOrExit(binaryFileName(".vid"), IDVertexTranslation.class);

    System.out.println("Initializing Redirect List.");
    IDIDRedirect rdl = readSerializedOrExit(binaryFileName(".rdr"), IDIDRedirect.class);

    String csvFile =
        Configuration.baseDir
            + "/"
            + Configuration.sourceDir
            + "/"
            + Configuration.type
            + "/"
            + Configuration.date
            + "/"
            + "anchor.csv";

    System.out.println("Opening .csv File");
    Scanner in = new Scanner(new FileReader(csvFile));

    // Set stem flag
    stem = Configuration.stemming.equals("t");

    /* STEP 1
     *
     * Create the word set in order to know how many terms
     * are in our initial file.
     */
    System.out.println("Creating Term Array");
    TreeSet<String> ts = new TreeSet<String>();
    try {
      while (in.hasNext()) {
        AnchorRow row = AnchorRow.parse(in.nextLine());
        if (resolveVertex(vid, rdl, row.id) >= 0) {
          ts.add(normalizeTerm(row.term));
        }
      }
    } finally {
      in.close();
    }
    System.out.println(ts.size());

    /* STEP 2
     *
     * Create the TermToVertexCount array of the appropriate size
     * and initialize objects.
     *
     * We use arrays for space efficiency -- these can get large.
     */
    TermToVertexCount[] terms = new TermToVertexCount[ts.size()];
    int i = 0;
    for (String distinctTerm : ts) {
      terms[i] = new TermToVertexCount(distinctTerm);
      i++;
    }
    // one comparator instance serves both the sort and every lookup
    TermToVertexCountComparator termOrder = new TermToVertexCountComparator();
    Arrays.sort(terms, termOrder);
    ts = null; // Free Up Memory (?)

    /* STEP 3
     *
     * Add vertices to our TermToVertexCount objects.
     */
    in = new Scanner(new FileReader(csvFile));
    try {
      while (in.hasNext()) {
        AnchorRow row = AnchorRow.parse(in.nextLine());
        int vertex = resolveVertex(vid, rdl, row.id);
        if (vertex >= 0) {
          String term = normalizeTerm(row.term);
          int pos = Arrays.binarySearch(terms, new TermToVertexCount(term), termOrder);
          if (pos >= 0) {
            terms[pos].addVertex(vertex, row.count);
          } else {
            System.err.println("Invalid Term Found: " + term);
          }
        }
      }
    } finally {
      // this scanner used to be replaced in STEP 5 without ever being closed
      in.close();
    }

    /* STEP 4
     *
     * Write objects to the .tvc file
     */
    System.out.println("Writing Mappings To File");
    TermToVertexMapping tvm = new TermToVertexMapping(terms);
    writeSerialized(binaryFileName("-" + "link-" + Configuration.stemming + ".tvc"), tvm);
    terms = null; // Free Up Memory (?)

    /* STEP 5
     *
     * Create the vertex set in order to know how many verticies
     * are in our initial file.
     */
    in = new Scanner(new FileReader(csvFile));
    System.out.println("Creating Integer Array");
    TreeSet<Integer> vertexSet = new TreeSet<Integer>();
    try {
      while (in.hasNext()) {
        AnchorRow row = AnchorRow.parse(in.nextLine());
        int vertex = resolveVertex(vid, rdl, row.id);
        if (vertex >= 0) {
          vertexSet.add(vertex);
        }
      }
    } finally {
      in.close();
    }

    /* STEP 6
     *
     * Create the VertexToTermCount array of the appropriate size
     * and initialize objects.
     *
     * We use arrays for space efficiency -- these can get large.
     */
    VertexToTermCount[] verticies = new VertexToTermCount[vertexSet.size()];
    i = 0;
    for (Integer distinctVertex : vertexSet) {
      verticies[i] = new VertexToTermCount(distinctVertex);
      i++;
    }
    VertexToTermCountComparator vertexOrder = new VertexToTermCountComparator();
    Arrays.sort(verticies, vertexOrder);
    vertexSet = null; // Free Up Memory (?)

    /* STEP 7
     *
     * Add terms to our VertexToTermCount objects.
     */
    System.out.println("Creating Term Mappings");
    in = new Scanner(new FileReader(csvFile));
    try {
      while (in.hasNext()) {
        AnchorRow row = AnchorRow.parse(in.nextLine());
        int vertex = resolveVertex(vid, rdl, row.id);
        if (vertex >= 0) {
          int pos = Arrays.binarySearch(verticies, new VertexToTermCount(vertex), vertexOrder);
          if (pos >= 0) {
            // the vertex side stores the lower-cased term without stemming
            verticies[pos].addTerm(row.term.toLowerCase(), row.count);
          } else {
            System.err.println("Invalid vertex Found: " + vertex);
          }
        }
      }
    } finally {
      in.close();
    }

    /* STEP 8
     *
     * Write objects to the .vtc file
     */
    System.out.println("Writing Mappings to File");
    VertexToTermMapping vwm = new VertexToTermMapping(verticies);
    writeSerialized(binaryFileName("-" + "link-" + Configuration.stemming + ".vtc"), vwm);
    verticies = null; // Free Up Memory (?)
  } // end: main()

  /** Returns the common binary-file path for the configured type, date and graph plus a suffix. */
  private static String binaryFileName(String suffix) {
    return Configuration.baseDir
        + "/"
        + Configuration.binaryDir
        + "/"
        + Configuration.type
        + "/"
        + Configuration.date
        + "/"
        + Configuration.type
        + "-"
        + Configuration.date
        + "-"
        + Configuration.graph
        + suffix;
  }

  /** Deserializes one object of the given type from a file; reports and exits(1) on any failure. */
  private static <T> T readSerializedOrExit(String fileName, Class<T> type) {
    try {
      ObjectInputStream ois = new ObjectInputStream(new FileInputStream(fileName));
      try {
        return type.cast(ois.readObject());
      } finally {
        ois.close();
      }
    } catch (Exception e) {
      System.err.println("Problem with file:" + fileName);
      e.printStackTrace();
      System.exit(1);
      return null; // only reached if exit() returns
    }
  }

  /** Serializes one object to a file, closing the stream even if writing fails. */
  private static void writeSerialized(String fileName, Object payload) throws IOException {
    ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(fileName));
    try {
      out.writeObject(payload);
    } finally {
      out.close();
    }
  }

  /** Maps a wiki ID to its vertex, following a redirect if needed; -1 if the ID is unknown. */
  private static int resolveVertex(IDVertexTranslation vid, IDIDRedirect rdl, int id) {
    if (vid.isValidWikiID(id)) {
      return vid.getVertex(id);
    }
    if (rdl.isRedirectID(id)) {
      return vid.getVertex(rdl.redirectIDToValidID(id));
    }
    return -1;
  }

  /** Lower-cases a term and, when the stem flag is set, stems it. */
  private static String normalizeTerm(String rawTerm) {
    String lowered = rawTerm.toLowerCase();
    return stem ? PorterStemmerTokenizerFactory.stem(lowered) : lowered;
  }

  /** One row of the anchor CSV file: anchor text, target wiki ID and occurrence count. */
  private static final class AnchorRow {
    final String term;
    final int id;
    final int count;

    private AnchorRow(String term, int id, int count) {
      this.term = term;
      this.id = id;
      this.count = count;
    }

    /**
     * Splits a row at its last two commas into text, ID and count. The text drops the first
     * character of the row and the character before the second-to-last comma (presumably the
     * surrounding quotes - verify against the CSV format).
     */
    static AnchorRow parse(String s) {
      int lastComma = s.lastIndexOf(',');
      int secondLastComma = s.substring(0, lastComma - 1).lastIndexOf(',');
      String term = s.substring(1, secondLastComma - 1);
      int count = Integer.parseInt(s.substring(lastComma + 1));
      int id = Integer.parseInt(s.substring(secondLastComma + 1, lastComma));
      return new AnchorRow(term, id, count);
    }
  }
Ejemplo n.º 20
0
 /**
  * Stores {@code node} as the tree node of the given fold.
  *
  * <p>The slot is the position of {@code fold} in {@code m_Folds}, found by binary search; the
  * fold is expected to be present.
  *
  * @param fold the fold whose node is replaced
  * @param node the node to store
  */
 public final void setNode(int fold, ClusNode node) {
   m_Nodes[Arrays.binarySearch(m_Folds, fold)] = node;
 }