/**
 * Dumps every index file listed under {@code rootDirectory} and reports the number of keys
 * processed.
 *
 * <p>Each file is passed to {@code dumpData.dumpIndex(...)}; the per-file counts are summed,
 * logged, and added to {@code totalKeysProcessed}. {@code countDownLatch} is counted down in a
 * {@code finally} block so that it is signalled even when listing or dumping fails; otherwise a
 * failure here would leave anything waiting on the latch blocked.
 */
public void run() {
  try {
    File[] indexFiles = rootDirectory.listFiles();
    if (indexFiles == null) {
      // File.listFiles() returns null (not an empty array) when the path is not a directory
      // or an I/O error occurs; iterating over it would throw a NullPointerException.
      logger.error("Unable to list index files under " + rootDirectory.getName());
      return; // the finally block below still counts the latch down
    }

    long keysProcessedforReplica = 0;
    for (File indexFile : indexFiles) {
      keysProcessedforReplica +=
          dumpData.dumpIndex(
              indexFile,
              rootDirectory.getName(),
              replicaList,
              new ArrayList<String>(),
              blobIdToStatusMap,
              indexStats,
              true);
    }
    logger.info(
        "Total keys processed for " + rootDirectory.getName() + " " + keysProcessedforReplica);
    totalKeysProcessed.addAndGet(keysProcessedforReplica);
  } finally {
    countDownLatch.countDown();
  }
}
  /**
   * Walks the XML events of a dump and records one entry per page in {@code data}.
   *
   * <p>Outside of a revision element the page id is always read, and the page title is read
   * when {@code readMode} is {@code ReadMode.TITLE}. Inside a revision element the revision text
   * is read when {@code readMode} is {@code ReadMode.CONTENT}. When the closing page tag is
   * reached the collected values are handed to {@code data.addPageEntry} (TITLE mode) or
   * {@code data.addContentInfo} (CONTENT mode); a page for which no id was read is logged as
   * invalid and skipped. The per-page state is then reset.
   *
   * <p>Element text is read with {@link XMLEventReader#getElementText()} rather than by casting
   * the next event to characters: an empty element has no characters event at all (the cast
   * would fail), and a non-coalescing reader may deliver one element's text as several
   * characters events (only the first piece would be kept).
   *
   * @param reader event reader positioned somewhere before the pages to be processed
   * @param data sink receiving the extracted page entries or page contents
   * @param readMode selects whether titles or revision texts are extracted
   * @throws XMLStreamException if the underlying XML cannot be read
   * @throws NumberFormatException if a page id element contains non-numeric text
   */
  private static void read(XMLEventReader reader, DumpData data, ReadMode readMode)
      throws XMLStreamException {
    // basic page info
    int pageId = -1;
    String title = null;
    String pageText = null;

    boolean withinRevisionTag = false;

    int pagesProcessed = 0;

    while (reader.hasNext()) {
      XMLEvent event = reader.nextEvent();
      if (event.isStartElement()) {
        String startName = event.asStartElement().getName().getLocalPart();

        if (startName.equals(PAGE_REVISION_TAG)) {
          withinRevisionTag = true;
        }

        if (!withinRevisionTag) {
          switch (startName) {
            case PAGE_ID_TAG: {
              String idText = reader.getElementText().trim();
              // An empty id element leaves pageId at -1, so the page is reported as invalid
              // at its closing tag instead of aborting the whole run.
              if (!idText.isEmpty()) {
                pageId = Integer.parseInt(idText);
              }
              break;
            }
            case PAGE_TITLE_TAG:
              if (readMode == ReadMode.TITLE) {
                title = reader.getElementText();
              }
              break;
            default:
              break;
          }
        } else if (readMode == ReadMode.CONTENT && startName.equals(PAGE_REVISION_TEXT_TAG)) {
          pageText = reader.getElementText();
        }
      }

      if (event.isEndElement()) {
        String endName = event.asEndElement().getName().getLocalPart();
        switch (endName) {
          case PAGE_REVISION_TAG:
            withinRevisionTag = false;
            break;
          case PAGE_TAG:
            // process retrieved page related information depending on the dump.
            if (pageId == -1) {
              logger_.warn("Invalid Page Entry");
            } else if (readMode == ReadMode.TITLE) {
              logger_.debug("Extracted page : {}(id : {})", title, pageId);
              data.addPageEntry(pageId, title);
            } else if (readMode == ReadMode.CONTENT) {
              logger_.debug("Extracted page content (id : {})", pageId);
              data.addContentInfo(pageId, pageText);
            }

            // reset the per-page state before the next page starts
            pageId = -1;
            title = null;
            pageText = null;
            // progress report once every million pages, printed in thousands
            if (++pagesProcessed % 1_000_000 == 0) {
              logger_.info("Processed: {} k", pagesProcessed / 1000);
            }
            break;
          default:
            break;
        }
      }
    }
  }
Example #3
0
  /**
   * Writes {@code data} to {@code writer} as HTML.
   *
   * <p>A value that is not a {@code DumpTable} is written as its HTML-escaped
   * {@code toString()}. A table whose only row has exactly two items, the second of which is a
   * non-null, non-table value, is collapsed to that escaped value. Any other table is written as
   * a {@code <table>} element: an optional header row carrying the title and comment, then one
   * {@code <tr>} per row. Cells are rendered by recursive calls with {@code inside = true}; rows
   * shorter than the widest row are padded with {@code &nbsp;} cells and {@code null} cells are
   * rendered as the text {@code null}.
   *
   * @param pc page context; when the table has a title it is replaced by
   *     {@code ThreadLocalPageContext.get(pc)} and that value is forwarded to nested calls
   * @param data value to render; {@code null} writes nothing
   * @param writer destination of the generated HTML
   * @param expand not interpreted here, only forwarded to nested calls
   * @param inside {@code false} for the outermost call, in which case the string form of
   *     {@code SystemUtil.getCurrentContext()} is placed in the {@code title} attribute of the
   *     cells; nested calls pass {@code true} and emit plain {@code <td>} cells
   * @throws IOException if writing to {@code writer} fails
   */
  private void writeOut(
      PageContext pc, DumpData data, Writer writer, boolean expand, boolean inside)
      throws IOException {

    if (data == null) return;
    if (!(data instanceof DumpTable)) {
      writer.write(StringUtil.escapeHTML(data.toString()));
      return;
    }
    DumpTable table = (DumpTable) data;

    // prepare data: the table is as wide as its widest row
    DumpRow[] rows = table.getRows();
    int cols = 0;
    for (DumpRow row : rows) {
      cols = Math.max(cols, row.getItems().length);
    }

    TemplateLine tl = null;
    if (!inside) tl = SystemUtil.getCurrentContext();
    String context = tl == null ? "" : tl.toString();

    // Shortcut: a single two-item row with a plain second item is written as just that item.
    if (rows.length == 1 && rows[0].getItems().length == 2) {
      DumpData d = rows[0].getItems()[1];
      // A null item cannot be stringified; let it fall through to the table rendering below,
      // which already prints null cells as "null".
      if (d != null && !(d instanceof DumpTable)) {
        writer.write(StringUtil.escapeHTML(d.toString()));
        return;
      }
    }

    writer.write(
        "<table  cellpadding=\"1\" cellspacing=\"0\" "
            + (table.getWidth() != null ? " width=\"" + table.getWidth() + "\"" : "")
            + ""
            + (table.getHeight() != null ? " height=\"" + table.getHeight() + "\"" : "")
            + " border=\"1\">");

    // header
    String title = table.getTitle();
    if (!StringUtil.isEmpty(title)) {
      writer.write("<tr><td title=\"" + context + "\" colspan=\"" + cols + "\">");
      // Resolve the page context here; the resolved value is what nested calls receive.
      pc = ThreadLocalPageContext.get(pc);
      writer.write(
          "<b>"
              + title
              + "</b>"
              + (!StringUtil.isEmpty(table.getComment()) ? "<br>" + table.getComment() : "")
              + "</td></tr>");
    }

    // items
    for (DumpRow row : rows) {
      writer.write("<tr>");
      DumpData[] items = row.getItems();
      for (int y = 0; y < cols; y++) {
        // pad rows that are shorter than the widest one
        DumpData value = y < items.length ? items[y] : new SimpleDumpData("&nbsp;");
        if (value == null) value = new SimpleDumpData("null");
        if (!inside) {
          writer.write("<td title=\"" + context + "\">");
        } else {
          writer.write("<td>");
        }
        writeOut(pc, value, writer, expand, true);
        writer.write("</td>");
      }
      writer.write("</tr>");
    }

    // footer
    writer.write("</table>");
  }