/**
 * Locate the end tag that balances a given beginning tag.
 *
 * <p>Data units are scanned forward from {@code index}. Only tags whose name matches
 * {@code tag} (ignoring case) are considered. Every matching begin tag met along the way opens
 * a nested level, and that level must be closed by its own end tag before a match is reported.
 *
 * @param index The data unit index at which the scan starts.
 * @param tag The beginning tag whose matching end tag is sought.
 * @return The index of the matching end tag, or -1 if none is found before the data units run
 *     out.
 */
public final int findEndTag(final int index, final Tag tag) {
  // Number of same-named begin tags seen so far that still await their own end tag.
  int nesting = 0;

  for (int pos = index; pos < this.page.getDataSize(); pos++) {
    final DataUnit unit = this.page.getDataUnit(pos);
    if (!(unit instanceof TagDataUnit)) {
      continue;
    }

    final Tag candidate = ((TagDataUnit) unit).getTag();
    if (!tag.getName().equalsIgnoreCase(candidate.getName())) {
      continue;
    }

    if (candidate.getType() == Tag.Type.END) {
      if (nesting == 0) {
        return pos;
      }
      nesting--;
    } else if (candidate.getType() == Tag.Type.BEGIN) {
      nesting++;
    }
  }

  return -1;
}
/**
 * Called by loadContents to register a span found on the page.
 *
 * <p>The span records the tag's {@code id} and {@code class} attributes, begins at
 * {@code index}, and ends wherever {@link #findEndTag(int, Tag)} reports the matching end tag
 * (-1 when there is none). It is then handed to {@code addHierarchyElement}.
 *
 * @param index The data unit index of the beginning span tag.
 * @param tag The beginning span tag.
 */
private void loadSpan(final int index, final Tag tag) {
  final Span element = new Span(this.page);
  element.setIdAttribute(tag.getAttributeValue("id"));
  element.setClassAttribute(tag.getAttributeValue("class"));
  element.setBegin(index);

  // Search starts just past the beginning tag so it is not counted as a nested span.
  final int endIndex = findEndTag(index + 1, tag);
  element.setEnd(endIndex);

  addHierarchyElement(element);
}
/**
 * Called by loadContents to register a div found on the page.
 *
 * <p>The div records the tag's {@code id} and {@code class} attributes, begins at
 * {@code index}, and ends wherever {@link #findEndTag(int, Tag)} reports the matching end tag
 * (-1 when there is none). It is then handed to {@code addHierarchyElement}.
 *
 * @param index The data unit index of the beginning div tag.
 * @param tag The beginning div tag.
 */
private void loadDiv(final int index, final Tag tag) {
  final Div element = new Div(this.page);
  element.setIdAttribute(tag.getAttributeValue("id"));
  element.setClassAttribute(tag.getAttributeValue("class"));
  element.setBegin(index);

  // Search starts just past the beginning tag so it is not counted as a nested div.
  final int endIndex = findEndTag(index + 1, tag);
  element.setEnd(endIndex);

  addHierarchyElement(element);
}
/**
 * Called by loadContents to load an input tag.
 *
 * <p>An {@link Input} is built from the tag's {@code type}, {@code name} and {@code value}
 * attributes. It is added to the form held in {@code lastForm} when one is set; otherwise it is
 * added directly to the page contents.
 *
 * @param index The data unit index of the input tag. Not used by this method.
 * @param tag The input tag.
 */
protected final void loadInput(final int index, final Tag tag) {
  final String inputType = tag.getAttributeValue("type");
  final String inputName = tag.getAttributeValue("name");
  final String inputValue = tag.getAttributeValue("value");

  final Input field = new Input(this.page);
  field.setType(inputType);
  field.setName(inputName);
  field.setValue(inputValue);

  // With no form recorded, the input belongs to the page itself.
  if (this.lastForm == null) {
    this.page.addContent(field);
    return;
  }
  this.lastForm.addElement(field);
}
/**
 * Called by loadContents to load a link on the page.
 *
 * <p>Tags without an {@code href} attribute are ignored. Otherwise the link targets the
 * {@code href} resolved against the base address, spans from {@code index} to the index
 * reported by {@link #findEndTag(int, Tag)} (-1 when no end tag is found), and is added to the
 * page contents.
 *
 * @param index The data unit index of the beginning anchor tag.
 * @param tag The beginning anchor tag.
 */
protected final void loadLink(final int index, final Tag tag) {
  final Link anchor = new Link(this.page);
  final String target = tag.getAttributeValue("href");

  // No href means nothing to link to; the anchor is discarded.
  if (target == null) {
    return;
  }

  anchor.setTarget(new Address(this.base, target));
  anchor.setBegin(index);
  anchor.setEnd(findEndTag(index + 1, tag));
  this.page.addContent(anchor);
}
/**
 * Called by loadContents to load a form on the page.
 *
 * <p>The form spans from {@code index} to the index reported by
 * {@link #findEndTag(int, Tag)} (-1 when no end tag is found). A missing {@code method}
 * attribute, or one equal to "GET" ignoring case, yields a GET form; any other value yields a
 * POST form. A missing {@code action} attribute makes the form submit to the base address;
 * otherwise the action is resolved against the base address. The form is added to the page
 * contents and recorded in {@code lastForm}.
 *
 * @param index The data unit index of the beginning form tag.
 * @param tag The beginning form tag.
 */
protected final void loadForm(final int index, final Tag tag) {
  final String methodAttr = tag.getAttributeValue("method");
  final String actionAttr = tag.getAttributeValue("action");

  final Form loaded = new Form(this.page);
  loaded.setBegin(index);
  loaded.setEnd(findEndTag(index + 1, tag));

  final boolean isGet = (methodAttr == null) || "GET".equalsIgnoreCase(methodAttr);
  loaded.setMethod(isGet ? Form.Method.GET : Form.Method.POST);

  loaded.setAction(
      (actionAttr == null) ? new Address(this.base) : new Address(this.base, actionAttr));

  this.page.addContent(loaded);
  this.lastForm = loaded;
}
/**
 * Walk the page's data units, which should already have been loaded, and build the page
 * contents from the tags found.
 *
 * <p>Tags that are not end tags are dispatched by name (ignoring case): {@code a} to loadLink,
 * {@code title} to loadTitle, {@code form} to loadForm and {@code input} to loadInput. Begin
 * tags named {@code div} or {@code span} are dispatched to loadDiv or loadSpan. An end tag named
 * {@code div} or {@code span} moves {@code lastHierarchyElement} up to its parent, when one is
 * currently set.
 */
protected final void loadContents() {
  for (int pos = 0; pos < this.page.getDataSize(); pos++) {
    final DataUnit unit = this.page.getDataUnit(pos);
    if (!(unit instanceof TagDataUnit)) {
      continue;
    }

    final Tag current = ((TagDataUnit) unit).getTag();
    final String name = current.getName();

    if (current.getType() == Tag.Type.END) {
      // Closing a div or span steps back out to the enclosing hierarchy element.
      final boolean closesHierarchy =
          name.equalsIgnoreCase("div") || name.equalsIgnoreCase("span");
      if (closesHierarchy && (this.lastHierarchyElement != null)) {
        this.lastHierarchyElement = this.lastHierarchyElement.getParent();
      }
      continue;
    }

    // Any tag that is not an end tag may introduce a link, title, form or input.
    if (name.equalsIgnoreCase("a")) {
      loadLink(pos, current);
    } else if (name.equalsIgnoreCase("title")) {
      loadTitle(pos, current);
    } else if (name.equalsIgnoreCase("form")) {
      loadForm(pos, current);
    } else if (name.equalsIgnoreCase("input")) {
      loadInput(pos, current);
    }

    // Only true begin tags open a div or span.
    if (current.getType() == Tag.Type.BEGIN) {
      if (name.equalsIgnoreCase("div")) {
        loadDiv(pos, current);
      } else if (name.equalsIgnoreCase("span")) {
        loadSpan(pos, current);
      }
    }
  }
}
/**
 * Create a tag data unit and append it to the page.
 *
 * <p>The data unit stores a clone of {@code tag} rather than the instance passed in.
 *
 * @param tag The tag to create the data unit for.
 */
private void createTagDataUnit(final Tag tag) {
  final TagDataUnit unit = new TagDataUnit();
  final Tag copy = tag.clone();
  unit.setTag(copy);
  this.page.addDataUnit(unit);
}