public static void main(String[] args) throws Exception { String address = "http://vnexpress.net/GL/Xa-hoi/2009/02/3BA0B4AB/"; webClient.setURL(address, new URL(address)); // String address = "http://vnmedia.vn/newsdetail.asp?NewsId=154558&CatId=58"; java.net.URL url = new java.net.URL(address); HTMLDocument document = HTMLParser.createDocument(loadContent(address), "utf-8"); RefsDecoder decoder = new RefsDecoder(); NodeIterator iterator = document.getRoot().iterator(); while (iterator.hasNext()) { HTMLNode node = iterator.next(); if (!node.isNode(Name.CONTENT)) continue; char[] chars = node.getValue(); chars = decoder.decode(chars); chars = CharsUtil.cutAndTrim(chars, 0, chars.length); chars = java.text.Normalizer.normalize(new String(chars), Normalizer.Form.NFC).toCharArray(); node.setValue(chars); } loadCSS(address, document); NodePath nodePath = pathParser.toPath("BODY"); HTMLNode body = extractor.lookNode(document.getRoot(), nodePath); WebPageDataSearcher dataSearcher = new WebPageDataSearcher(document); HTMLNode node = dataSearcher.search(body); File file = new File("F:\\Temp2\\web\\output\\extract.htm"); byte[] bytes = new byte[0]; if (node != null) bytes = node.getTextValue().getBytes(Application.CHARSET); RWData.getInstance().save(file, bytes); }
public DeleteContentsDialog(Browser _browser, String domain, String[] _ids, String[] titles) { this.domain = domain; this.browser = _browser; this.ids = _ids; shell = new Shell(browser.getShell(), SWT.CLOSE | SWT.RESIZE | SWT.APPLICATION_MODAL); ClientRM clientRM = DeleteDomainPlugin.getResources(); ApplicationFactory factory = new ApplicationFactory(shell, clientRM, getClass().getName()); shell.setText(factory.getLabel("title")); factory.setComposite(shell); shell.setLayout(new GridLayout(1, false)); shell.addShellListener( new ShellAdapter() { public void shellClosed(ShellEvent e) { new ShellSetter(DeleteContentsDialog.class, shell); shell.dispose(); } }); factory.setComposite(shell); RefsDecoder decoder = new RefsDecoder(); butTitles = new Button[titles.length]; // selectors = new DeleteSingleArticleSelector[10]; for (int i = 0; i < butTitles.length; i++) { titles[i] = new String(decoder.decode(titles[i].toCharArray())); butTitles[i] = new Button(shell, SWT.CHECK); butTitles[i].setSelection(true); butTitles[i].setToolTipText(clientRM.getLabel("itemDeleteTooltip")); butTitles[i].setText(titles[i]); butTitles[i].setLayoutData(new GridData()); } Composite bottom = new Composite(shell, SWT.NONE); GridData gridData = new GridData(GridData.FILL_HORIZONTAL); // gridData.horizontalSpan = 2; bottom.setLayoutData(gridData); RowLayout rowLayout = new RowLayout(); bottom.setLayout(rowLayout); rowLayout.justify = true; factory.setComposite(bottom); factory.createButton( "butDeletDomain", new SelectionAdapter() { public void widgetSelected(SelectionEvent evt) { deleteDomain(); } }); SelectionAdapter syncListener = new SelectionAdapter() { public void widgetSelected(SelectionEvent evt) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < ids.length; i++) { if (!butTitles[i].getSelection()) continue; if (builder.length() > 0) builder.append('\n'); builder.append(ids[i]); } DeleteContentPlugin.delete(browser, builder.toString()); shell.dispose(); } }; factory.createButton("butOk", syncListener); factory.createButton( "butClose", new SelectionAdapter() { public void widgetSelected(SelectionEvent evt) { new ShellSetter(DeleteContentsDialog.class, shell); shell.dispose(); } }); Rectangle displayRect = UIDATA.DISPLAY.getBounds(); int x = (displayRect.width - 350) / 2; int y = (displayRect.height - 300) / 2; shell.setImage(browser.getShell().getImage()); new ShellGetter(DeleteContentsDialog.class, shell, 550, 350, x, y); shell.open(); }