You are on page 1 of 2

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

public class HtmlParserSamples { private final static String HTML = "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" + "<head>\n" + " <title>This is a test page</title>\n" + "</head>\n" + "<body>\n" + " <h1>This is a simple Html page to test:</h1>\n" + " <h2>This is a simple Html page to test:</h2>\n" + " <table>\n" + " <tr>\n" + " <td>Hello</td>\n" + " <td>World!</td>\n" + " </tr>\n" + " </table>\n" + "</body>\n" + "</html>"; public static void main(String[] args) throws Exception { parseWithHtmlCleaner(); } private static void parseWithHtmlCleaner() throws XPatherException, Malforme dURLException, IOException { System.out.println("*** HTMLCLEANER ***"); CleanerProperties props = new CleanerProperties(); // set some properties to non-default values props.setTranslateSpecialEntities(true); props.setTransResCharsToNCR(true); props.setOmitComments(true); // do parsing TagNode tagNode = new HtmlCleaner(props).clean(new URL( "http://wi i.teamliquid.net/starcraft2/3_Gate_Robo")); Object[] nodes = tagNode.evaluateXPath("//h1[@id='firstHeading']"); TagNode Headernode = (TagNode)nodes[0]; System.out.println("Build Title: " + Headernode.getText()); System.out.println("------------------------------");

nodes = tagNode.evaluateXPath("//table[@class='wi itable collapsible']") ; if(nodes == null || nodes.length == 0){ nodes = tagNode.evaluateXPath("//table[@class='wi itable collapsible collapsed']"); } TagNode node = (TagNode)nodes[0]; nodes = node.evaluateXPath("//li//a"); Object[] nodes1; nodes1 = node.evaluateXPath("//li");

for (int i = 0; i < nodes1.length; i++) { System.out.print(((TagNode)nodes1[i]).getText()); } } }

You might also like