import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

public class HtmlParserSamples { private final static String HTML = "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" + "<head>\n" + " <title>This is a test page</title>\n" + "</head>\n" + "<body>\n" + " <h1>This is a simple Html page to test:</h1>\n" + " <h2>This is a simple Html page to test:</h2>\n" + " <table>\n" + " <tr>\n" + " <td>Hello</td>\n" + " <td>World!</td>\n" + " </tr>\n" + " </table>\n" + "</body>\n" + "</html>"; public static void main(String[] args) throws Exception { parseWithHtmlCleaner(); } private static void parseWithHtmlCleaner() throws XPatherException, Malforme dURLException, IOException { System.out.println("*** HTMLCLEANER ***"); CleanerProperties props = new CleanerProperties(); // set some properties to non-default values props.setTranslateSpecialEntities(true); props.setTransResCharsToNCR(true); props.setOmitComments(true); // do parsing TagNode tagNode = new HtmlCleaner(props).clean(new URL( "http://wi i.teamliquid.net/starcraft2/3_Gate_Robo")); Object[] nodes = tagNode.evaluateXPath("//h1[@id='firstHeading']"); TagNode Headernode = (TagNode)nodes[0]; System.out.println("Build Title: " + Headernode.getText()); System.out.println("------------------------------");

 

Object[] nodes1. } } }     .evaluateXPath("//table[@class='wi itable collapsible collapsed']").out. nodes1 = node.length.evaluateXPath("//li//a").getText()).length == 0){ nodes = tagNode. i++) { System.evaluateXPath("//li").print(((TagNode)nodes1[i]).evaluateXPath("//table[@class='wi itable collapsible']") . i < nodes1. nodes = node. if(nodes == null || nodes. for (int i = 0.nodes = tagNode. } TagNode node = (TagNode)nodes[0].