java代码
1、使用 jsoup 加载网页时,jsoup 只解析服务器返回的静态 HTML,不会执行 JavaScript,因此无法获取由 script 动态生成的内容
2、解决办法:先使用 HtmlUnit 加载页面并执行其中的脚本,再把渲染后的 HTML 交给 jsoup 解析
3、直接上代码:
<dependency>
    <groupId>net.sourceforge.htmlunit</groupId>
    <artifactId>htmlunit</artifactId>
    <version>2.36.0</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.12.1</version>
</dependency>
@Test public void test() { final WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setActiveXNative(false); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(true); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); HtmlPage page = null; try { page = webClient.getPage("http://localhost:8080/index.html");//.注意如果加载JS css等静态网页,需要使用TextPage来接收 } catch (Exception e) { e.printStackTrace(); }finally { webClient.close(); } webClient.waitForBackgroundJavaScript(30000); String pageXml = page.asXml(); Document document = Jsoup.parse(pageXml);//获取html文档 System.out.println(document.getElementById("content").html()); } |