首页 >  java代码 >  jsoup加载动态网页

1、使用 jsoup 加载网页时,jsoup 只会解析服务器返回的静态 HTML,不会执行其中的 JavaScript,因此拿不到由 script 动态生成的内容

2、解决办法:先使用 htmlunit 加载页面并执行其中的 JavaScript,再把渲染后的 HTML 交给 jsoup 解析

3、直接上代码:

<dependency>
    <groupId>net.sourceforge.htmlunit</groupId>
    <artifactId>htmlunit</artifactId>
    <version>2.36.0</version>
</dependency>
 
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.12.1</version>
</dependency>

 

@Test
public void test() {
    final WebClient webClient = new WebClient(BrowserVersion.CHROME);
 
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
    webClient.getOptions().setActiveXNative(false);
    webClient.getOptions().setCssEnabled(false);
    webClient.getOptions().setJavaScriptEnabled(true);
    webClient.setAjaxController(new NicelyResynchronizingAjaxController());
 
    HtmlPage page = null;
    try {
        page = webClient.getPage("http://localhost:8080/index.html");//.注意如果加载JS css等静态网页,需要使用TextPage来接收
    } catch (Exception e) {
        e.printStackTrace();
    }finally {
        webClient.close();
    }
 
    webClient.waitForBackgroundJavaScript(30000);
 
    String pageXml = page.asXml();
    Document document = Jsoup.parse(pageXml);//获取html文档
    System.out.println(document.getElementById("content").html());
 
}