我用htmlunit从网页上抓取图像。我是htmlunit的初学者。我已经编码,但是不知道如何获取图像。下面是我的代码。

import java.io.*;
import java.net.URL;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

public class urlscrap {

    public static void main(String[] args) throws Exception
    {

        //WebClient webClient = new WebClient(Opera);
        WebClient webClient = new WebClient();
        HtmlPage currentPage = (HtmlPage) webClient.getPage(new URL("http://www.google.com"));

        System.out.println(currentPage.asText());
        //webClient.closeAllWindows();

    }
}

最佳答案

这对您有用吗?

import java.net.URL;
import java.util.List;

import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

public class urlscrap {

    public static void main(String[] args) throws Exception
    {

        //WebClient webClient = new WebClient(Opera);
        WebClient webClient = new WebClient();
        HtmlPage currentPage = (HtmlPage) webClient.getPage(new URL("http://www.google.com"));
      //get list of all divs
        final List<?> images = currentPage.getByXPath("//img");
        for (Object imageObject : images) {
            HtmlImage image = (HtmlImage) imageObject;
            System.out.println(image.getSrcAttribute());
        }
        //webClient.closeAllWindows();
    }
}

08-04 19:44