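<project>
  <modelVersion>4.0.0</modelVersion>
  <groupId>Novel</groupId>
  <artifactId>novel.spider</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <dependencies>
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.9.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.5.2</version>
    </dependency>
    <dependency>
      <groupId>dom4j</groupId>
      <artifactId>dom4j</artifactId>
      <version>1.6.1</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
    </dependency>
  </dependencies>
</project>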
package novel.spider.entity;import java.io.Serializable;/** * 小说章节实体类 * @author lilonghua * @date: 2017年6月22日 */public class Chapter implements Serializable {private static final long serialVersiOnUID= 1L;private String title;//小说章节private String url;//章节链接public String getTitle() {return title;}public void setTitle(String title) {this.title = title;}public String getUrl() {return url;}public void setUrl(String url) {this.url = url;}@Overridepublic String toString() {return "Chapter [title=" + title + ", url=" + url + "]";}}
ackage novel.spider.interfaces;import java.util.List;import novel.spider.entity.Chapter;/** * 小说url接口 * @author lilonghua * @date: 2017年6月22日 */public interface IChapterInter {/*** 获取一个完整的url链接,显示所有章节列表* @param @param url* @param @return * @return * @throws */public List getChapter(String url);}
package novel.spider.impl;import java.util.ArrayList;import java.util.List;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClientBuilder;import org.apache.http.util.EntityUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import novel.spider.entity.Chapter;import novel.spider.interfaces.IChapterInter;/** * 小说url接口实现类 * * @author lilonghua * @date: 2017年6月22日 */public class IChapterInterImpl implements IChapterInter {protected String crawl(String url) throws Exception {//采用HttpClient技术try (CloseableHttpClient httpClient = HttpClientBuilder.create().build(); CloseableHttpResponse httpRespOnse= httpClient.execute(new HttpGet(url))) {String result = EntityUtils.toString(httpResponse.getEntity());return result;} catch (Exception e) {throw new RuntimeException(e);}}@Overridepublic List getChapter(String url) {try {String result = crawl(url);Document doc = Jsoup.parse(result);Elements as = doc.select("#list dd a");List chapters = new ArrayList<>();for (Element a : as) {Chapter chapter = new Chapter();chapter.setTitle(a.text());chapter.setUrl("http://www.bxwx8.org" + a.attr("href"));chapters.add(chapter);}return chapters;} catch (Exception e) {throw new RuntimeException(e);}}}
package novel.spider.test;

import java.util.List;

import org.junit.Test;

import novel.spider.entity.Chapter;
import novel.spider.impl.IChapterInterImpl;

/**
 * Link test for the chapter crawler.
 *
 * @author lilonghua
 * Created: 2017-06-22
 */
public class TestOne {

    /**
     * Requests the chapter list for a fixed URL and prints every chapter to standard output.
     * The test makes no assertions; it fails only if {@code getChapter} throws.
     */
    @Test
    public void test1() {
        IChapterInterImpl chapterInter = new IChapterInterImpl();
        // Typed as List<Chapter> so the loop below can iterate Chapter elements directly.
        List<Chapter> chapterList = chapterInter.getChapter("http://www.biquge.tw/0_5/");
        for (Chapter chapter : chapterList) {
            System.out.println(chapter);
        }
    }
}