`
fengzhizi715
  • 浏览: 160044 次
  • 性别: Icon_minigender_1
  • 来自: 上海 苏州
社区版块
存档分类
最新评论

写了一个比较当当、卓越、China-pub图书的简单程序

    博客分类:
  • Java
阅读更多

主要运用了 HttpClient 和 HTMLParser。
程序写得比较粗糙,有空时会进一步完善。

/**
 * Simple mutable value holder for the details of one book as reported by a
 * single online shop. Every property is stored as a plain string exactly as
 * it was handed to the setter; unset properties read back as {@code null}.
 *
 * @author Tony Shen
 */
public class CompareBook {

	/* ---- identification ---- */
	private String bookName;
	private String author;
	private String publish;
	private String publishDate;

	/* ---- pricing ---- */
	private String originalPrice;
	private String price;

	/* ---- free text ---- */
	private String desc;

	/** @return the book title, or {@code null} if not set */
	public String getBookName() {
		return this.bookName;
	}

	/** @param bookName the book title to store */
	public void setBookName(String bookName) {
		this.bookName = bookName;
	}

	/** @return the author text, or {@code null} if not set */
	public String getAuthor() {
		return this.author;
	}

	/** @param author the author text to store */
	public void setAuthor(String author) {
		this.author = author;
	}

	/** @return the publisher text, or {@code null} if not set */
	public String getPublish() {
		return this.publish;
	}

	/** @param publish the publisher text to store */
	public void setPublish(String publish) {
		this.publish = publish;
	}

	/** @return the list (pre-discount) price text, or {@code null} if not set */
	public String getOriginalPrice() {
		return this.originalPrice;
	}

	/** @param originalPrice the list (pre-discount) price text to store */
	public void setOriginalPrice(String originalPrice) {
		this.originalPrice = originalPrice;
	}

	/** @return the current selling price text, or {@code null} if not set */
	public String getPrice() {
		return this.price;
	}

	/** @param price the current selling price text to store */
	public void setPrice(String price) {
		this.price = price;
	}

	/** @return the description text, or {@code null} if not set */
	public String getDesc() {
		return this.desc;
	}

	/** @param desc the description text to store */
	public void setDesc(String desc) {
		this.desc = desc;
	}

	/** @return the publication date text, or {@code null} if not set */
	public String getPublishDate() {
		return this.publishDate;
	}

	/** @param publishDate the publication date text to store */
	public void setPublishDate(String publishDate) {
		this.publishDate = publishDate;
	}
}


import java.net.URLEncoder;

import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;

/**
 * Fetches book information from the Dangdang search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured book name, then pass that HTML to
 * {@link #fetchData(String)} to extract the fields of the first hit.
 *
 * @author Tony Shen
 */
public class FecthDangDang {
	private String bookName;
	
	/**
	 * @param bookName the book title to search for
	 */
	public FecthDangDang(String bookName) {
		this.bookName = bookName;
	}
	
	/**
	 * Runs a search for the configured book name and returns the raw body of
	 * the response.
	 *
	 * @return the response body as produced by {@link BasicResponseHandler}
	 * @throws Exception if encoding the book name or executing the request fails
	 */
	public String getResponse() throws Exception {
		HttpClient httpclient = new DefaultHttpClient();
		try {
			httpclient.getParams().setParameter(
					HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");

			// The search keyword is sent GBK-encoded in the query string.
			String paramStr = URLEncoder.encode(bookName, "GBK");
			String url = "http://search.dangdang.com/search.php?catalog=&key="
					+ paramStr + "&SearchFromTop=1";

			HttpGet httpget = new HttpGet(url);
			ResponseHandler<String> responseHandler = new BasicResponseHandler();
			return httpclient.execute(httpget, responseHandler);
		} finally {
			// Release the connection resources even when the request throws.
			httpclient.getConnectionManager().shutdown();
		}
	}

	/**
	 * Extracts the details of the first search hit from the given HTML.
	 *
	 * <p>The lookups rely on specific CSS class names and link positions in
	 * the result markup. A field whose element cannot be located is left as
	 * {@code null} on the returned book instead of failing the whole
	 * extraction, so a markup change on the site degrades to missing fields.
	 *
	 * @param responseBody the search result HTML, as returned by {@link #getResponse()}
	 * @return a book carrying the configured name plus every field that was found
	 * @throws Exception if the HTML cannot be parsed
	 */
	public CompareBook fetchData(String responseBody) throws Exception {
		CompareBook book = new CompareBook();
		book.setBookName(bookName);
		
		Parser parser = new Parser(responseBody);
		NodeList items = parser.extractAllNodesThatMatch(
				new HasAttributeFilter("class", "list_r_list"));

		book.setPublishDate(plainTextAt(P(items, "class", "list_r_list_h4_info3"), 0));
		book.setOriginalPrice(plainTextAt(P(items, "class", "gray del"), 0));
		book.setPrice(plainTextAt(P(items, "class", "red"), 0));

		// All links nested inside the result items; description, author and
		// publisher are picked by their position in this list.
		NodeList links = items.extractAllNodesThatMatch(
				new NodeClassFilter(LinkTag.class), true);

		book.setDesc(linkTextAt(links, 1));
		book.setAuthor(linkTextAt(links, 4));
		book.setPublish(linkTextAt(links, 5));
		return book;
	}

	/**
	 * Recursively collects the nodes of {@code nodelist} that carry the given
	 * attribute with the given value.
	 *
	 * @param nodelist the nodes to search in
	 * @param a the attribute name
	 * @param b the attribute value
	 * @return the matching nodes
	 */
	public NodeList P(NodeList nodelist, String a, String b) {
		NodeFilter filter = new HasAttributeFilter(a, b);
		return nodelist.extractAllNodesThatMatch(filter, true);
	}

	/**
	 * Returns the plain text of the node at {@code index}, or {@code null}
	 * when the list has no such node.
	 */
	private static String plainTextAt(NodeList nodes, int index) {
		if (nodes == null || index < 0 || index >= nodes.size()) {
			return null;
		}
		Node node = nodes.elementAt(index);
		return node == null ? null : node.toPlainTextString();
	}

	/**
	 * Returns the link text of the node at {@code index}, or {@code null}
	 * when the list has no such node or the node is not a link.
	 */
	private static String linkTextAt(NodeList nodes, int index) {
		if (nodes == null || index < 0 || index >= nodes.size()) {
			return null;
		}
		Node node = nodes.elementAt(index);
		if (node instanceof LinkTag) {
			return ((LinkTag) node).getLinkText();
		}
		return null;
	}

	/** @return the book title this fetcher searches for */
	public String getBookName() {
		return bookName;
	}

	/** @param bookName the book title to search for */
	public void setBookName(String bookName) {
		this.bookName = bookName;
	}
	
}


import java.net.URLEncoder;

import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;

/**
 * Fetches book information from the Zhuoyue (amazon.cn) search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured book name, then pass that HTML to
 * {@link #fetchData(String)} to extract the price of the first hit.
 *
 * @author Tony Shen
 */
public class FetchZhuoYue {
	
	private String bookName;
	
	/**
	 * @param bookName the book title to search for
	 */
	public FetchZhuoYue(String bookName) {
		this.bookName = bookName;
	}
	
	/**
	 * Runs a search for the configured book name and returns the raw body of
	 * the response.
	 *
	 * @return the response body as produced by {@link BasicResponseHandler}
	 * @throws Exception if encoding the book name or executing the request fails
	 */
	public String getResponse() throws Exception {
		HttpClient httpclient = new DefaultHttpClient();
		try {
			httpclient.getParams().setParameter(
					HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");

			// The search keyword is sent GBK-encoded in the query string.
			String paramStr = URLEncoder.encode(bookName, "GBK");
			String url = "http://www.amazon.cn/s/ref=nb_ss?url=search-alias%3Dbooks&keywords="
				+ paramStr + "&Go.x=15&Go.y=13&searchKind=name";

			HttpGet httpget = new HttpGet(url);
			ResponseHandler<String> responseHandler = new BasicResponseHandler();
			return httpclient.execute(httpget, responseHandler);
		} finally {
			// Release the connection resources even when the request throws.
			httpclient.getConnectionManager().shutdown();
		}
	}
	
	/**
	 * Extracts the selling price of the first search hit from the given HTML.
	 *
	 * <p>The lookup relies on specific CSS class names in the result markup.
	 * When no matching element is found the price is left as {@code null} on
	 * the returned book instead of failing with a
	 * {@link NullPointerException}.
	 *
	 * @param responseBody the search result HTML, as returned by {@link #getResponse()}
	 * @return a book carrying the configured name and, if found, the price
	 * @throws Exception if the HTML cannot be parsed
	 */
	public CompareBook fetchData(String responseBody) throws Exception {
		CompareBook book = new CompareBook();
		book.setBookName(bookName);
		
		Parser parser = new Parser(responseBody);
		NodeList items = parser.extractAllNodesThatMatch(
				new HasAttributeFilter("class", "n2"));
		
		NodeList prices = P(items, "class", "saleprice");
		if (prices != null && prices.size() > 0) {
			Node priceNode = prices.elementAt(0);
			if (priceNode != null) {
				book.setPrice(priceNode.toPlainTextString());
			}
		}
		return book;
	}

	/**
	 * Recursively collects the nodes of {@code nodelist} that carry the given
	 * attribute with the given value.
	 *
	 * @param nodelist the nodes to search in
	 * @param a the attribute name
	 * @param b the attribute value
	 * @return the matching nodes
	 */
	public NodeList P(NodeList nodelist, String a, String b) {
		NodeFilter filter = new HasAttributeFilter(a, b);
		return nodelist.extractAllNodesThatMatch(filter, true);
	}

	/** @return the book title this fetcher searches for */
	public String getBookName() {
		return bookName;
	}

	/** @param bookName the book title to search for */
	public void setBookName(String bookName) {
		this.bookName = bookName;
	}
	
}



import java.net.URLEncoder;

import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;

/**
 * Fetches book information from the China-Pub search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured book name, then pass that HTML to
 * {@link #fetchData(String)} to extract the price of the first hit.
 *
 * @author Tony Shen
 */
public class FecthChinaPub {
	
	private String bookName;
	
	/**
	 * @param bookName the book title to search for
	 */
	public FecthChinaPub(String bookName) {
		this.bookName = bookName;
	}
	
	/**
	 * Runs a search for the configured book name and returns the raw body of
	 * the response.
	 *
	 * @return the response body as produced by {@link BasicResponseHandler}
	 * @throws Exception if encoding the book name or executing the request fails
	 */
	public String getResponse() throws Exception {
		HttpClient httpclient = new DefaultHttpClient();
		try {
			httpclient.getParams().setParameter(
					HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");

			// The search keyword is sent GBK-encoded in the query string.
			String paramStr = URLEncoder.encode(bookName, "GBK");
			String url = "http://www.china-pub.com/s/?key1="
				+ paramStr + "&type=&pz=1";

			HttpGet httpget = new HttpGet(url);
			ResponseHandler<String> responseHandler = new BasicResponseHandler();
			return httpclient.execute(httpget, responseHandler);
		} finally {
			// Release the connection resources even when the request throws.
			httpclient.getConnectionManager().shutdown();
		}
	}
	
	/**
	 * Extracts the price of the first search hit from the given HTML.
	 *
	 * <p>The price is taken as the first whitespace-separated token of the
	 * text of the first {@code <ul>} found inside an element with class
	 * {@code listview}. When no such {@code <ul>} exists the price is left as
	 * {@code null} on the returned book instead of failing with a
	 * {@link NullPointerException}.
	 *
	 * @param responseBody the search result HTML, as returned by {@link #getResponse()}
	 * @return a book carrying the configured name and, if found, the price
	 * @throws Exception if the HTML cannot be parsed
	 */
	public CompareBook fetchData(String responseBody) throws Exception {
		CompareBook book = new CompareBook();
		book.setBookName(bookName);
		
		Parser parser = new Parser(responseBody);
		NodeList listViews = parser.extractAllNodesThatMatch(
				new HasAttributeFilter("class", "listview"));

		NodeList lists = listViews.extractAllNodesThatMatch(
				new TagNameFilter("ul"), true);

		Node firstList = lists.size() > 0 ? lists.elementAt(0) : null;
		if (firstList != null) {
			// split() on a trimmed string always yields at least one element.
			String[] tokens = firstList.toPlainTextString().trim().split("\\s+");
			book.setPrice(tokens[0]);
		}
		return book;
	}
	
	/** @return the book title this fetcher searches for */
	public String getBookName() {
		return bookName;
	}

	/** @param bookName the book title to search for */
	public void setBookName(String bookName) {
		this.bookName = bookName;
	}

}




/**
 * Command-line entry point that looks up one book on Dangdang, Zhuoyue and
 * China-Pub and prints what each shop reports.
 *
 * <p>Each shop is queried independently: a failure while fetching or parsing
 * one shop is reported on standard error and does not prevent the remaining
 * shops from being queried.
 *
 * @author Tony Shen
 */
public class ComparePrice {
	/** Book title used when none is given on the command line. */
	private static final String DEFAULT_BOOK_NAME = "我的奋斗";
	
	/**
	 * @param args optional; when present, {@code args[0]} is the book title
	 *             to search for, otherwise the built-in default is used
	 */
	public static void main(String[] args) {
		String bookName = (args != null && args.length > 0)
				? args[0] : DEFAULT_BOOK_NAME;

		printDangDang(bookName);
		printZhuoYue(bookName);
		printChinaPub(bookName);
	}

	/** Queries Dangdang and prints every field extracted for the book. */
	private static void printDangDang(String bookName) {
		try {
			FecthDangDang dd = new FecthDangDang(bookName);
			CompareBook book = dd.fetchData(dd.getResponse());
			System.out.println("++++++当当抓取结果+++++");
			System.out.println("书名:"+book.getBookName());
			System.out.println("作者:"+book.getAuthor());
			System.out.println("出版社:"+book.getPublish());
			System.out.println("原价:"+book.getOriginalPrice());
			System.out.println("现价:"+book.getPrice());
			System.out.println("描述:"+book.getDesc());
			System.out.println(book.getPublishDate());
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Queries Zhuoyue and prints the book name and current price. */
	private static void printZhuoYue(String bookName) {
		try {
			FetchZhuoYue zy = new FetchZhuoYue(bookName);
			CompareBook book = zy.fetchData(zy.getResponse());
			System.out.println("++++++卓越抓取结果+++++");
			System.out.println("书名:"+book.getBookName());
			System.out.println("现价:"+book.getPrice());
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Queries China-Pub and prints the book name and current price. */
	private static void printChinaPub(String bookName) {
		try {
			FecthChinaPub cp = new FecthChinaPub(bookName);
			CompareBook book = cp.fetchData(cp.getResponse());
			System.out.println("++++++China-Pub抓取结果+++++");
			System.out.println("书名:"+book.getBookName());
			System.out.println("现价:"+book.getPrice());
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}



程序的运行结果


  • 描述: 结构
  • 大小: 28.2 KB
1
1
分享到:
评论
4 楼 fengzhizi715 2010-11-22  
ww20042005 写道
我运行了一下,出错了
java.lang.NullPointerException
at com.sun.dang.FecthDangDang.fetchData(FecthDangDang.java:67)
at com.sun.dang.ComparePrice.main(ComparePrice.java:16)

他们网页的结构都改了,所以报错了,呵呵。
3 楼 ww20042005 2010-11-22  
我运行了一下,出错了
java.lang.NullPointerException
at com.sun.dang.FecthDangDang.fetchData(FecthDangDang.java:67)
at com.sun.dang.ComparePrice.main(ComparePrice.java:16)
2 楼 fengzhizi715 2010-05-31  

写得比较仓促,重构的时候会把接口抽象出来。
1 楼 gml520 2010-05-30  
写一个接口吧,然后写几个实现类。

相关推荐

Global site tag (gtag.js) - Google Analytics