主要运用 HttpClient + HTMLParser。
程序写得比较粗糙,抽空会写得更加完善。
/**
 * Mutable holder for the details of one book as scraped from a bookstore page.
 *
 * <p>Every property is a plain {@code String} that is stored and returned
 * unchanged; a property that has never been set reads back as {@code null}.
 *
 * @author Tony Shen
 */
public class CompareBook {

    /** Title of the book. */
    private String bookName;

    /** Author of the book. */
    private String author;

    /** Publisher of the book. */
    private String publish;

    /** Original (list) price, kept as raw text. */
    private String originalPrice;

    /** Current selling price, kept as raw text. */
    private String price;

    /** Free-form description of the book. */
    private String desc;

    /** Publication date, kept as raw text. */
    private String publishDate;

    // ---- title ----------------------------------------------------------

    /** @return the book title, or {@code null} if unset */
    public String getBookName() {
        return this.bookName;
    }

    /** @param bookName the book title to store */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    // ---- author ---------------------------------------------------------

    /** @return the author, or {@code null} if unset */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author to store */
    public void setAuthor(String author) {
        this.author = author;
    }

    // ---- publisher ------------------------------------------------------

    /** @return the publisher, or {@code null} if unset */
    public String getPublish() {
        return this.publish;
    }

    /** @param publish the publisher to store */
    public void setPublish(String publish) {
        this.publish = publish;
    }

    // ---- prices ---------------------------------------------------------

    /** @return the original price text, or {@code null} if unset */
    public String getOriginalPrice() {
        return this.originalPrice;
    }

    /** @param originalPrice the original price text to store */
    public void setOriginalPrice(String originalPrice) {
        this.originalPrice = originalPrice;
    }

    /** @return the current price text, or {@code null} if unset */
    public String getPrice() {
        return this.price;
    }

    /** @param price the current price text to store */
    public void setPrice(String price) {
        this.price = price;
    }

    // ---- description ----------------------------------------------------

    /** @return the description, or {@code null} if unset */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    // ---- publication date -----------------------------------------------

    /** @return the publication date text, or {@code null} if unset */
    public String getPublishDate() {
        return this.publishDate;
    }

    /** @param publishDate the publication date text to store */
    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book information from the Dangdang search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured title, then pass that HTML to
 * {@link #fetchData(String)} to extract a {@link CompareBook}.
 *
 * @author Tony Shen
 */
public class FecthDangDang {

    /** Title used as the search keyword and copied onto every result. */
    private String bookName;

    /**
     * @param bookName title of the book to search for
     */
    public FecthDangDang(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the search result page for the configured book title.
     *
     * <p>The HTTP client is shut down in a {@code finally} block so its
     * connections are released even when the request fails.
     *
     * @return the response body as text
     * @throws Exception if the keyword cannot be encoded or the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is URL-encoded as GBK; presumably that is what the
            // site expects for its query string -- TODO confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://search.dangdang.com/search.php?catalog=&key="
                    + paramStr + "&SearchFromTop=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts book details from a search result page.
     *
     * <p>Nodes are located by CSS class inside the elements whose class is
     * {@code list_r_list}; description, author and publisher are read from
     * the 2nd, 5th and 6th links found there. Any piece of markup that is
     * missing leaves the corresponding property {@code null} instead of
     * failing the whole extraction.
     *
     * @param responseBody HTML of the search result page
     * @return a book carrying the configured title plus whatever could be extracted
     * @throws Exception if the HTML parser fails
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList results = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "list_r_list"));

        book.setPublishDate(plainTextOfFirst(P(results, "class", "list_r_list_h4_info3")));
        book.setOriginalPrice(plainTextOfFirst(P(results, "class", "gray del")));
        book.setPrice(plainTextOfFirst(P(results, "class", "red")));

        // Positions of the links are tied to the page layout the original
        // code was written against.
        NodeList links = results.extractAllNodesThatMatch(
                new NodeClassFilter(LinkTag.class), true);
        book.setDesc(linkTextAt(links, 1));
        book.setAuthor(linkTextAt(links, 4));
        book.setPublish(linkTextAt(links, 5));
        return book;
    }

    /**
     * Recursively selects the nodes whose attribute {@code a} has value {@code b}.
     *
     * @param nodelist nodes to search
     * @param a attribute name
     * @param b attribute value
     * @return the matching nodes
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** Returns the plain text of the first node, or {@code null} when there is none. */
    private static String plainTextOfFirst(NodeList nodes) {
        if (nodes == null || nodes.size() == 0) {
            return null;
        }
        Node first = nodes.elementAt(0);
        return first == null ? null : first.toPlainTextString();
    }

    /** Returns the link text at {@code index}, or {@code null} when no link exists there. */
    private static String linkTextAt(NodeList nodes, int index) {
        if (nodes == null || index < 0 || index >= nodes.size()) {
            return null;
        }
        Node node = nodes.elementAt(index);
        return node instanceof LinkTag ? ((LinkTag) node).getLinkText() : null;
    }

    /** @return the title being searched for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
/**
 * Fetches book price information from the Joyo/Amazon.cn (Zhuoyue) search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured title, then pass that HTML to
 * {@link #fetchData(String)} to extract a {@link CompareBook}.
 *
 * @author Tony Shen
 */
public class FetchZhuoYue {

    /** Title used as the search keyword and copied onto every result. */
    private String bookName;

    /**
     * @param bookName title of the book to search for
     */
    public FetchZhuoYue(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the search result page for the configured book title.
     *
     * <p>The HTTP client is shut down in a {@code finally} block so its
     * connections are released even when the request fails.
     *
     * @return the response body as text
     * @throws Exception if the keyword cannot be encoded or the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is URL-encoded as GBK; presumably that is what the
            // site expects for its query string -- TODO confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.amazon.cn/s/ref=nb_ss?url=search-alias%3Dbooks&keywords="
                    + paramStr + "&Go.x=15&Go.y=13&searchKind=name";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the selling price from a search result page.
     *
     * <p>The price is the plain text of the first node with class
     * {@code saleprice} found inside the elements with class {@code n2}.
     * When no such node exists the price is left {@code null} instead of
     * failing the extraction.
     *
     * @param responseBody HTML of the search result page
     * @return a book carrying the configured title and, if found, the price
     * @throws Exception if the HTML parser fails
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList results = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "n2"));

        NodeList prices = P(results, "class", "saleprice");
        if (prices != null && prices.size() > 0) {
            Node priceNode = prices.elementAt(0);
            if (priceNode != null) {
                book.setPrice(priceNode.toPlainTextString());
            }
        }
        return book;
    }

    /**
     * Recursively selects the nodes whose attribute {@code a} has value {@code b}.
     *
     * @param nodelist nodes to search
     * @param a attribute name
     * @param b attribute value
     * @return the matching nodes
     */
    public NodeList P(NodeList nodelist, String a, String b) {
        NodeFilter filter = new HasAttributeFilter(a, b);
        return nodelist.extractAllNodesThatMatch(filter, true);
    }

    /** @return the title being searched for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
import java.net.URLEncoder;
import org.apache.http.client.HttpClient;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
/**
 * Fetches book price information from the China-Pub search page.
 *
 * <p>Typical use: call {@link #getResponse()} to download the search result
 * HTML for the configured title, then pass that HTML to
 * {@link #fetchData(String)} to extract a {@link CompareBook}.
 *
 * @author Tony Shen
 */
public class FecthChinaPub {

    /** Title used as the search keyword and copied onto every result. */
    private String bookName;

    /**
     * @param bookName title of the book to search for
     */
    public FecthChinaPub(String bookName) {
        this.bookName = bookName;
    }

    /**
     * Downloads the search result page for the configured book title.
     *
     * <p>The HTTP client is shut down in a {@code finally} block so its
     * connections are released even when the request fails.
     *
     * @return the response body as text
     * @throws Exception if the keyword cannot be encoded or the request fails
     */
    public String getResponse() throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            httpclient.getParams().setParameter(
                    HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
            // The keyword is URL-encoded as GBK; presumably that is what the
            // site expects for its query string -- TODO confirm.
            String paramStr = URLEncoder.encode(bookName, "GBK");
            String url = "http://www.china-pub.com/s/?key1="
                    + paramStr + "&type=&pz=1";
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Extracts the price from a search result page.
     *
     * <p>The price is taken to be the first whitespace-separated token of the
     * plain text of the first {@code <ul>} found inside the elements with
     * class {@code listview}. When no such list exists the price is left
     * {@code null} instead of failing the extraction.
     *
     * @param responseBody HTML of the search result page
     * @return a book carrying the configured title and, if found, the price
     * @throws Exception if the HTML parser fails
     */
    public CompareBook fetchData(String responseBody) throws Exception {
        CompareBook book = new CompareBook();
        book.setBookName(bookName);

        Parser parser = new Parser(responseBody);
        NodeList listView = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("class", "listview"));
        NodeList lists = listView.extractAllNodesThatMatch(new TagNameFilter("ul"), true);

        if (lists != null && lists.size() > 0) {
            Node descNode = lists.elementAt(0);
            if (descNode != null) {
                // split() on a trimmed string always yields at least one
                // element, so index 0 is safe.
                String[] tokens = descNode.toPlainTextString().trim().split("\\s+");
                book.setPrice(tokens[0]);
            }
        }
        return book;
    }

    /** @return the title being searched for */
    public String getBookName() {
        return bookName;
    }

    /** @param bookName the title to search for */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }
}
/**
 * Command-line entry point that looks up one book on Dangdang, Zhuoyue and
 * China-Pub and prints what each site returned.
 *
 * <p>Each site is queried independently: a failure on one site is reported
 * and the remaining sites are still queried.
 *
 * @author Tony Shen
 */
public class ComparePrice {

    /** Title of the book to look up on every site. */
    private static final String BOOK_NAME = "我的奋斗";

    /**
     * Queries the three sites in turn and prints the results to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        printDangDang(new FecthDangDang(BOOK_NAME));
        printZhuoYue(new FetchZhuoYue(BOOK_NAME));
        printChinaPub(new FecthChinaPub(BOOK_NAME));
    }

    /** Fetches and prints the Dangdang result; errors are reported, not propagated. */
    private static void printDangDang(FecthDangDang dd) {
        try {
            String responseDD = dd.getResponse();
            CompareBook book1 = dd.fetchData(responseDD);
            System.out.println("++++++当当抓取结果+++++");
            System.out.println("书名:"+book1.getBookName());
            System.out.println("作者:"+book1.getAuthor());
            System.out.println("出版社:"+book1.getPublish());
            System.out.println("原价:"+book1.getOriginalPrice());
            System.out.println("现价:"+book1.getPrice());
            System.out.println("描述:"+book1.getDesc());
            System.out.println(book1.getPublishDate());
        } catch (Exception e) {
            // Boundary of the program: report and move on to the next site.
            e.printStackTrace();
        }
    }

    /** Fetches and prints the Zhuoyue result; errors are reported, not propagated. */
    private static void printZhuoYue(FetchZhuoYue zy) {
        try {
            String responseZY = zy.getResponse();
            CompareBook book2 = zy.fetchData(responseZY);
            System.out.println("++++++卓越抓取结果+++++");
            System.out.println("书名:"+book2.getBookName());
            System.out.println("现价:"+book2.getPrice());
        } catch (Exception e) {
            // Boundary of the program: report and move on to the next site.
            e.printStackTrace();
        }
    }

    /** Fetches and prints the China-Pub result; errors are reported, not propagated. */
    private static void printChinaPub(FecthChinaPub cp) {
        try {
            String responseCP = cp.getResponse();
            CompareBook book3 = cp.fetchData(responseCP);
            System.out.println("++++++China-Pub抓取结果+++++");
            System.out.println("书名:"+book3.getBookName());
            System.out.println("现价:"+book3.getPrice());
        } catch (Exception e) {
            // Boundary of the program: nothing follows, just report.
            e.printStackTrace();
        }
    }
}
程序的运行结果
- 描述: 结构
- 大小: 28.2 KB
分享到:
相关推荐
当当开源sharding-jdbc-轻量级数据库分库分表中间件,比较成熟的分库分表中间件,值得一看。
本书由China-pub首发,目前在当当和淘宝上都有卖。价格低至7.5折,比较实惠。大家先下载试读版本看看,也可以去书店逛逛,看看书写的如何。相关网址如下: China-Pub: http://www.china-pub.com/197674 当当:...
模拟当当网自己练习的小项目,这是本人亲自编写的代码,不到之处,还请多多指教。
本程序是完全模拟当当网而做的网上购物系统,运用了java+jsp+struts+hibernate等技术,开发环境:myelispe,Tomcat,是相关毕业程序的首选!
京东 当当 卓越价格变化监控器(三合一)2.3
这是一个当当网主页的HTML+CSS代码实现,希望对大家有用
当当网web项目,包含技术及相应框架:html、css、jsp、bootstrap、javaScript、JQuery、maven、tomcat、JDK1.8、SpringMVC、Spring、mybatis、Mysql等...(解压出来sql文件在主目录下)
首页源码,找了很久。拿出来大家一起分享,不是很好。
用html+css制作的一个当当图书网 不用修改直接使用
3、在页面的右侧有一个随滚动条上下移动的广告图片,并且图片上方有一个“关闭”按钮,单击“关闭”按钮,图片和“关闭”按钮均隐藏 4、页面中间的特效是带数字按钮的循环显示的图片广告,六张图片按规定的时间间隔...
Dubbox now means Dubbo eXtensions, and it adds features like RESTful remoting, Kyro/FST serialization, etc to the Dubbo service framework
一个简单的当当网购物车页面,可以实现简单购物
我的写当当网2012-05-31
一个仿当当网的系统,可以当做java ee项目的练习!其中运用了SSH三大框架!
struct2. 当当网-网页制作
当当网网站,JSP技术 MySQL数据库 安装方便 操作简单
jsp+JavaScript+struts+...和国内最好的网上书店-------当当网的框架相同,包括顾客的购物车、订单管理和留言板,支持商品搜索;后台管理员的商品、订单、会员、系统管理。界面优美,功能齐全,不可多得的优秀作品。
分库分表_基于当当jdbc-sharding,分库分表_基于当当jdbc-sharding
软件类型:免费软件 所属类别:图书动漫 更新时间:昨天 程序大小:16.41MB ...2、24种书籍排版样式供你选择,总有一款你的菜。 3、全程记录阅读点滴,方便笔记,分享书籍。手机看书,感受更美好的阅读时光。