/*
 * Copyright (c) 2006 Your Corporation. All Rights Reserved.
 */
package liuxuan;
?5?
/**
 * Created by IntelliJ IDEA.
 * User: Administrator
 * Date: 2006-7-26
 * Time: 15:33:49
 * To change this template use File | Settings | File Templates.
 */
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.ObjectFindingVisitor;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
24?
25?public?class?LinkDemo
26?{
27?????public?static?void?main?(String[]?args)?throws?ParserException,?SQLException
28?????{
29?????????ConnectionManager?cn?=?new?ConnectionManager();
30?????????cn.setProxyHost("10.75.1.38");
31?????????cn.setProxyPort(80);
32?????????Parser.setConnectionManager(cn);
33?????????Parser?parser;
34?????????//parser.s
35?????????String[]?pyurl?=?new?String[2]?;
36?
37?????????pyurl[0]="http://www.google.cn/search?num=100&hl=zh-CN&newwindow=1&q=%E6%BF%AE%E9%98%B3&btnG=%E6%90%9C%E7%B4%A2&meta=cr%3DcountryCN";
38?????????pyurl[1]="http://www.google.cn/search?q=%E6%BF%AE%E9%98%B3&num=100&hl=zh-CN&lr=&cr=countryCN&newwindow=1&start=100&sa=N";
39?????????for?(int?j=0;j<pyurl.length;j++)?{
40?????????????parser?=?new?Parser?(pyurl[j]);
41?????????????ObjectFindingVisitor?visitor?=?new?ObjectFindingVisitor?(LinkTag.class);
42?????????????parser.visitAllNodesWith?(visitor);
43?????????????Node[]?links?=?visitor.getTags?();
44?????????????String?sql;
45?????????????????try?{
46?????????????????????????Class.forName("com.mysql.jdbc.Driver");
47?????????????????????}?catch?(ClassNotFoundException?e)?{
48?????????????????????????e.printStackTrace();??//To?change?body?of?catch?statement?use?File?|?Settings?|?File?Templates.
49?????????????????????}
50?????????????????????Connection?conn?=?null;
51?????????????????????try?{
52?????????????????????????conn?=?DriverManager.getConnection("jdbc:mysql://localhost:3306/crawdb",?"root",?"root");
53?????????????????????}?catch?(SQLException?e)?{
54?????????????????????????e.printStackTrace();??//To?change?body?of?catch?statement?use?File?|?Settings?|?File?Templates.
55?????????????????????}
56??????????????Statement?stmt?=?conn.createStatement();
57?????????????for?(int?i?=?0;?i?<?links.length;?i++)
58?????????????{
59?????????????????????sql="";
60?????????????????LinkTag?linkTag?=?(LinkTag)links[i];
61?????????????????if?(linkTag.getLink().indexOf("cache")<0?&&??linkTag.getLink().indexOf("google")<0?&&linkTag.getLinkText().indexOf("濮陽")>0)??{
62?
63?
64???????????????????????sql="insert?into?urls(name,note,url)?values('"+linkTag.getLinkText?()+"','"+"lixuan"+"','"+linkTag.getLink?()+"')";
65???????????????????????stmt.executeUpdate(sql);
66?????????????????????????//rs.close();
67?
68?????????????????????System.out.print?("\""?+?linkTag.getLinkText?()?+?"\"?=>?");
69?????????????????System.out.println?(linkTag.getLink?());
70?????????????????}
71?????????????}
72??????????????stmt.close();
73??????????????conn.close();
74?????????}
75?????}
76?
77?}
78?