請求登錄人人網比較麻煩,需要記住 cookie。尤其是下面這句代碼:
httpContext.setAttribute(ClientContext.COOKIE_STORE, httpClient.getParams().getParameter("CookieStore"));
試了很多遍才找到 httpClient.getParams().getParameter("CookieStore") 這種寫法。
主要代碼如下:
package com.koyo.downloadphoto.service.impl;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
import com.koyo.downloadphoto.service.Spider;
import com.koyo.downloadphoto.utils.HttpUtils;
import com.koyo.downloadphoto.utils.ParseUtils;
/**
 * Spider that logs in to Renren, reads an album-overview page and downloads
 * every photo of every album found there below {@link #DOWNLOAD_ROOT}.
 *
 * <p>{@code httpClient}, {@code httpContext}, {@code loginName} and
 * {@code loginPassword} are not declared in this class; they are presumably
 * inherited from {@link Spider} (confirm against the superclass).
 */
public class SpiderForRenRen extends Spider {

    private static final Logger logger = Logger.getLogger(SpiderForRenRen.class);

    /** Local root directory; photos are stored as friendName/albumName/photoName below it. */
    private static final String DOWNLOAD_ROOT = "D:/PhotosForRenRen/";

    /**
     * Logs in, loads the album-overview page and downloads all photos that are
     * not yet present on disk.
     *
     * <p>Any failure aborts the run; it is logged and not propagated to the caller.
     */
    @Override
    public void execute() {
        try {
            // NOTE(review): the string literal on the next line was truncated in the
            // version of this file under review (the URL text in front of
            // "/album/relatives" is missing), so the statement does not compile as
            // shown. It is reproduced unchanged; restore the original URL expression.
            String url = " + "/album/relatives";

            // =================== log in ===================
            login();

            // =================== read the album overview page ===================
            String html = fetchHtml(url);
            String friendName = parseFriendName(html);

            List<LinkTag> albumLinks = ParseUtils.parseTags(html, LinkTag.class,
                    "class", "album-cover");
            List<Span> albumNames = ParseUtils.parseTags(html, Span.class,
                    "class", "album-name");
            if (albumLinks == null) {
                return;
            }

            // Album links and album names are matched by position, so never index
            // past the shorter of the two lists.
            int albumCount = albumLinks.size();
            int nameCount = (albumNames == null) ? 0 : albumNames.size();
            if (nameCount < albumCount) {
                logger.warn("Found " + albumCount + " album links but only " + nameCount
                        + " album names; albums without a name are skipped");
                albumCount = nameCount;
            }

            for (int i = 0; i < albumCount; i++) {
                String rawName = albumNames.get(i).getStringText();
                // The avatar album's name is preceded by a <span class="userhead"/>,
                // so trim() cannot be used; keep only the text after the last newline.
                String albumName = rawName.substring(rawName.lastIndexOf("\n") + 1);
                downloadAlbum(friendName, albumName, albumLinks.get(i).getLink());
            }
        } catch (Exception e) {
            // Report through the logger (with stack trace) instead of printStackTrace().
            logger.error("Renren photo download aborted", e);
        }
    }

    /**
     * Posts the login form, then requests the first link found in the response
     * (presumably the post-login redirect target) and saves the cookies in
     * {@code httpContext}.
     *
     * @throws Exception if a request fails or the login response contains no link;
     *         declared broadly because the helper signatures are not visible here
     */
    private void login() throws Exception {
        // NOTE(review): the string literal on the next line was truncated in the
        // version of this file under review (the login URL and the closing of the
        // statement are missing; the trailing text was the comment "add the POST
        // parameters"). It is reproduced unchanged; restore the original login URL.
        HttpPost post = new HttpPost(" // 添加POST參數
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("email", loginName));
        nvps.add(new BasicNameValuePair("password", loginPassword));
        post.setEntity(new UrlEncodedFormEntity(nvps, "utf-8"));

        HttpResponse response = httpClient.execute(post, httpContext);
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            // Same as before: without a response body the follow-up request is skipped.
            logger.warn("Login response had no body; the follow-up request is skipped");
            return;
        }

        // Decode with the charset declared by the response (UTF-8 if none is
        // declared); EntityUtils.toString also closes the content stream.
        String loginHtml = EntityUtils.toString(entity, "utf-8");
        LinkTag followLink = null;
        if (loginHtml != null) {
            followLink = ParseUtils.parseTag(loginHtml, LinkTag.class);
        }
        if (followLink == null) {
            throw new IllegalStateException("Login response for account " + loginName
                    + " contained no link to follow");
        }

        response = httpClient.execute(new HttpGet(followLink.getLink()), httpContext);
        // Save the cookies: publish the client's "CookieStore" parameter as the
        // cookie store of the shared context.
        httpContext.setAttribute(ClientContext.COOKIE_STORE, httpClient
                .getParams().getParameter("CookieStore"));
        EntityUtils.consume(response.getEntity());

        // The password is deliberately no longer echoed to the console.
        System.out.println("賬號:" + loginName);
    }

    /**
     * Fetches the page at {@code url} through {@code HttpUtils.getHtml}.
     *
     * @param url address of the page to load
     * @return the page content, never {@code null}
     * @throws RuntimeException if no content could be obtained
     * @throws Exception whatever {@code HttpUtils.getHtml} itself throws
     */
    private String fetchHtml(String url) throws Exception {
        String html = HttpUtils.getHtml(httpClient, httpContext, url);
        if (html == null) {
            logger.error("無法獲取【" + url + "】網址的內容");
            throw new RuntimeException("無法獲取【" + url + "】網址的內容");
        }
        return html;
    }

    /**
     * Extracts the friend's name: the trimmed text of the first link inside the
     * {@code <ul class="nav-tabs">} element(s) of the album-overview page.
     *
     * @param html content of the album-overview page
     * @return the trimmed link text
     * @throws Exception if parsing fails or no such link exists
     */
    private String parseFriendName(String html) throws Exception {
        Parser parser = new Parser();
        parser.setInputHTML(html);
        AndFilter navTabsFilter = new AndFilter(new TagNameFilter("ul"),
                new HasAttributeFilter("class", "nav-tabs"));
        NodeList navTabs = parser.parse(navTabsFilter);

        LinkTag nameLink = ParseUtils.parseTag(navTabs.toHtml(), LinkTag.class);
        if (nameLink == null) {
            throw new IllegalStateException(
                    "No link found inside <ul class=\"nav-tabs\">; cannot determine the friend name");
        }
        return nameLink.getLinkText().trim();
    }

    /**
     * Loads one album page and downloads every photo linked from it through an
     * {@code <a class="picture">} element.
     *
     * @param friendName first directory level below the download root
     * @param albumName second directory level below the download root
     * @param albumUrl address of the album page
     * @throws Exception if a page cannot be loaded or a photo cannot be stored
     */
    private void downloadAlbum(String friendName, String albumName, String albumUrl)
            throws Exception {
        String albumHtml = fetchHtml(albumUrl);
        List<LinkTag> photoLinks = ParseUtils.parseTags(albumHtml, LinkTag.class,
                "class", "picture");
        if (photoLinks == null) {
            return;
        }
        for (LinkTag photoLink : photoLinks) {
            downloadPhoto(friendName, albumName, photoLink.getLink());
        }
    }

    /**
     * Loads one photo page, locates its {@code <img id="photo">} element and
     * stores the image locally unless a file with that name already exists.
     *
     * @param friendName first directory level below the download root
     * @param albumName second directory level below the download root
     * @param photoPageUrl address of the page showing the photo
     * @throws Exception if the page cannot be loaded or the file cannot be written
     */
    private void downloadPhoto(String friendName, String albumName, String photoPageUrl)
            throws Exception {
        String photoHtml = fetchHtml(photoPageUrl);
        ImageTag imageTag = ParseUtils.parseTag(photoHtml, ImageTag.class, "id", "photo");
        if (imageTag == null) {
            return;
        }

        // Value of the <img> tag's src attribute.
        String imageUrl = imageTag.getImageURL();
        // File name = last path segment including its leading "/"; a URL without
        // any "/" is used as a whole instead of failing in substring(-1).
        int lastSlash = imageUrl.lastIndexOf("/");
        String photoName = (lastSlash >= 0) ? imageUrl.substring(lastSlash) : "/" + imageUrl;

        File target = new File(DOWNLOAD_ROOT + friendName + "/" + albumName + photoName);
        // A photo that is already on disk is not downloaded again.
        if (target.exists()) {
            return;
        }

        byte[] image = HttpUtils.getImage(httpClient, httpContext, imageUrl);
        if (image == null) {
            // Assumes getImage may signal failure with null (not visible here).
            // Nothing is written, so no placeholder file makes a later run treat
            // the photo as already downloaded.
            logger.error("Failed to download image [" + imageUrl + "]");
            return;
        }

        // writeByteArrayToFile creates missing parent directories and closes the
        // stream, which the previous openOutputStream/IOUtils.write pair never did.
        FileUtils.writeByteArrayToFile(target, image);
        System.out.println("圖片【" + imageUrl + "】已下載");
    }
}