本文採用 HttpClient 來模擬站點的登錄、發帖與回復,介紹 HttpClient 的用法和常見問題的解決方案。 HttpClient 是 Apache Jakarta Commons 下的子項目,是一個支持 HTTP 協議的客戶端編程工具包,可以用來模擬瀏覽器的行為。它提供了很多方法來簡化網絡訪問,雖然其中大部分功能也可以使用較底層的 java.net.HttpURLConnection 來實現。例如:
- 實現了所有 HTTP 的方法( GET、POST 等)
- 支持 HTTPS 協議
- 支持代理服務器
- 自動維護 Cookies 等
我們知道, HTTP 協議本身是無狀態的,要維持會話,現在基本上都是採用基於 Cookies 的方式( Session 機制通常也是通過 Cookies 實現的),所以 HttpClient 自動維護 Cookies 的功能對我們的登錄、發帖、回復非常有用(一般網站都需要先登錄再發帖回復)。 下面的例子都是採用 commons-httpclient-3.1.jar 包來實現的(雖然 HttpClient 4.0 已經發布,但是代碼發生了較大的重構,調用方式也發生了很大的改變)。 下載 jar 包的路徑為: http://hc.apache.org/downloads.cgi 由於 httpclient 使用了 Apache Jakarta Commons 下的子項目 logging 和 codec ,所以也需要在 http://commons.apache.org/ 下載這兩個包: commons-logging.jar commons-codec-1.3.jar 為了更好地理解代碼,設計的 UML 類圖如下: 方法調用的時序圖如下:
其中, BrowserContext 類代表瀏覽器上下文對象,負責維護 HttpClient 連接和 Cookies 。 KaixinSitePost 是負責實現開心網具體登錄、發帖、回復邏輯的類。 BrowserContext 的代碼如下:
1 /** */ /** 2 * Copyright (C): 2009 3 * @author 陳新漢 4 * Aug 24, 2009 3:09:00 PM 5 */ 6 7 /** */ /** 8 * 瀏覽器進程上下文 9 */ 10 public class BrowserContext 11 { 12 private HttpClient client; // 注意:每個站點和每個用戶,對應一個單獨的BrowserContext對象 13 private Cookie[] cookies = new Cookie[ 0 ]; // 維護Cookies 14 private Proxyips proxyip = null ; // 當前的代理IP 15 private Siteusers user = null ; // 當前的登錄用戶 16 17 public Cookie[] getCookies() { 18 return cookies; 19 } 20 21 public void setCookies(Cookie[] cookies) { 22 this .cookies = cookies; 23 } 24 25 public void addCookie(Cookie c) { 26 if (cookies != null && cookies.length > 0 ) { 27 Cookie[] others = new Cookie[cookies.length + 1 ]; 28 System.arraycopy(cookies, 0 , others, 0 , cookies.length); 29 others[others.length - 1 ] = c; 30 cookies = others; 31 } else { 32 cookies = new Cookie[ 1 ]; 33 cookies[ 0 ] = c; 34 } 35 } 36 37 public Proxyips getProxyip() { 38 return proxyip; 39 } 40 41 public void setProxyip(Proxyips proxyip) { 42 this .proxyip = proxyip; 43 if ( this .proxyip != null ) { 44 client.getHostConfiguration().setProxy(proxyip.getIp(),proxyip.getPort()); 45 client.getParams().setAuthenticationPreemptive( true ); 46 // 如果代理需要密碼驗證,這里設置用戶名密碼 47 // client.getState().setProxyCredentials(AuthScope.ANY, new UsernamePasswordCredentials("","")); 48 } 49 } 50 51 public HttpClient getClient() { 52 return client; 53 } 54 55 public Siteusers getUser() { 56 return user; 57 } 58 59 public void setUser(Siteusers user) { 60 this .user = user; 61 } 62 63 private BrowserContext(Site site) { 64 super (); 65 Protocol myhttps = new Protocol( " https " , new MySecureProtocolSocketFactory(), 443 ); 66 Protocol.registerProtocol( " https " , myhttps); 67 client = new HttpClient(); 68 client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); 69 HttpConnectionManagerParams managerParams = client.getHttpConnectionManager().getParams(); 70 // 設置連接超時時間(單位毫秒) 71 // managerParams.setConnectionTimeout(50000); 72 // 設置讀數據超時時間(單位毫秒) 73 // 
managerParams.setSoTimeout(120000); 74 initForSiteVisit(site.getSite(),site.getPort(),site.getCharset()); 75 } 76 77 public BrowserContext(Site site,Proxyips proxyip) { 78 this (site); 79 this .setProxyip(proxyip); 80 } 81 82 private void initForSiteVisit(String siteurl, int port,String charset) { 83 client.getHostConfiguration().setHost(siteurl, port, " http " ); 84 // 解決中文亂碼問題,和指定網站的頁面編碼一致 85 client.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset); 86 } 87 88 // 查看cookie信息 89 public void printCookies() 90 { 91 System.out.println( " ---------------Cookie---------------- " ); 92 if (cookies != null ) { 93 for (Cookie c:cookies) { 94 System.out.println(c.getName() + " : " + c.getValue()); 95 } 96 } else { 97 System.out.println( " 沒有設置Cookies " ); 98 } 99 System.out.println( " ---------------Cookie---------------- " ); 100 } 101 102 public void setCommonMethodRequestHeaders(HttpMethodBase method) 103 { 104 method.setRequestHeader( " Accept " , " */* " ); 105 // method.setRequestHeader("Accept-Language", "zh-cn"); 106 // method.setRequestHeader("Accept-Encoding", "gzip,deflate"); 107 method.setRequestHeader( " User-Agent " , " Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;) " ); 108 // 設置非常重要 109 method.setRequestHeader( " Connection " , " Keep-Alive " ); 110 } 111 112 public String redirectToURL(String url) throws IOException 113 { 114 if (url != null ) { 115 try { 116 System.out.println( " 頁面重定向到: " + url); 117 String responseString = this .doCommonVisitWithURL(url); 118 // System.out.println(responseString); 119 return responseString; 120 } catch (IOException e) { 121 System.out.println( " 重定向: " + url + " 出錯 " ); 122 } 123 } else { 124 System.out.println( " redirect url is null " ); 125 } 126 return null ; 127 } 128 129 public String doCommonVisitWithURL(String url) throws IOException { 130 GetMethod get = new GetMethod(url); 131 return this .doGet(get); 132 } 133 134 public String doPost(ExpectContinueMethod post) throws IOException 
135 { 136 if (post == null ) 137 return null ; 138 try 139 { 140 if (getCookies() != null ) { 141 // printCookies(); 142 client.getState().addCookies(cookies); 143 post.addRequestHeader( " Cookie " ,getCookies().toString()); 144 // System.out.println(post.getRequestHeader("Cookie").getValue()); 145 } 146 setCommonMethodRequestHeaders(post); 147 int statusCode = client.executeMethod(post); 148 cookies = client.getState().getCookies(); 149 System.out.println(statusCode); 150 // System.out.println(post.getResponseHeader("Location")); 151 String responseString = post.getResponseBodyAsString(); 152 System.out.println(responseString); 153 printCookies(); 154 post.releaseConnection(); 155 if (statusCode == 301 || statusCode == 302 ) { 156 redirectToURL(post.getResponseHeader( " Location " ).getValue()); 157 } 158 return responseString; 159 } 160 finally { 161 if (post != null ) 162 post.releaseConnection(); 163 } 164 } 165 166 public String doGet(GetMethod get) throws IOException 167 { 168 if (get == null ) 169 return null ; 170 if (cookies != null ) { 171 // printCookies(); 172 client.getState().addCookies(cookies); 173 get.addRequestHeader( " Cookie " ,cookies.toString()); 174 } 175 try { 176 setCommonMethodRequestHeaders(get); 177 int statusCode = client.executeMethod(get); 178 cookies = client.getState().getCookies(); // 重新保存Cookies 179 printCookies(); 180 System.out.println(statusCode); 181 if (statusCode == 301 || statusCode == 302 ) { 182 redirectToURL(get.getResponseHeader( " Location " ).getValue()); 183 } 184 String responseString = get.getResponseBodyAsString(); 185 // System.out.println(responseString); 186 return responseString; 187 } 188 finally { 189 if (get != null ) 190 get.releaseConnection(); 191 } 192 } 193 194 public String getRedirectURL(String content) 195 { 196 if (content != null && content.indexOf( " window.location=\ "" )!=-1){ 197 int begin = content.indexOf( " window.location=\ "" ); 198 int end = content.indexOf( " \ "" , begin+17); 199 
return content.substring(begin + 17 ,end); 200 } 201 return null ; 202 } 203 } KaixinSitePost類的代碼: 1 /** *//** 2 * Copyright (C): 2009 3 * @author 陳新漢 4 * Aug 14, 2009 11:16:32 AM 5 */ 6 7 /** *//** 8 * 模擬測試網站(不需要驗證碼) 9 * 開心網(www.kaixin.com) 10 */ 11 public class KaixinSitePost implements ISitePost 12  { 13 private static final String LOGON_SITE = "www.kaixin.com"; 14 private static final int LOGON_PORT = 80; 15 private static final String CHARSET="UTF-8"; 16 private BrowserContext context=null; 17 18 19 //單個用戶登錄 20 public String login(Siteusers userinfo, Proxyips ip) 21 { 22 if(userinfo!=null) 23 { 24 SiteLogin login=new SiteLogin(context,"http://login.kaixin.com/Login.do"); 25 if(ip!=null) 26 login.getContext().setProxyip(ip); 27 Map<String,String> params=new HashMap<String,String>(); 28 params.put("ss", "10106"); 29 params.put("loginregFrom", "index"); 30 params.put("origURL", "http://www.kaixin.com/SysHome.do"); 31 params.put("email", userinfo.getUsername()); 32 params.put("password", userinfo.getUserpwd()); 33 login.addRequestParameters(params); 34 return login.login(userinfo); 35 } 36 return null; 37 } 38 39 public List<Siteboards> parseBoard(Siteboards data) { 40 41 return null; 42 } 43 44 public String post(Postinfos postinfo,List<Siteboards> siteboards) 45 { 46 if(postinfo!=null && siteboards!=null) { 47 SitePost sport=new SitePost(context); 48 context.getClient().getHostConfiguration().setHost("blog.kaixin.com"); 49 Map<String,String> params=new HashMap<String,String>(); 50 params.put("categoryId", "0"); 51 params.put("blogControl", "1"); 52 params.put("title", postinfo.getTitle()); 53 params.put("body",postinfo.getContent()); 54 sport.addRequestParameters(params); 55 56 for(Siteboards sb:siteboards) { 57 sb.setPostUrl("http://blog.kaixin.com/NewEntry.do"); 58 try { 59 sport.post(postinfo, sb); 60 }catch(IOException e) { 61 e.printStackTrace(); 62 } 63 } 64 } 65 return null; 66 } 67 68 69 public String reply(Postinfos postinfo,List<Articleinfos> arts) 70 
{ 71 72 return null; 73 } 74 75 /** *//** 76 * @param args 77 */ 78 public static void main(String[] args) 79 { 80 try 81 { 82 Siteusers userinfo=new Siteusers(); 83 userinfo.setUsername("xxxx"); 84 userinfo.setUserpwd("xxxx"); 85 Proxyips ips = new Proxyips(); 86 ips.setIp("218.56.64.210"); 87 ips.setPort(8080); 88 KaixinSitePost sp=new KaixinSitePost(); 89 sp.login(userinfo,ips); 90 Postinfos post=new Postinfos(); 91 post.setContent("<p>lllllllllllllllllllllll</p>"); 92 post.setTitle("中文測試"); 93 List<Siteboards> siteboards=new ArrayList<Siteboards>(); 94 siteboards.add(new Siteboards()); 95 siteboards.add(new Siteboards()); 96 sp.post(post,siteboards); 97 } 98 catch(Exception e) { 99 e.printStackTrace(); 100 } 101 } 102 } 封裝登錄的類SiteLogin: 1 /** *//** 2 * Copyright (C): 2009 3 * @author 陳新漢 4 * Aug 24, 2009 3:03:00 PM 5 */ 6 7 /** *//** 8 * 站點登錄 9 */ 10 public class SiteLogin extends AbstractMethodAdapter 11  { 12 private HttpMethodBase method; 13 private boolean ispost=true; 14 protected BrowserContext context; //當前的瀏覽器進程上下文 15 16 public BrowserContext getContext() { 17 return context; 18 } 19 20 /** *//** 21 * 構造函數 22 * @param context 23 * @param url 24 * @param ispost 設置是否POST方式提交,默認為POST 25 */ 26 public SiteLogin(BrowserContext context,String url,boolean ispost) { 27 super(); 28 this.context = context; 29 this.ispost=ispost; 30 method = this.ispost?new PostMethod(url):new GetMethod(url); 31 } 32 33 public SiteLogin(BrowserContext context,String url) { 34 this(context,url,true); 35 } 36 37 public String login(Siteusers user) 38 { 39 int statusCode=0; 40 if(this.ispost && this.hasRequestParameters()) { 41 ((PostMethod)method).setRequestBody(this.getRequestParams()); 42 } 43 44 if(this.hasExtraRequestHeaders()) { 45 this.addExtraRequestHeaders(method,this.getExtraRequestHeaders()); 46 } 47 context.setCommonMethodRequestHeaders(method); 48 try 49 { 50 if(context.getCookies()!=null) { 51 //printCookies(); 52 
context.getClient().getState().addCookies(context.getCookies()); 53 method.addRequestHeader("Cookie", context.getCookies().toString()); 54 } 55 statusCode = context.getClient().executeMethod(method); 56 context.setCookies(context.getClient().getState().getCookies()); 57 String responseString = method.getResponseBodyAsString(); 58 //System.out.println(responseString); 59 method.releaseConnection(); 60 if(statusCode==HttpStatus.SC_OK) { 61 System.out.println("登錄成功"); 62 return responseString; 63 } 64 else if(statusCode==302 ||statusCode==301) { 65 System.out.println("登錄成功,頁面重定向"); 66 String url=method.getResponseHeader("Location").getValue(); 67 return context.redirectToURL(url); 68 } 69 else { 70 System.out.println("登錄失敗,狀態碼:"+statusCode); 71 } 72 }catch(Exception e) { 73 e.printStackTrace(); 74 }finally { 75 if(method!=null) 76 method.releaseConnection(); 77 } 78 return null; 79 } 80 } 81 封裝站點發帖的類SitePost: 1 /** *//** 2 * Copyright (C): 2009 3 * @author 陳新漢 4 * Aug 24, 2009 5:05:55 PM 5 */ 6 7 /** *//** 8 * 站點發帖新帖 9 */ 10 public class SitePost extends CommonSitePost 11  { 12 13 public SitePost(BrowserContext context) { 14 super(); 15 this.context=context; 16 } 17 18 public String post(Postinfos postinfo,Siteboards siteboard) throws IOException 19 { 20 if (postinfo != null && siteboard != null) { 21 if (StringUtils.isNotEmpty(siteboard.getPostUrl())) { 22 PostMethod post = new PostMethod(siteboard.getPostUrl()); 23 if(this.hasRequestParameters()) { 24 post.setRequestBody(this.getRequestParams()); 25 } 26 if(this.hasExtraRequestHeaders()) { 27 this.addExtraRequestHeaders(post,this.getExtraRequestHeaders()); 28 } 29 context.setCommonMethodRequestHeaders(post); 30 this.context.doPost(post); 31 }else { 32 System.out.println("版面的新帖提交地址不能為空!"); 33 } 34 }else { 35 System.out.println("帖子或者版面信息輸入都不能為空"); 36 } 37 return null; 38 } 39 } 40 友情提醒:本博文章歡迎轉載,但請注明出處: 陳新漢
|