锘??xml version="1.0" encoding="utf-8" standalone="yes"?>亚洲男同帅GAY片在线观看,亚洲精品国产综合久久久久紧,国产精品亚洲片在线花蝴蝶 http://www.tkk7.com/willpower88/category/37510.html瀵笿AVA鏈夌偣鐞嗚В浜嗏︹?/description>zh-cnMon, 09 Feb 2009 08:36:13 GMTMon, 09 Feb 2009 08:36:13 GMT60lucene2.0+heritrix紺轟緥琛ュ厖http://www.tkk7.com/willpower88/archive/2009/02/09/253914.html涓鍑?/dc:creator>涓鍑?/author>Mon, 09 Feb 2009 07:44:00 GMThttp://www.tkk7.com/willpower88/archive/2009/02/09/253914.htmlhttp://www.tkk7.com/willpower88/comments/253914.htmlhttp://www.tkk7.com/willpower88/archive/2009/02/09/253914.html#Feedback0http://www.tkk7.com/willpower88/comments/commentRss/253914.htmlhttp://www.tkk7.com/willpower88/services/trackbacks/253914.html search鐨凟xtractor浠g爜濡備笅錛岋紙鍒拰涔︿笂瀹炰緥鐩稿悓錛変緵澶у鍙傝冿細闄勪歡閲屾湁瀹屾暣浠g爜
package com.luceneheritrixbook.extractor.younet;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Date;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.luceneheritrixbook.extractor.Extractor;
import com.luceneheritrixbook.util.StringUtils;

/**
 * <p></p>
 * 
@author cnyqiao@hotmail.com
 * @date   Feb 6, 2009 
 
*/

public class ExtractYounetMoblie extends Extractor {

    @Override
    
public void extract() {
        BufferedWriter bw 
= null;
        NodeFilter title_filter 
= new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class""mo_tit"));
        NodeFilter attribute_filter 
= new AndFilter(new TagNameFilter("p"), new HasChildFilter(new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class""gn_sp1 blue1"))));
        NodeFilter img_filter 
= new AndFilter(new TagNameFilter("span"), new HasChildFilter(new TagNameFilter("img")));
        
        
//鎻愬彇鏍囬淇℃伅
        try {
            
//Parser鏍規嵁榪囨護鍣ㄨ繑鍥炴墍鏈夋弧瓚寵繃婊ゆ潯浠剁殑鑺傜偣
            
// 榪唬閫愭笎鏌ユ壘
            NodeList nodeList=this.getParser().parse(title_filter);
            NodeIterator it 
= nodeList.elements();
            StringBuffer title 
= new StringBuffer();
            
while (it.hasMoreNodes()) {
                Node node 
= (Node) it.nextNode();
                String[] names 
= node.toPlainTextString().split(" ");
                
for(int i = 0; i < names.length; i++)
                    title.append(names[i]).append(
"-");
                title.append(
new Date().getTime());
                
//鍒涘緩瑕佺敓鎴愮殑鏂囦歡
                bw = new BufferedWriter(new FileWriter(new File(this.getOutputPath() + title + ".txt")));
                
//鑾峰彇褰撳墠鎻愬彇欏電殑瀹屾暣URL鍦板潃
                int startPos = this.getInuputFilePath().indexOf("mirror"+ 6;
                String url_seg 
= this.getInuputFilePath().substring(startPos);
                url_seg 
= url_seg.replaceAll("\\\\""/");
                String url 
= "http:/" + url_seg;
                
//鍐欏叆褰撳墠鎻愬彇欏電殑瀹屾暣URL鍦板潃
                bw.write(url + NEWLINE);
                bw.write(names[
0+ NEWLINE);
                bw.write(names[
1+ NEWLINE);
                
            }
            
// 閲嶇疆Parser
            this.getParser().reset();
            Parser attNameParser 
= null;
            Parser attValueParser 
= null;
            
//Parser parser=new Parser("http://www.sina.com.cn");
            NodeFilter attributeName_filter = new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class""gn_sp1 blue1"));
            NodeFilter attributeValue_filter 
= new AndFilter(new TagNameFilter("span"), new HasAttributeFilter("class""gn_sp2"));
            String attName 
= "";
            String attValue 
= "";
            
// 榪唬閫愭笎鏌ユ壘
            nodeList=this.getParser().parse(attribute_filter);
            it 
= nodeList.elements();
            
while (it.hasMoreNodes()) {                
                Node node 
= (Node) it.nextNode();
                attNameParser 
= new Parser();
                attNameParser.setEncoding(
"GB2312");
                attNameParser.setInputHTML(node.toHtml());
                NodeList attNameNodeList 
= attNameParser.parse(attributeName_filter);
                attName 
= attNameNodeList.elements().nextNode().toPlainTextString();
                
                attValueParser 
= new Parser();
                attValueParser.setEncoding(
"GB2312");
                attValueParser.setInputHTML(node.toHtml());
                NodeList attValueNodeList 
= attValueParser.parse(attributeValue_filter);
                attValue 
= attValueNodeList.elements().nextNode().toPlainTextString();
                bw.write(attName.trim() 
+ attValue.trim());
                bw.newLine();
            }
            
// 閲嶇疆Parser
            this.getParser().reset();
            String imgUrl 
= "";
            String fileType 
="";
            
// 榪唬閫愭笎鏌ユ壘
            nodeList=this.getParser().parse(img_filter);
            it 
= nodeList.elements();
            
while (it.hasMoreNodes()) {                
                Node node 
= (Node) it.nextNode();
                
                ImageTag imgNode 
= (ImageTag)node.getChildren().elements().nextNode();
                imgUrl 
= imgNode.getAttribute("src");                
                fileType 
= imgUrl.trim().substring(imgUrl
                        .lastIndexOf(
"."+ 1);
                
//鐢熸垚鏂扮殑鍥劇墖鐨勬枃浠跺悕
                String new_iamge_file = StringUtils.encodePassword(imgUrl, HASH_ALGORITHM) + "." + fileType;
                
//imgUrl = new HtmlPaserFilterTest().replace(new_iamge_file, "+", " ");
                
//鍒╃敤miorr鐩綍涓嬬殑鍥劇墖鐢熸垚鐨勬柊鐨勫浘鐗?/span>
                this.copyImage(imgUrl, new_iamge_file);
                bw.write(SEPARATOR 
+ NEWLINE);
                bw.write(new_iamge_file 
+ NEWLINE);
            }
            
            
        } 
catch(Exception e) {
            e.printStackTrace();
        } 
finally {
            
try{
                
if (bw != null)
                    bw.close();
            }
catch(IOException e){
                e.printStackTrace();
            }
        }
        
    }
}
榪愯涔︿笂鐨刪eritrix瀹炰緥錛屽茍鎸変功涓婄殑榛樿璁劇疆榪涜鎶撳彇濡備笅錛碉疾錛╋細錛堣鑷繁鍒嗘瀽鏁寸悊錛?br />
http://mobile.younet.com/files/list_1.html
http://mobile.younet.com/files/list_2.html
http://mobile.younet.com/files/list_3.html



]]>
主站蜘蛛池模板: 国产亚洲精品无码专区| 亚洲第一网站免费视频| 久草免费福利视频| 亚洲美女免费视频| 麻豆国产VA免费精品高清在线| 中美日韩在线网免费毛片视频| 亚洲成年轻人电影网站www | 好大好硬好爽免费视频| 免费国产va在线观看| 亚洲一区二区三区夜色| 免费真实播放国产乱子伦| 无码人妻一区二区三区免费看 | 99久久免费看国产精品| 色偷偷亚洲第一综合网| 亚洲无线电影官网| 免费在线观看日韩| 中文字幕免费在线看线人 | 亚洲精品网站在线观看你懂的| 日本一区免费电影| 99久久精品免费精品国产| 特级av毛片免费观看| 亚洲国产精品免费在线观看| 国产成人亚洲精品91专区手机| 思思re热免费精品视频66| 三年片免费高清版| 国产亚洲精品国产福利在线观看 | 亚洲中文字幕无码爆乳app| 国产亚洲精品资源在线26u| 国产成人免费福利网站| 97人妻无码一区二区精品免费| jyzzjyzz国产免费观看| 亚洲色偷偷偷综合网| 亚洲国产精品日韩在线观看| 亚洲国产成人片在线观看| 又黄又爽的视频免费看| 成人au免费视频影院| 亚洲精品在线免费观看视频| 少妇性饥渴无码A区免费| 九九视频高清视频免费观看| 亚洲aⅴ无码专区在线观看春色| 亚洲黄色三级网站|