import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
/**
?*?<br>
?*?標(biāo)題:?<br>
?*?功能概要:?<br>
?*?版權(quán):?cityyouth.cn?(c)?2005?<br>
?*?公司:上海城市青年網(wǎng)?<br>
?*?創(chuàng)建時(shí)間:2005-12-21?<br>
?*?修改時(shí)間:?<br>
?*?修改原因:
?*?
?*?
@author
?張偉
?*?
@version
?1.0
?
*/
public
?
class
?TestYahoo?{
????
public
?
static
?
void
?testHtml()?{
????????
try
?{
????????????String?sCurrentLine;
????????????String?sTotalString;
????????????sCurrentLine?
=
?
""
;
????????????sTotalString?
=
?
""
;
????????????java.io.InputStream?l_urlStream;
????????????java.net.URL?l_url?
=
?
new
?java.net.URL(
????????????????????
"
http://sports.sina.com.cn/iframe/nba/live/
"
);
????????????java.net.HttpURLConnection?l_connection?
=
?(java.net.HttpURLConnection)?l_url
????????????????????.openConnection();
????????????l_connection.connect();
????????????l_urlStream?
=
?l_connection.getInputStream();
????????????java.io.BufferedReader?l_reader?
=
?
new
?java.io.BufferedReader(
????????????????????
new
?java.io.InputStreamReader(l_urlStream));
????????????
while
?((sCurrentLine?
=
?l_reader.readLine())?
!=
?
null
)?{
????????????????sTotalString?
+=
?sCurrentLine;
????????????}
????????????System.out.println(sTotalString);
????????????System.out.println(
"
====================
"
);
????????????String?testText?
=
?extractText(sTotalString);
????????????System.out.println(testText);
????????}?
catch
?(Exception?e)?{
????????????e.printStackTrace();
????????}
????}
????
/**
?????*?抽取純文本信息
?????*?
?????*?
@param
?inputHtml
?????*?
@return
?????
*/
????
public
?
static
?String?extractText(String?inputHtml)?
throws
?Exception?{
????????StringBuffer?text?
=
?
new
?StringBuffer();
????????Parser?parser?
=
?Parser.createParser(
new
?String(inputHtml.getBytes(),
????????????????
"
8859_1
"
),?
"
8859-1
"
);
????????
//
?遍歷所有的節(jié)點(diǎn)
????????NodeList?nodes?
=
?parser.extractAllNodesThatMatch(
new
?NodeFilter()?{
????????????
public
?
boolean
?accept(Node?node)?{
????????????????
return
?
true
;
????????????}
????????});
????????Node?node?
=
?nodes.elementAt(
0
);
????????text.append(
new
?String(node.toPlainTextString().getBytes(
"
8859_1
"
)));
????????
return
?text.toString();
????}
????
/**
?????*?讀取文件的方式來(lái)分析內(nèi)容.?filePath也可以是一個(gè)Url.
?????*?
?????*?
@param
?resource
?????*????????????文件/Url
?????
*/
????
public
?
static
?
void
?test5(String?resource)?
throws
?Exception?{
????????Parser?myParser?
=
?
new
?Parser(resource);
????????
//
?設(shè)置編碼
????????myParser.setEncoding(
"
GBK
"
);
????????String?filterStr?
=
?
"
table
"
;
????????NodeFilter?filter?
=
?
new
?TagNameFilter(filterStr);
????????NodeList?nodeList?
=
?myParser.extractAllNodesThatMatch(filter);
????????TableTag?tabletag?
=
?(TableTag)?nodeList.elementAt(
11
);
????????????
????????????System.out.println(tabletag.toHtml());
????????????
????????????System.out.println(
"
==============
"
);
????}
????
/*
?????*?public?static?void?main(String[]?args)?{?TestYahoo?testYahoo?=?new
?????*?TestYahoo();?testYahoo.testHtml();?}
?????
*/
????
public
?
static
?
void
?main(String[]?args)?
throws
?Exception?{
????????test5(
"
http://sports.yahoo.com/nba/scoreboard
"
);
????}
}
// Posted on 2006-09-15 10:04 by 阿成
// Views (3957) | Comments (0) | Edit | Bookmark
// Category: Open source