package search;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Purpose: filter (strip) tags from HTML text.
 * @author jiadong
 * Date: 2007-10-15
 * Version: 1.0
 */

/**
 * Utility that reduces an HTML fragment to its text content by removing
 * script elements, style elements, all remaining tags, and a small set of
 * character entities.
 */
public class HtmlTagsProcessor
{
    /**
     * Matches one complete script element, including its body. The body is
     * matched reluctantly so that each element is removed on its own, and
     * DOTALL lets the body span several lines.
     */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(
            "<script\\b[^>]*>.*?</script\\s*>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Matches one complete style element, including its (possibly multi-line) body. */
    private static final Pattern STYLE_PATTERN = Pattern.compile(
            "<style\\b[^>]*>.*?</style\\s*>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Matches any single tag: a '<', one or more non-'>' characters, then '>'. */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** Matches the character entities that are dropped from the output. */
    private static final Pattern PARTICULAR_PATTERN = Pattern.compile(
            "&gt;|&amp;|&nbsp;|&quot;",
            Pattern.CASE_INSENSITIVE);

    /**
     * Strips markup from the given HTML text.
     *
     * <p>Processing order: script elements, then style elements, then every
     * remaining tag, and finally the entities {@code &gt;}, {@code &amp;},
     * {@code &nbsp;} and {@code &quot;}. Every match is replaced by the empty
     * string; the entities are removed, not decoded.
     *
     * @param inputString the HTML text to clean; may be {@code null}
     * @return the text with the matched markup removed, or an empty string
     *         when {@code inputString} is {@code null}
     */
    public static String tagsProcessor(String inputString)
    {
        if (inputString == null)
        {
            return "";
        }

        // Script and style elements go first so their bodies are removed
        // together with the tags instead of being left behind as text.
        String processedString = SCRIPT_PATTERN.matcher(inputString).replaceAll("");
        processedString = STYLE_PATTERN.matcher(processedString).replaceAll("");

        // Remove every remaining tag, keeping the text between tags.
        processedString = HTML_TAG_PATTERN.matcher(processedString).replaceAll("");

        // Finally drop the listed character entities.
        processedString = PARTICULAR_PATTERN.matcher(processedString).replaceAll("");
        return processedString;
    }
}
// posted on 2007-10-17 11:32
// jiadong  Views (408)
// Comments (0)  Edit  Bookmark  Category:
// OTHERS