/*
 * 目前在java平臺上,要解析xml文檔,即使只有"<abc></abc>"這樣的一個標籤,在生成document對象時,也至少要花費300ms左右,這樣一次交互至少要在600ms左右,加上其它處理,一次通訊要1000ms以上,使得soap協議在java平臺上根本不能進行實際應用.
 * 其實這並不是SOAP協議的問題,關鍵在於對XML文檔的解析.基於這個原因,筆者實現了用正則表達式來解析XML文檔的一些API,利用它來在替換中移動的大多數SOAP的接口,效率提高了10倍左右.
 */
package org.axman.xml.regex;
import java.util.regex.*;
import java.util.*;
/**
?*
?* <p>Title: Document</p>
?*
?* <p>Description: 用正則表達(dá)式解析xml,目的是為了提高性能.</p>
?*
?* <p>Copyright: Copyright (c) 2005</p>
?*
?* <p>Company: org.axman</p>
?*
?* @author :Axman
?* @version 1.0
?*/
public class Document {
? private String xmlString;
? /**
?? * 傳入xml的字符串內(nèi)容,對(duì)于InputStream,Reader對(duì)象請(qǐng)轉(zhuǎn)換為String對(duì)象后傳入構(gòu)造方法.
?? * @param xmlString String
?? * @throws IllegalArgumentException
?? */
? public Document(String xmlString) throws IllegalArgumentException{
??? if(xmlString == null || xmlString.length() == 0)
????? throw new IllegalArgumentException("Input string orrer!");
??? this.xmlString = xmlString;
? }
? /**
?? * 在文檔中搜索指定的元素,返回符合條件的元素?cái)?shù)組.
?? * @param tagName String
?? * @return String[]
?? */
? public String[] getElementsByTag(String tagName){
??? Pattern p = Pattern.compile("<"+tagName+"[^>]*?((>.*?</"+tagName+">)|(/>))");
??? Matcher m = p.matcher(this.xmlString);
??? ArrayList<String> al = new ArrayList<String>();
??? while(m.find())
????? al.add(m.group());
??? String[] arr = al.toArray(new String[al.size()]);
??? al.clear();
??? return arr;
? }
? /**
?? * 用xpath模式提取元素,以#為分隔符
?? * 如 ROOT#PARENT#CHILD表示提取ROOT元素下的PARENT元素下的CHILD元素
?? * @param singlePath String
?? * @return String
?? */
? public String getElementBySinglePath(String singlePath){
??? String[] path = singlePath.split("#");
??? String lastTag = path[path.length-1];
??? String tmp = "(<"+lastTag+"[^>]*?((>.*?</"+lastTag+">)|(/>)))";
??????????????????????????????????????? //最后一個(gè)元素,可能是<x>v</x>形式或<x/>形式
??? for(int i=path.length-2;i >=0;i--){
????? lastTag = path[i];
????? tmp = "<"+lastTag+">.*"+tmp + ".*</"+lastTag+">";
??? }
??? Pattern p = Pattern.compile(tmp);
??? Matcher m = p.matcher(this.xmlString);
??? if(m.find()){
????? return m.group(1);
??? }
??? return "";
? }
? /**
?? * 用xpath模式提取元素從多重元素中獲取指批定元素,以#為分隔符
?? * 元素后無索引序號(hào)則默認(rèn)為0: ROOT#PARENT[2]#CHILD[1]
?? * @param singlePath String
?? * @return String
?? */
? public String getElementByMultiPath(String singlePath){
??? try{
????? String[] path = singlePath.split("#");
????? String input = this.xmlString;
????? String[] ele = null;
????? for (int i = 0; i < path.length; i++) {
??????? Pattern p = Pattern.compile("(\\w+)(\\[(\\d+)\\])?");
??????? Matcher m = p.matcher(path[i]);
??????? if (m.find()) {
????????? String tagName = m.group(1);
????????? System.out.println(input + "----" + tagName);
????????? int index = (m.group(3) == null) ? 0 :
????????????? new Integer(m.group(3)).intValue();
????????? ele = getElementsByTag(input, tagName);
????????? input = ele[index];
??????? }
????? }
????? return input;
??? }catch(Exception e){
????? return null;
??? }
? }
? /**
?? * 在給定的元素中搜索指定的元素,返回符合條件的元素?cái)?shù)組.對(duì)于不同級(jí)別的同名元素限制作用,即可以
?? * 搜索元素A中的子元素C.而對(duì)于元素B中子元素C則過慮,通過多級(jí)限定可以準(zhǔn)確定位.
?? * @param parentElementString String
?? * @param tagName String
?? * @return String[]
?? */
? public static String[] getElementsByTag(String parentElementString,String tagName){
??? Pattern p = Pattern.compile("<"+tagName+"[^>]*?((>.*?</"+tagName+">)|(/>))");
??? Matcher m = p.matcher(parentElementString);
??? ArrayList<String> al = new ArrayList<String>();
??? while(m.find())
????? al.add(m.group());
??? String[] arr = al.toArray(new String[al.size()]);
??? al.clear();
??? return arr;
? }
? /**
?? * 從指定的父元素中根據(jù)xpath模式獲取子元素,singlePath以#為分隔符
?? * 如 ROOT#PARENT#CHILD表示提取ROOT元素下的PARENT元素下的CHILD元素
?? * @param parentElementString String
?? * @param singlePath String
?? * @return String
?? */
? public static String getElementBySinglePath(String parentElementString,String singlePath){
??? String[] path = singlePath.split("#");
??? String lastTag = path[path.length-1];
??? String tmp = "(<"+lastTag+"[^>]*?((>.*?</"+lastTag+">)|(/>)))";
??????????????????????????????????????? //最后一個(gè)元素,可能是<x>v</x>形式或<x/>形式
??? for(int i=path.length-2;i >=0;i--){
????? lastTag = path[i];
????? tmp = "<"+lastTag+">.*"+tmp + ".*</"+lastTag+">";
??? }
??? Pattern p = Pattern.compile(tmp);
??? Matcher m = p.matcher(parentElementString);
??? if(m.find()){
????? return m.group(1);
??? }
??? return "";
? }
? /**
?? * 用xpath模式提取元素從指定的多重元素中獲取指批定元素,以#為分隔符
?? * @param parentElementString String
?? * @param singlePath String
?? * @return String
?? */
? public static String getElementByMultiPath(String parentElementString,String singlePath){
??? try{
????? String[] path = singlePath.split("#");
????? String input = parentElementString;
????? String[] ele = null;
????? for (int i = 0; i < path.length; i++) {
??????? Pattern p = Pattern.compile("(\\w+)(\\[(\\d+)\\])?");
??????? Matcher m = p.matcher(path[i]);
??????? if (m.find()) {
????????? String tagName = m.group(1);
????????? int index = (m.group(3) == null) ? 0 :
????????????? new Integer(m.group(3)).intValue();
????????? ele = getElementsByTag(input, tagName);
????????? input = ele[index];
??????? }
????? }
????? return input;
??? }catch(Exception e){
????? return null;
??? }
? }
? /**
?? * 在給定的元素中獲取所有屬性的集合.該元素應(yīng)該從getElementsByTag方法中獲取
?? * @param elementString String
?? * @return HashMap
?? */
? public HashMap<String,String> getAttributes(String elementString){
??? HashMap hm = new HashMap<String,String>();
??? Pattern p = Pattern.compile("<[^>]+>");
??? Matcher m = p.matcher(elementString);
??? String tmp = m.find()?m.group():"";
??? p = Pattern.compile("(\\w+)\\s*=\\s*\"([^\"]+)\"");
??? m = p.matcher(tmp);
??? while(m.find()){
????? hm.put(m.group(1).trim(),m.group(2).trim());
??? }
??? return hm;
? }
? /**
?? * 在給定的元素中獲取指定屬性的值.該元素應(yīng)該從getElementsByTag方法中獲取
?? * @param elementString String
?? * @param attributeName String
?? * @return String
?? */
? public static String getAttribute(String elementString,String attributeName){
??? HashMap hm = new HashMap<String,String>();
??? Pattern p = Pattern.compile("<[^>]+>");
??? Matcher m = p.matcher(elementString);
??? String tmp = m.find()?m.group():"";
??? p = Pattern.compile("(\\w+)\\s*=\\s*\"([^\"]+)\"");
??? m = p.matcher(tmp);
??? while(m.find()){
????? if(m.group(1).trim().equals(attributeName))
??????? return m.group(2).trim();
??? }
??? return "";
? }
? /**
?? * 獲取指定元素的文本內(nèi)容
?? * @param elementString String
?? * @return String
?? */
? public static String getElementText(String elementString){
??? Pattern p = Pattern.compile(">([^<>]*)<");
??? Matcher m = p.matcher(elementString);
??? if(m.find()){
????? return m.group(1);
??? }
??? return "";
? }
? public static void main(String[] args){
??? new Document("<ROOT>sss <PARENT>sss <CHILD>aaaa</CHILD>ss </PARENT>sss </ROOT>").getElementByMultiPath("ROOT[0]#PARENT#CHILD");
??? //System.out.println(child);
? }
}