`

使用 SAX 解析 XML 文件的一个小例子

阅读更多

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.io.Reader;

import java.lang.reflect.InvocationTargetException;

import java.util.ArrayList;

import java.util.List;

import java.util.Stack;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

 

import javax.xml.parsers.SAXParser;

import javax.xml.parsers.SAXParserFactory;

 

import org.apache.commons.beanutils.BeanUtils;

import org.xml.sax.Attributes;

import org.xml.sax.InputSource;

import org.xml.sax.SAXException;

import org.xml.sax.helpers.DefaultHandler;

 

public class ParserTest extends DefaultHandler {

 

        StringBuffer tdBuffer= new StringBuffer();

        boolean tr_in_stack = false;

        static String tag_tr = "tr";

        static String tag_td = "td";

       

        TrData trData;

 

        Stack<TrData> trDatas = new Stack<TrData>();

 

        Stack<Object> objects = new Stack<Object>();

 

        Stack<String> tagNames = new Stack<String>();

 

        public void startDocument() throws SAXException {

                System.out.println("-----    document begin     -----");

        }

 

        public void endDocument() throws SAXException {

                System.out.println("-----    document end       -----");

        }

 

        public void characters(char ch[], int start, int length)

                        throws SAXException {

                // this.objects.isEmpty() ||

                if (this.tagNames.isEmpty())

                        return;

 

                if (this.tagNames.peek().equals(tag_td)) {

                        String value = new String(ch, start, length);

 

                        if (value == null)

                                return;

 

                        value = value.trim();

 

                        if ("".equals(value))

                                return;

 

                        tdBuffer.append(value);

                }

        }

 

        /* add the name of the attribute belonged any object */

        public void startElement(String uri, String localName, String qName,

                        Attributes atts) throws SAXException {

                qName = qName.toLowerCase();

 

                if (!qName.equals(tag_tr) && !qName.equals(tag_td))

                        return;

 

                if (qName.equals(tag_tr)){

                        if(tr_in_stack)

                                this.tagNames.clear();

                        else

                                this.trData = new TrData();

                       

                        tr_in_stack = true;

                }

 

                this.tagNames.add(qName);

        }

 

        /* remove the name of the attribute belonged any object */

        public void endElement(String uri, String localName, String qName)

                        throws SAXException {

 

                if (tagNames.isEmpty()&&!qName.equals("html"))

                        return;

 

                qName = qName.toLowerCase();

 

                if (qName.equals(tag_td)){

                        this.tagNames.pop();

                       

                        String value = this.tdBuffer.toString();

                       

                        if(!"".equals(value)||value!=null)

                                this.trData.addTd(value);

                       

                        this.tdBuffer = new StringBuffer();

                }

 

                if (qName.equals(tag_tr)) {

                        this.tagNames.clear();

                        tr_in_stack = false;

                       

                        this.trDatas.add(this.trData);

                }

               

                if(qName.equals("html")){

                        int ii=0;

                        List<String> strs = new ArrayList<String>();

                        String s=null;

                        while(ii<=trDatas.lastIndexOf(trDatas.lastElement())){

                                if(!trDatas.elementAt(ii).getTds().get(0).isEmpty()){

                                        s=trDatas.elementAt(ii).getTds().toString();

                                        s=s.substring(1, s.length()-1);

                                        Pattern p = Pattern.compile(" ");

                                        Matcher m = p.matcher(s);

                                        s=m.replaceAll("");

                                        strs.add(s);

                                }

                                ii++;                  

                        }

                        for(String str:strs){

                                System.out.println(strs.indexOf(str)+"  "+str);

                                System.out.println();

                        }

                     

                }

 

        }

       

              

        

        protected void setAttributes(Object target, Attributes atts)

                        throws IllegalAccessException, InvocationTargetException {

                /* set the attribute */

                for (int i = 0; i < atts.getLength(); i++) {

                        BeanUtils.setProperty(target, atts.getQName(i), atts.getValue(i));

                }

        }

 

        private boolean isValid(String value) {

                if (value.startsWith("<img") || value.startsWith("<table"))

                        return false;

 

                return true;

        }

 

        public static class TrData {

                private List<String> tds = new ArrayList<String>();

 

                public void addTd(String tdMsg) {

                        tds.add(tdMsg);

                }

 

                public List<String> getTds() {

                        return tds;

                }

        }

 

        public static void main(String args[]) {

 

                SAXParserFactory saxFactory = SAXParserFactory.newInstance();

                InputStream inputStream = null;

                Reader reader = null;

                try {

                        /* inputStream of xml */

                        inputStream = new FileInputStream(

                                        "D:/1.xml");

                        /* specify the CharSet when reading the Stream */

                        reader = new InputStreamReader(inputStream, "UTF-8");

 

                        /*

                          * construct the InputSource with reader, which is the source of

                          * saxParser

                          */

                        InputSource source = new InputSource(reader);

 

                        /* create parser with SaxFactory */

                        SAXParser parser = saxFactory.newSAXParser();

                        /* the most importence component used to parse the xml */

                        ParserTest handler = new ParserTest();

 

                        /* parse the xml source with studentHandler */

                        parser.parse(source, handler);

                       

 

                } catch (Exception e) {

                        e.printStackTrace();

                         

                } finally {

                        /* release the resource */

                        try {

                                if (reader != null)

                                        reader.close();

                                if (inputStream != null)

                                        inputStream.close();

                        } catch (IOException ioe) {

                                ioe.printStackTrace();

                        }

                }

        }

}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics