參考自:http://blog.csdn.net/yyywyr/article/details/38359049
http://blog.csdn.net/warrenwyf/article/details/5703279
http://zhidao.baidu.com/question/568729363.html
1 KML文件
现有一个在ArcGIS中生成的点要素shapefile文件,将其转换成kmz文件。可是这个kmz文件并非纯文本的KML文件,而是一个压缩文件,用压缩软件打开这个kmz之后会发现其中包含了一个“doc.kml”和一个“*.png”图标文件。因此,我们需要解析的就是这个doc.kml文件。

其格式如下:

<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.opengis.net/kml/2.2 http://schemas.opengis.net/kml/2.2.0/ogckml22.xsd http://www.google.com/kml/ext/2.2 http://code.google.com/apis/kml/schema/kml22gx.xsd"> <Document id="4thalter"> <name>4thalter</name> <Snippet></Snippet> <Folder id="FeatureLayer0"> <name>4thalter</name> <Snippet></Snippet> <Placemark id="ID_00000"> <name>456</name> <Snippet></Snippet> <description> 这里是一个HTML文档 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 119.46,30.96,0</coordinates> </Point> </Placemark> <Placemark id="ID_00002"> <name>457</name> <Snippet></Snippet> <description> 这里是一个HTML文档 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 120.46,30.96,0</coordinates> </Point> </Placemark> <Placemark id="ID_00022"> <name>xc64</name> <Snippet></Snippet> <description> 这里是一个HTML文档 </description> <styleUrl>#IconStyle00</styleUrl> <Point> <altitudeMode>clampToGround</altitudeMode> <coordinates> 118.81,30.93,0</coordinates> </Point> </Placemark> </Folder> <Style id="IconStyle00"> <IconStyle> <Icon><href>Layer0_Symbol_11269a08.png</href></Icon> <scale>0.437500</scale> </IconStyle> <LabelStyle> <color>ff000000</color> <scale>0.833333</scale> </LabelStyle> <PolyStyle> <color>ff000000</color> <outline>0</outline> </PolyStyle> </Style> </Document> </kml>

我们需要解析的属性标签存放在description结点下的HTML中,如下:

<![CDATA[<html xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:msxsl="urn:schemas-microsoft-com:xslt">

<head>

<META http-equiv="Content-Type" content="text/html">

<meta http-equiv="content-type" content="text/html; charset=UTF-8">

</head>

<body style="margin:0px 0px 0px 0px;overflow:auto;background:#FFFFFF;">

<table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-collapse:collapse;padding:3px 3px 3px 3px">

<tr style="text-align:center;font-weight:bold;background:#9CBCE2">

<td>第四组</td>

</tr>

<tr>

<td>

<table style="font-family:Arial,Verdana,Times;font-size:12px;text-align:left;width:100%;border-spacing:0px; padding:3px 3px 3px 3px">

<tr>

<td>FID</td>

<td>22</td>

</tr>

<tr bgcolor="#D4E4F3">

<td>soiltype</td>

<td>6</td>

</tr>

<tr>

<td>x</td>

<td>673556</td>

</tr>

<tr bgcolor="#D4E4F3">

<td>y</td>

<td>3424365</td>

</tr>

<tr>

<td>dem</td>

<td>14</td>

</tr>

<tr bgcolor="#D4E4F3">

<td>planc</td>

<td>0</td>

</tr>

<tr>

<td>profc</td>

<td>0</td>

</tr>

<tr bgcolor="#D4E4F3">

<td>slope</td>

<td>0</td>

</tr>

<tr>

<td>PYNAME</td>

<td>第四组</td>

</tr>

<tr bgcolor="#D4E4F3">

<td>ID</td>

<td>664</td>

</tr>

<tr>

<td>name</td>

<td>xc64</td>

</tr>

</table>

</td>

</tr>

</table>

</body>

</html>

]]>

在本案例中,我需要从HTML中提取出坐标x、y和name这三个属性,代码如下:

package com.test.parsekml;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import android.util.Log;

/**
 * Extracts the {@code x}, {@code y} and {@code name} attributes of every placemark
 * stored in a KMZ archive.
 *
 * <p>The archive is opened as a ZIP file, each KML entry is parsed with dom4j, and the
 * HTML held in every non-empty {@code description} element is parsed with jsoup. The
 * extracted values are written to the Android debug log.
 */
public class ReadKml {
    /** Logcat tag used when reporting I/O failures. */
    private static final String TAG = "ReadKml";

    /**
     * Index of the first {@code <tr>} that is treated as an attribute row. Rows 0 and 1
     * are skipped because, in the description HTML this class was written for, they are
     * the title row and the wrapper row that contains the nested attribute table.
     */
    private static final int FIRST_ATTRIBUTE_ROW = 2;

    /**
     * Opens the archive at {@code pathName} and parses every entry whose name ends
     * with {@code kml}.
     *
     * <p>I/O failures while reading the archive are logged and not rethrown; failures
     * raised by the XML parser propagate to the caller.
     *
     * @param pathName path of the KMZ archive on disk
     * @throws Exception if an entry cannot be parsed as XML
     */
    public void parseKml(String pathName) throws Exception
    {
        File file = new File(pathName);
        ZipFile zipFile = null;
        try {
            zipFile = new ZipFile(file);
            Enumeration<? extends ZipEntry> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                String zipEntryName = entry.getName();
                Log.d("压缩实体的名称:", zipEntryName);
                // Only KML entries are XML. Other entries (icons, nested .kmz archives)
                // are skipped because they cannot be fed to the XML parser.
                if (!zipEntryName.endsWith("kml")) {
                    continue;
                }
                InputStream inputStream = zipFile.getInputStream(entry);
                try {
                    parseXmlWithDom4j(inputStream);
                } finally {
                    // Close every entry stream, including when parsing fails.
                    inputStream.close();
                }
            }
        } catch (IOException e) {
            // ZipException is a subclass of IOException, so this covers both.
            Log.e(TAG, "Failed to read archive " + pathName, e);
        } finally {
            if (zipFile != null) {
                try {
                    zipFile.close();
                } catch (IOException e) {
                    Log.e(TAG, "Failed to close archive " + pathName, e);
                }
            }
        }
    }

    /**
     * Parses {@code input} as an XML document and walks it from the root element.
     *
     * <p>NOTE(review): the reader is used with its default configuration; if archives
     * can come from untrusted sources, consider restricting DTD/external-entity
     * processing — confirm which features the platform parser supports.
     *
     * @param input stream positioned at the start of the KML document; not closed here
     * @throws Exception if the stream is not well-formed XML (a {@code DocumentException})
     */
    public void parseXmlWithDom4j(InputStream input) throws Exception
    {
        SAXReader reader = new SAXReader();
        // Let a parse failure propagate instead of continuing with a null document.
        Document document = reader.read(input);
        listNodes(document.getRootElement());
    }

    /**
     * Visits {@code node} and, recursively, all of its descendant elements, handing the
     * text of every non-empty {@code description} element to {@link #parseHtml(String)}.
     *
     * @param node element at which the traversal starts
     */
    public void listNodes(Element node){
        Log.d("当前结点的名称:", node.getName());
        if ("description".equals(node.getName()) && !"".equals(node.getTextTrim())) {
            parseHtml(node.getText());
        }
        // Depth-first recursion over the child elements.
        Iterator<Element> iterator = node.elementIterator();
        while (iterator.hasNext()) {
            listNodes(iterator.next());
        }
    }

    /**
     * Extracts the {@code x}, {@code y} and {@code name} attributes from the description
     * HTML of one placemark and writes them to the debug log.
     *
     * <p>Each attribute row is expected to hold the attribute name in its first cell and
     * the value in its second cell; rows with fewer than two cells are ignored. An
     * attribute that is not found is logged as an empty string.
     *
     * @param htmlData HTML text taken from a {@code description} element
     */
    public void parseHtml(String htmlData)
    {
        org.jsoup.nodes.Document document = Jsoup.parse(htmlData);
        Elements trs = document.select("table").select("tr");
        String x = "";
        String y = "";
        String name = "";
        for (int i = FIRST_ATTRIBUTE_ROW; i < trs.size(); i++) {
            // Read the cells directly rather than splitting the row text on spaces, so
            // that empty values do not overflow the array and values containing spaces
            // are kept whole.
            Elements cells = trs.get(i).children();
            if (cells.size() < 2) {
                continue;
            }
            String key = cells.get(0).text();
            String value = cells.get(1).text();
            if ("name".equals(key)) {
                name = value;
            } else if ("x".equalsIgnoreCase(key)) {
                x = value;
            } else if ("y".equalsIgnoreCase(key)) {
                y = value;
            }
        }
        Log.d("X:", x);
        Log.d("Y:", y);
        Log.d("Name:", name);
    }
}

本文中使用了dom4j包来解析XML,并使用jsoup包来解析HTML。