小规模数据的处理就不多做描述了,网上已有很多资料。这里要处理的是大约50G的xml格式地理数据:先将其转换为json格式,再上传到MongoDB数据库中。如果有更好的建议,欢迎指正。
解析xml数据
import java.util.ArrayList;import java.util.List;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.Locator;import org.xml.sax.SAXException;import com.mongodb.DBObject;/* * @author * @time 2015-11-8 * 主要是是implements ContentHandler,主要实现接口ContentHandler中的startDocument()、endDocument()、startElement()、endElement() * 另外自定义方法writeToMongoDB()、storeDBMongo() * */public class MyContentHandler implements ContentHandler { private StringBuffer buf; private String ctitle; private String cns; private String cid; private String ctext; private String ctimestamp; private int idnumber=0; List listdata=new ArrayList(); List list=new ArrayList(); @Override public void setDocumentLocator(Locator locator) { // TODO Auto-generated method stub } @Override public void startDocument() throws SAXException { // TODO Auto-generated method stub buf=new StringBuffer(); System.out.println("*******解析开始*******"); } @Override public void endDocument() throws SAXException { // TODO Auto-generated method stub try { writeToMongoDB(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("*******解析结束*******"); } //把数据导入MongoDB数据库中 private void writeToMongoDB() throws Exception { // TODO Auto-generated method stub Listdblist=new ArrayList (); for(Data d:listdata){ dblist.add(BSONT.mapToBSON(d.toJSONMap())); } MongoDBT.writeListToMongo("IP", 27017,"databaseName", "collectionName", dblist); } @Override public void startPrefixMapping(String prefix, String uri) throws SAXException { // TODO Auto-generated method stub } @Override public void endPrefixMapping(String prefix) throws SAXException { // TODO Auto-generated method stub } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // TODO Auto-generated method stub if(qName=="page"){ idnumber=1; } if(qName=="title"){ ctitle=qName; }else if(qName=="ns"){ cns=qName; }else if(qName=="id"&&idnumber==1){ cid=qName; 
idnumber=0; }else if(qName=="timestamp"){ ctimestamp=qName; }else if(qName=="text"){ ctext=qName; } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { // TODO Auto-generated method stub if(ctitle==qName){ String sss=buf.toString(); ctitle=""; list.add(sss); buf.setLength(0); }else if(cns==qName){ cns=""; String sss=buf.toString(); list.add(sss); buf.setLength(0); }else if(cid==qName){ cid=""; String sss=buf.toString(); list.add(sss); buf.setLength(0); }else if(ctimestamp==qName){ ctimestamp=""; String sss=buf.toString(); list.add(sss); buf.setLength(0); }else if(ctext==qName){ ctext=""; String sss=buf.toString(); list.add(sss); buf.setLength(0); //有些sss中虽然有重定向标记,但没有“[[”和“]]”,那么就会出现String的index不在范围内的问题 if((sss.toUpperCase().contains("#REDIRECT")||sss.contains("#重定向"))&&sss.contains("[[")&&sss.contains("]]")){ int i=sss.indexOf("[["); int j=sss.indexOf("]]"); String s=sss.substring(i+2,j); list.add(s); list.add("redirect"); }else{ list.add(""); list.add("article"); } } if(qName=="page"){ storeDBMongo(list); } } private void storeDBMongo(List lt) { // TODO Auto-generated method stub for(int i=0;i =300){ writeToMongoDB(); listdata.clear(); } list.clear(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } @Override public void characters(char[] ch, int start, int length) throws SAXException { // TODO Auto-generated method stub if(ctitle=="title"){ buf.append(new String(ch,start,length)); }else if(cns=="ns"){ buf.append(new String(ch,start,length)); }else if(cid=="id"){ buf.append(new String(ch,start,length)); list.add(new String(ch,start,length)); }else if(ctimestamp=="timestamp"){ buf.append(new String(ch,start,length)); }else if(ctext=="text"){ buf.append(new String(ch,start,length)); } } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { // TODO Auto-generated method stub } @Override public void processingInstruction(String target, String 
data) throws SAXException { // TODO Auto-generated method stub } @Override public void skippedEntity(String name) throws SAXException { // TODO Auto-generated method stub } }
自定义类Data、JSONT,还有就是类MyErrorHandler
import java.util.HashMap;
import java.util.Map;
/**
 * Plain mutable bean holding the values extracted for one page, plus a
 * conversion to a key/value map.
 *
 * <p>The property name {@code lastEsited} is misspelled in the original
 * accessors and map key; the spelling is kept so existing callers and
 * already-stored documents keep working.
 */
public class Data {
    private String id;
    private String namespace;
    private String type;
    private String title;
    private String markup;
    private String lastEsited;
    private String target;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getNamespace() {
        return namespace;
    }

    public void setNamespace(String namespace) {
        this.namespace = namespace;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getMarkup() {
        return markup;
    }

    public void setMarkup(String markup) {
        this.markup = markup;
    }

    public String getLastEsited() {
        return lastEsited;
    }

    public void setLastEsited(String lastEsited) {
        this.lastEsited = lastEsited;
    }

    public String getTarget() {
        return target;
    }

    public void setTarget(String target) {
        this.target = target;
    }

    /**
     * Copies all seven properties into a new map keyed by property name.
     * Unset properties are present with a {@code null} value.
     *
     * @return a fresh, mutable map; changing it does not affect this object
     */
    public Map<String, Object> toJSONMap() {
        // The original declared "jsOnmap" but used "jsonmap"; one name now.
        Map<String, Object> jsonmap = new HashMap<String, Object>();
        jsonmap.put("id", this.id);
        jsonmap.put("namespace", this.namespace);
        jsonmap.put("type", this.type);
        jsonmap.put("title", this.title);
        jsonmap.put("markup", this.markup);
        jsonmap.put("lastEsited", this.lastEsited);
        jsonmap.put("target", this.target);
        return jsonmap;
    }
}
/*
* NextMap-Crawler Module
*
* Copyright (C) 2002-2014,Institute of Geographic Sciences and Natural Resources Research,
* Chinese Academy of Sciences
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
import java.io.IOException;
import java.io.StringWriter;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
 * Static helpers for converting between JSON text and maps, lists or beans
 * using Jackson.
 *
 * <p>All methods share one {@link ObjectMapper}. The mapper is never
 * reconfigured after creation, so it is reused instead of being rebuilt on
 * every call as the original did.
 *
 * <p>Failures are reported with {@code printStackTrace()} and do not
 * propagate: the serialising methods then return whatever had been written so
 * far (possibly an empty string), the parsing methods return {@code null}.
 *
 * @author zhuhaichuan
 * @date 2015-11-8
 */
public class JSONT {

    /** Shared mapper; created once and only used for read/write calls. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Serialises a map to JSON text.
     *
     * @param map the map to serialise
     * @return the JSON text, or the partial output if serialisation failed
     */
    public static String mapToJSONString(Map map) {
        return write(map);
    }

    /**
     * Serialises a list to JSON text.
     *
     * @param list the list to serialise
     * @return the JSON text, or the partial output if serialisation failed
     */
    public static String listToJSONString(List list) {
        return write(list);
    }

    /**
     * Serialises an arbitrary bean to JSON text.
     *
     * @param bean the object to serialise
     * @return the JSON text, or the partial output if serialisation failed
     */
    public static String beanToJSONString(Object bean) {
        return write(bean);
    }

    /**
     * Parses JSON text into a map.
     *
     * @param jsonstr the JSON text
     * @return the parsed map, or {@code null} if parsing failed
     */
    public static Map jsonToMap(String jsonstr) {
        return read(jsonstr, Map.class);
    }

    /**
     * Parses JSON text into a list.
     *
     * @param jsonstr the JSON text
     * @return the parsed list, or {@code null} if parsing failed
     */
    public static List jsonToList(String jsonstr) {
        return read(jsonstr, List.class);
    }

    /** Writes {@code value} as JSON into a string, reporting any I/O failure. */
    private static String write(Object value) {
        StringWriter sw = new StringWriter();
        try {
            MAPPER.writeValue(sw, value);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sw.toString();
    }

    /** Reads {@code json} as {@code type}; returns null after reporting a failure. */
    private static <T> T read(String json, Class<T> type) {
        try {
            return MAPPER.readValue(json, type);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }
}
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
 * SAX error handler that prints every reported problem to standard output
 * (banner, line number, column number, message, closing rule) and never
 * throws, for warnings, errors and fatal errors alike.
 */
public class MyErrorHandler implements ErrorHandler {

    /** Prints a warning report. */
    @Override
    public void warning(SAXParseException exception) throws SAXException {
        print("*******WARNING******", "exception信息:", "********************", exception);
    }

    /** Prints an error report. */
    @Override
    public void error(SAXParseException exception) throws SAXException {
        print("******* ERROR ******", "exception信息:", "********************", exception);
    }

    /** Prints a fatal-error report. */
    @Override
    public void fatalError(SAXParseException exception) throws SAXException {
        // The message label of this report has no trailing colon, unlike the
        // other two; it is reproduced as-is.
        print("******** FATAL ERROR ********", "exception信息", "*****************************", exception);
    }

    /**
     * Writes one five-line report to standard output.
     *
     * @param banner       first line of the report
     * @param messageLabel text printed directly before the exception message
     * @param rule         last line of the report
     * @param problem      the parse problem being reported
     */
    private static void print(String banner, String messageLabel, String rule,
            SAXParseException problem) {
        System.out.println(banner);
        System.out.println("行号:" + problem.getLineNumber());
        System.out.println("列号:" + problem.getColumnNumber());
        System.out.println(messageLabel + problem.getMessage());
        System.out.println(rule);
    }
}
import java.util.ArrayList;
import java.util.List;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
public class MongoDBT {
public static void writeListToMongo(String ip,int port,String dbname,String collname,Listlist) throws Exception{
Mongo mOngo=new Mongo(ip,port);
DB db=mongo.getDB(dbname);
DBCollection collection=db.getCollection(collname);
Listdblist=new ArrayList ();
for(int i=0;idblist.add(list.get(i));
}
collection.insert(dblist);
mongo.close();
}
}