我有一个带有嵌套标签的XML文件。我们可以使用DOM,JDOM解析器,我想在整个XML文件中的所有标记的字符串中替换从单引号(')到双引号的字符串。标签也可以嵌套在标签内。我想要一些for循环来查找所有标签并替换值,例如HYPER SHIPPING'SDN BHD_First_Page-> HYPER SHIPPING''SDN BHD_First_Page
样例代码
public void iterateChildNodes(org.jdom.Element parentNode) {
if(parentNode.getChildren().size() == 0) {
if(parentNode.getText().contains("'")) {
parentNode.setText(parentNode.getText().replaceAll("'", "\'"));
LOGGER.info("************* Below Value updated");
LOGGER.info(parentNode.getText());
}
}else {
List<Element> rec = parentNode.getChildren();
for(Element i : rec) {
iterateChildNodes(i);
}
}
}
样本XML文件
<Document>
<Identifier>DOC1</Identifier>
<Type>HYPER SHIPPING SDN BHD</Type>
<Description>HYPER SHIPPING SDN BHD</Description>
<Confidence>33.12</Confidence>
<ConfidenceThreshold>10.0</ConfidenceThreshold>
<Valid>true</Valid>
<Reviewed>true</Reviewed>
<ReviewedBy>SYSTEM</ReviewedBy>
<ValidatedBy>SYSTEM</ValidatedBy>
<ErrorMessage/>
<Value>HYPER SHIPPING'SDN BHD_First_Page</Value> //Value to be replaced here
<DocumentDisplayInfo/>
<DocumentLevelFields/>
<Pages>
<Page>
<Identifier>PG0</Identifier>
<OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
<NewFileName>BI2E7_0.tif</NewFileName>
<SourceFileID>1</SourceFileID>
<PageLevelFields>
<PageLevelField>
<Name>Search_Engine_Classification</Name>
<Value>Park Street '10 road</Value> //Value to be replaced here
<Type/>
<Confidence>66.23</Confidence>
<LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
<OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
<OcrConfidence>0.0</OcrConfidence>
<FieldOrderNumber>0</FieldOrderNumber>
<ForceReview>false</ForceReview>
</PageLevelField>
</PageLevelFields>
</Page>
</Pages>
</Document>
此代码可以取代所有'
与"
从XML文件。
在此处未添加描述,请尝试逐步进行编码。这很容易理解。
(更新)
Part 1: Using JDOM
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.w3c.dom.NodeList;
import org.jdom2.input.SAXBuilder;
import org.jdom2.transform.JDOMSource;
import org.w3c.dom.*;
import java.io.*;
public class XmlParse {
public static void main(String[] args) {
XmlParse xmlParse = new XmlParse();
// xmlParse.xmlToObj();
xmlParse.updateXmlAndSaveJDom();
}
public void updateXmlAndSaveJDom() {
try {
File inputFile = new File("document.xml");
SAXBuilder saxBuilder = new SAXBuilder();
org.jdom2.Document doc = saxBuilder.build(inputFile);
org.jdom2.Element classElement = doc.getRootElement();
iterateChildNodesJDom(classElement);
writeXMLJDom(doc);
} catch (Exception ex) {
ex.printStackTrace();
}
}
public void writeXMLJDom(org.jdom2.Document doc) throws Exception {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
JDOMSource source = new JDOMSource(doc);
StreamResult result = new StreamResult(new File("updated-document-jdom.xml"));
transformer.transform(source, result);
}
public void iterateChildNodesJDom(org.jdom2.Element e) {
if(e.getChildren().size() == 0) {
System.out.println(e.getName() + ","+ e.getText());
if(e.getText().contains("'")) {
e.setText(e.getText().replaceAll("\'", "\""));
}
}else {
System.out.println(e.getName());
for(org.jdom2.Element i: e.getChildren()) {
iterateChildNodesJDom(i);
}
}
}
}
Part 2: Using DOM
import java.util.ArrayList;
import java.util.List;
import org.w3c.dom.NodeList;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
public class XmlParse {
public static void main(String[] args) {
XmlParse xmlParse = new XmlParse();
xmlParse.updateXmlAndSave();
}
public void updateXmlAndSave() {
try {
File inputFile = new File("document.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
Node parentNode = doc.getFirstChild();
iterateChildNodes(parentNode);
writeXML(doc);
} catch (Exception ex) {
ex.printStackTrace();
}
}
//create new xml file with updated value
public void writeXML(Document doc) throws Exception{
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new File("updated-document.xml"));
transformer.transform(source, result);
}
public void iterateChildNodes(Node parentNode) {
NodeList nodeList = parentNode.getChildNodes();
for (int temp = 0; temp < nodeList.getLength(); temp++) {
Node node = nodeList.item(temp);
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element element = (Element) node;
//System.out.print(element.getNodeName());
if(element.hasChildNodes() && element.getChildNodes().getLength() > 1) {
//System.out.println("Child > "+element.getNodeName());
iterateChildNodes(element);
}else {
//System.out.println(" - "+element.getTextContent());
if(element.getTextContent().contains("'")) {
String str = element.getTextContent().replaceAll("\'", "\"");
//update Node value
element.setTextContent(str);
}
}
}
}
}
}
输入文件document.xml
:
<Document>
<Identifier>DOC1</Identifier>
<Type>HYPER SHIPPING SDN BHD</Type>
<Description>HYPER SHIPPING SDN BHD</Description>
<Confidence>33.12</Confidence>
<ConfidenceThreshold>10.0</ConfidenceThreshold>
<Valid>true</Valid>
<Reviewed>true</Reviewed>
<ReviewedBy>SYSTEM</ReviewedBy>
<ValidatedBy>SYSTEM</ValidatedBy>
<ErrorMessage/>
<Value>HYPER SHIPPING'SDN BHD_First_Page</Value> //Value to be replaced here
<DocumentDisplayInfo/>
<DocumentLevelFields/>
<Pages>
<Page>
<Identifier>PG0</Identifier>
<OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
<NewFileName>BI2E7_0.tif</NewFileName>
<SourceFileID>1</SourceFileID>
<PageLevelFields>
<PageLevelField>
<Name>Search_Engine_Classification</Name>
<Value>Park Street '10 road</Value> //Value to be replaced here
<Type/>
<Confidence>66.23</Confidence>
<LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
<OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
<OcrConfidence>0.0</OcrConfidence>
<FieldOrderNumber>0</FieldOrderNumber>
<ForceReview>false</ForceReview>
</PageLevelField>
</PageLevelFields>
</Page>
</Pages>
</Document>
输出updated-document.xml/updated-document-jdom.xml
:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Document>
<Identifier>DOC1</Identifier>
<Type>HYPER SHIPPING SDN BHD</Type>
<Description>HYPER SHIPPING SDN BHD</Description>
<Confidence>33.12</Confidence>
<ConfidenceThreshold>10.0</ConfidenceThreshold>
<Valid>true</Valid>
<Reviewed>true</Reviewed>
<ReviewedBy>SYSTEM</ReviewedBy>
<ValidatedBy>SYSTEM</ValidatedBy>
<ErrorMessage/>
<Value>HYPER SHIPPING"SDN BHD_First_Page</Value><DocumentDisplayInfo/>
<DocumentLevelFields/>
<Pages>
<Page>
<Identifier>PG0</Identifier>
<OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
<NewFileName>BI2E7_0.tif</NewFileName>
<SourceFileID>1</SourceFileID>
<PageLevelFields>
<PageLevelField>
<Name>Search_Engine_Classification</Name>
<Value>Park Street "10 road</Value><Type/>
<Confidence>66.23</Confidence>
<LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
<OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
<OcrConfidence>0.0</OcrConfidence>
<FieldOrderNumber>0</FieldOrderNumber>
<ForceReview>false</ForceReview>
</PageLevelField>
</PageLevelFields>
</Page>
</Pages>
</Document>
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句