3 """xml2json.py Convert XML to JSON
5 Relies on ElementTree for the XML parsing. This is based on
6 pesterfish.py but uses a different XML->JSON mapping.
7 The XML->JSON mapping is described at
8 http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
10 Rewritten to a command line utility by Hay Kranen < github.com/hay > with
11 contributions from George Hamilton (gmh04) and Dan Brown (jdanbrown)
15 <e>text</e> "e": "text"
16 <e name="value" /> "e": { "@name": "value" }
17 <e name="value">text</e> "e": { "@name": "value", "#text": "text" }
18 <e> <a>text</a> <b>text</b> </e> "e": { "a": "text", "b": "text" }
19 <e> <a>text</a> <a>text</a> </e> "e": { "a": ["text", "text"] }
20 <e> text <a>text</a> </e> "e": { "#text": "text", "a": "text" }
22 This is very similar to the mapping used for Yahoo Web Services
23 (http://developer.yahoo.com/common/json.html#xml).
25 This is a mess in that it is so unpredictable -- it requires lots of testing
26 (e.g. to see if values are lists or strings or dictionaries). For use
27 in Python this could be vastly cleaner. Think about whether the internal
28 form can be more self-consistent while maintaining good external
29 characteristics for the JSON.
31 Look at the Yahoo version closely to see how it works. Maybe we can adopt
32 it completely if that makes more sense...
34 R. White, 2006 November 6
42 import xml.etree.cElementTree as ET
# NOTE(review): fragment only -- the enclosing `def` line and the remaining
# statements are not part of this listing. Presumably this is the body of the
# strip_tag() helper that elem_to_internal() calls; confirm against the full file.
# The tag is split on '}'; when that yields more than one piece (i.e. the tag
# contained a '}'), the piece following the first '}' is taken as the
# namespace-stripped tag.
47 split_array = tag.split('}')
48 if len(split_array) > 1:
49 strip_ns_tag = split_array[1]
# NOTE(review): this listing omits several short lines of the function (loop
# header, branch conditions, the try statement, the return), so the statements
# below are not contiguous. The added comments describe only what the visible
# lines do.
54 def elem_to_internal(elem, strip_ns=1, strip=1):
55 """Convert an Element into an internal dictionary (not JSON!)."""
# Tag of this element after passing it through strip_tag().
60 elem_tag = strip_tag(elem.tag)
# Walk a snapshot (list copy) of the element's attribute name/value pairs.
62 for key, value in list(elem.attrib.items()):
65 # loop over subelements to merge them
# Recurse into the child element, forwarding the same strip_ns/strip settings.
67 v = elem_to_internal(subelem, strip_ns=strip_ns, strip=strip)
# Tag of the child element after passing it through strip_tag().
71 tag = strip_tag(subelem.tag)
76 # add to existing list for this tag
# Presumably raised when the existing d[tag] entry has no .append (it is not
# yet a list) -- TODO confirm against the omitted try body.
78 except AttributeError:
79 # turn existing entry into a list
80 d[tag] = [d[tag], value]
82 # add a new non-list entry
87 # ignore leading and trailing whitespace
97 # use #text element if other attributes exist
101 # text is the value if no attributes
# Builds an Element from the internal dictionary form.
# Visible behaviour: the keys of `pfsh` are collected into a list; a ValueError
# with the message "Illegal structure with multiple tags" can be raised (the
# guarding condition is not visible here); when the value is a dict its items
# are walked, a list item value yields one recursively built sub-element per
# list entry, another visible branch yields a single recursively built
# sub-element, and the element itself is created with factory(tag, attribs).
# NOTE(review): several short lines (conditions, attribute/text handling, the
# docstring terminator, the final return) are missing from this listing.
106 def internal_to_elem(pfsh, factory=ET.Element):
108 """Convert an internal dictionary (not JSON!) into an Element.
110 Whatever Element implementation we could import will be
111 used by default; if you want to use something else, pass the
112 Element class as the factory parameter.
119 tag = list(pfsh.keys())
121 raise ValueError("Illegal structure with multiple tags: %s" % tag)
124 if isinstance(value, dict):
125 for k, v in list(value.items()):
132 elif isinstance(v, list):
134 sublist.append(internal_to_elem({k: v2}, factory=factory))
136 sublist.append(internal_to_elem({k: v}, factory=factory))
139 e = factory(tag, attribs)
def elem2json(elem, options, strip_ns=1, strip=1):
    """Convert an ElementTree or Element into a JSON string.

    Args:
        elem: An Element, or any object exposing ``getroot()`` (such as an
            ElementTree), in which case its root element is converted.
        options: Parsed command-line options. Only ``options.pretty`` is
            consulted; an object without that attribute (including ``None``)
            selects the compact output.
        strip_ns: Forwarded unchanged to ``elem_to_internal``.
        strip: Forwarded unchanged to ``elem_to_internal``.

    Returns:
        The JSON text for the element. With ``options.pretty`` set, keys are
        sorted and the output is indented by four spaces; otherwise the
        default ``json.dumps`` formatting is used.
    """
    if hasattr(elem, 'getroot'):
        elem = elem.getroot()

    internal = elem_to_internal(elem, strip_ns=strip_ns, strip=strip)

    # Choose between the two serialisations based on the --pretty flag, so
    # that the compact form stays reachable and `options` is actually used.
    if getattr(options, 'pretty', False):
        return json.dumps(internal, sort_keys=True, indent=4, separators=(',', ': '))
    return json.dumps(internal)
def json2elem(json_data, factory=ET.Element):
    """Build an Element from a JSON string.

    The JSON text is decoded and the resulting structure is handed to
    ``internal_to_elem``. By default the Element implementation imported by
    this module is used; pass another Element class as ``factory`` to
    override it.
    """
    internal = json.loads(json_data)
    return internal_to_elem(internal, factory)
def xml2json(xmlstring, options, strip_ns=1, strip=1):
    """Parse an XML string and return its JSON string representation.

    ``options``, ``strip_ns`` and ``strip`` are passed straight through to
    ``elem2json``.
    """
    return elem2json(
        ET.fromstring(xmlstring),
        options,
        strip_ns=strip_ns,
        strip=strip,
    )
def json2xml(json_data, factory=ET.Element):
    """Serialise JSON data as XML.

    ``json_data`` may be an already-decoded dict or anything else accepted
    by ``json.loads``; non-dict input is decoded first. The structure is
    turned into an Element by ``internal_to_elem`` and serialised with
    ``ET.tostring``. By default the Element implementation imported by this
    module is used; pass another Element class as ``factory`` to override it.
    """
    # A dict is taken as-is; everything else is treated as JSON text.
    internal = json_data if isinstance(json_data, dict) else json.loads(json_data)
    return ET.tostring(internal_to_elem(internal, factory))
# Command-line entry point (fragment).
# NOTE(review): the enclosing `def` line and several short lines (try/except
# and else headers, `p.add_option(` openers, closing parentheses, the output
# writing and the call under the __main__ guard) are missing from this
# listing; comments below describe only the visible statements.
196 p = optparse.OptionParser(
197 description='Converts XML to JSON or the other way around. Reads from standard input by default, or from file if given.',
199 usage='%prog -t xml2json -o file.json [file]'
# Conversion direction; defaults to "xml2json".
201 p.add_option('--type', '-t', help="'xml2json' or 'json2xml'", default="xml2json")
202 p.add_option('--out', '-o', help="Write to OUT instead of stdout")
# The four flags below are all store_true switches.
204 '--strip_text', action="store_true",
205 dest="strip_text", help="Strip text for xml2json")
207 '--pretty', action="store_true",
208 dest="pretty", help="Format JSON output so it is easier to read")
210 '--strip_namespace', action="store_true",
211 dest="strip_ns", help="Strip namespace for xml2json")
213 '--strip_newlines', action="store_true",
214 dest="strip_nl", help="Strip newlines for xml2json")
215 options, arguments = p.parse_args()
# Input comes from stdin unless exactly one positional argument is given, in
# which case that argument is opened as the input file.
217 inputstream = sys.stdin
218 if len(arguments) == 1:
220 inputstream = open(arguments[0])
# Presumably the handler for a failed open() -- TODO confirm against the
# omitted try/except lines.
222 sys.stderr.write("Problem reading '{0}'\n".format(arguments[0]))
# NOTE(review): `input` shadows the builtin of the same name.
226 input = inputstream.read()
230 if options.strip_text:
# Removes every '\n' and '\r' from the input text (presumably guarded by
# options.strip_nl -- the condition line is not visible).
235 input = input.replace('\n', '').replace('\r','')
# Dispatch on the requested conversion direction.
236 if (options.type == "xml2json"):
237 out = xml2json(input, options, strip_ns, strip)
239 out = json2xml(input)
# NOTE(review): `file` shadows a Python 2 builtin name; whether the handle is
# closed is not visible in this listing.
242 file = open(options.out, 'w')
# Script guard; its body is not part of this listing.
248 if __name__ == "__main__":