1 changed file
xml_to_json.py + | ||
Add comment 1 Plus #!/usr/bin/env python
Add comment 2 Plus # vim:ts=4:sts=4:sw=4:et
Add comment 3 Plus #
Add comment 4 Plus # Author: Hari Sekhon
Add comment 5 Plus # Date: 2016-01-15 00:07:09 +0000 (Fri, 15 Jan 2016)
Add comment 6 Plus #
Add comment 7 Plus # https://github.com/harisekhon/pytools
Add comment 8 Plus #
Add comment 9 Plus # License: see accompanying Hari Sekhon LICENSE file
Add comment 10 Plus #
Add comment 11 Plus # If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback
Add comment 12 Plus # to help improve or steer this or other code I publish
Add comment 13 Plus #
Add comment 14 Plus # http://www.linkedin.com/in/harisekhon
Add comment 15 Plus #
Add comment 16 Plus
Add comment 17 Plus """
Add comment 18 Plus
Add comment 19 Plus Tool to convert XML to JSON
Add comment 20 Plus
Add comment 21 Plus Reads any given files as XML and prints the equivalent JSON to stdout for piping or redirecting to a file.
Add comment 22 Plus
Add comment 23 Plus Directories if given are detected and recursed, processing all files in the directory tree ending in a .xml suffix.
Add comment 24 Plus
Add comment 25 Plus Works like a standard unix filter program - if no files are passed as arguments or '-' is passed then reads from
Add comment 26 Plus standard input.
Add comment 27 Plus
Add comment 28 Plus """
Add comment 29 Plus
Add comment 30 Plus from __future__ import absolute_import
Add comment 31 Plus from __future__ import division
Add comment 32 Plus from __future__ import print_function
Add comment 33 Plus #from __future__ import unicode_literals
Add comment 34 Plus
Add comment 35 Plus import json
Add comment 36 Plus import os
Add comment 37 Plus import re
Add comment 38 Plus import sys
Add comment 39 Plus import xml
Add comment 40 Plus import xmltodict
Add comment 41 Plus libdir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'pylib'))
Add comment 42 Plus sys.path.append(libdir)
Add comment 43 Plus try:
Add comment 44 Plus # pylint: disable=wrong-import-position
Add comment 45 Plus from harisekhon.utils import die, ERRORS, log, log_option
Add comment 46 Plus from harisekhon import CLI
Add comment 47 Plus except ImportError as _:
Add comment 48 Plus print('module import failed: %s' % _, file=sys.stderr)
Add comment 49 Plus print("Did you remember to build the project by running 'make'?", file=sys.stderr)
Add comment 50 Plus print("Alternatively perhaps you tried to copy this program out without it's adjacent libraries?", file=sys.stderr)
Add comment 51 Plus sys.exit(4)
Add comment 52 Plus
Add comment 53 Plus __author__ = 'Hari Sekhon'
Add comment 54 Plus __version__ = '0.1'
Add comment 55 Plus
Add comment 56 Plus
class XmlToJson(CLI):
    """Command line tool that converts XML documents to JSON on stdout.

    Each positional argument may be a file, a directory (walked recursively
    for files ending in '.xml', case-insensitively) or '-' for standard
    input. With no arguments, standard input is read.

    Option handling (add_opt / get_opt), self.args and main() are inherited
    from the CLI base class, which is defined outside this file.
    """

    def __init__(self):
        # Python 2.x
        super(XmlToJson, self).__init__()
        # Python 3.x
        # super().__init__()
        # indent passed to json.dumps(); None gives compact single-line output
        self.indent = None
        # matches any path ending in .xml, ignoring case
        self.re_xml_suffix = re.compile(r'.*\.xml$', re.I)

    def add_options(self):
        """Register the --pretty switch with the option parser."""
        self.add_opt('-p', '--pretty', action='store_true', help='Pretty Print the resulting JSON')

    def xml_to_json(self, content, filepath=None):
        """Parse XML text and return the equivalent JSON string.

        :param content:  the XML document as a string
        :param filepath: optional originating path, used only in the error message
        :return: JSON string with sorted keys, indented by self.indent
        """
        # Import the submodule explicitly: a bare 'import xml' does not load
        # xml.parsers.expat, so referencing it through the package only works
        # if some other module happens to have imported it already.
        from xml.parsers.expat import ExpatError
        try:
            data = xmltodict.parse(content)
        except ExpatError as exc:
            file_detail = ''
            if filepath is not None:
                file_detail = " in file '{0}'".format(filepath)
            # NOTE(review): die() is presumed to terminate the program - confirm in harisekhon.utils
            die("Failed to parse XML{0}: {1}".format(file_detail, exc))
        return json.dumps(data, sort_keys=True, indent=self.indent)  # , separators=(',', ': '))

    def run(self):
        """Validate every argument up front, then convert each one in order."""
        if self.get_opt('pretty'):
            log_option('pretty', True)
            self.indent = 4
        if not self.args:
            # behave like a unix filter when no paths are given
            self.args.append('-')
        # Check all paths before processing any, so a bad argument is
        # reported before any JSON has been written.
        for arg in self.args:
            if arg == '-':
                continue
            if not os.path.exists(arg):
                # stdout carries the JSON output, so report the error on stderr
                print("'%s' not found" % arg, file=sys.stderr)
                sys.exit(ERRORS['WARNING'])
            if os.path.isfile(arg):
                log_option('file', arg)
            elif os.path.isdir(arg):
                log_option('directory', arg)
            else:
                die("path '%s' could not be determined as either a file or directory" % arg)
        for arg in self.args:
            self.process_path(arg)

    def process_path(self, path):
        """Convert a single file / stdin marker, or every .xml file under a directory."""
        if path == '-' or os.path.isfile(path):
            self.process_file(path)
        elif os.path.isdir(path):
            for root, _, files in os.walk(path):
                for filename in files:
                    filepath = os.path.join(root, filename)
                    if self.re_xml_suffix.match(filepath):
                        self.process_file(filepath)
        else:
            die("failed to determine if path '%s' is a file or directory" % path)

    def process_file(self, filepath):
        """Read one input ('-' or '<STDIN>' means standard input) and print its JSON."""
        log.debug('processing filepath \'%s\'', filepath)
        if filepath in ('-', '<STDIN>'):
            # The converted JSON must be printed here as well; previously the
            # result for standard input was computed and then thrown away.
            print(self.xml_to_json(sys.stdin.read()))
        else:
            with open(filepath) as file_handle:
                content = file_handle.read()
            print(self.xml_to_json(content, filepath=filepath))
Add comment 124 Plus
Add comment 125 Plus
if __name__ == '__main__':
    # Only run the tool when executed as a script, not when imported.
    cli = XmlToJson()
    cli.main()
Add comment 128 Plus