added xml_to_json.py
0c71d424
Hari Sekhon
committed
1 changed file
xml_to_json.py
/xml_to_json.py+128
/xml_to_json.py
Add comment 1 Plus  #!/usr/bin/env python
Add comment 2 Plus  # vim:ts=4:sts=4:sw=4:et
Add comment 3 Plus  #
Add comment 4 Plus  # Author: Hari Sekhon
Add comment 5 Plus  # Date: 2016-01-15 00:07:09 +0000 (Fri, 15 Jan 2016)
Add comment 6 Plus  #
Add comment 7 Plus  # https://github.com/harisekhon/pytools
Add comment 8 Plus  #
Add comment 9 Plus  # License: see accompanying Hari Sekhon LICENSE file
Add comment 10 Plus  #
Add comment 11 Plus  # If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback
Add comment 12 Plus  # to help improve or steer this or other code I publish
Add comment 13 Plus  #
Add comment 14 Plus  # http://www.linkedin.com/in/harisekhon
Add comment 15 Plus  #
Add comment 16 Plus  
Add comment 17 Plus  """
Add comment 18 Plus  
Add comment 19 Plus  Tool to convert XML to JSON
Add comment 20 Plus  
Add comment 21 Plus  Reads any given files as XML and prints the equivalent JSON to stdout for piping or redirecting to a file.
Add comment 22 Plus  
Add comment 23 Plus  Directories if given are detected and recursed, processing all files in the directory tree ending in a .xml suffix.
Add comment 24 Plus  
Add comment 25 Plus  Works like a standard unix filter program - if no files are passed as arguments or '-' is passed then reads from
Add comment 26 Plus  standard input.
Add comment 27 Plus  
Add comment 28 Plus  """
Add comment 29 Plus  
Add comment 30 Plus  from __future__ import absolute_import
Add comment 31 Plus  from __future__ import division
Add comment 32 Plus  from __future__ import print_function
Add comment 33 Plus  #from __future__ import unicode_literals
Add comment 34 Plus  
Add comment 35 Plus  import json
Add comment 36 Plus  import os
Add comment 37 Plus  import re
Add comment 38 Plus  import sys
Add comment 39 Plus  import xml
Add comment 40 Plus  import xmltodict
# Make the 'pylib' directory that sits next to this script importable
libdir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'pylib'))
sys.path.append(libdir)
try:
    # pylint: disable=wrong-import-position
    from harisekhon.utils import die, ERRORS, log, log_option
    from harisekhon import CLI
except ImportError as import_error:
    # Explain the most likely causes on stderr, then exit with status 4
    for message in (
            'module import failed: %s' % import_error,
            "Did you remember to build the project by running 'make'?",
            "Alternatively perhaps you tried to copy this program out without it's adjacent libraries?",
    ):
        print(message, file=sys.stderr)
    sys.exit(4)
Add comment 52 Plus  
Add comment 53 Plus  __author__ = 'Hari Sekhon'
Add comment 54 Plus  __version__ = '0.1'
Add comment 55 Plus  
Add comment 56 Plus  
class XmlToJson(CLI):
    """Convert XML documents to JSON and print the result to standard output.

    Each argument is either a file, a directory (walked recursively, converting
    every file whose name ends in '.xml', case-insensitively) or '-' for
    standard input. When no arguments are given, standard input is read.

    Option parsing, ``self.args``, ``add_opt()`` and ``get_opt()`` come from the
    ``CLI`` base class, which is not defined in this file.
    """

    def __init__(self):
        """Initialise the base CLI and the conversion settings."""
        # Python 2.x
        super(XmlToJson, self).__init__()
        # Python 3.x
        # super().__init__()
        # 'indent' argument handed to json.dumps(); run() sets it to 4 for --pretty
        self.indent = None
        # Selects the files to convert when walking a directory tree
        self.re_xml_suffix = re.compile(r'.*\.xml$', re.I)

    def add_options(self):
        """Register the -p / --pretty command line switch."""
        self.add_opt('-p', '--pretty', action='store_true', help='Pretty Print the resulting JSON')

    def xml_to_json(self, content, filepath=None):
        """Parse XML text and return the equivalent JSON string.

        :param content: the XML document to convert
        :param filepath: optional source path, used only in the error message
        :return: JSON string with sorted keys, indented by ``self.indent``

        A document that fails to parse is reported through ``die()``.
        """
        # Import the submodule explicitly: a bare 'import xml' does not load
        # xml.parsers.expat, so the attribute lookup only worked as a side
        # effect of xmltodict having imported it first.
        from xml.parsers.expat import ExpatError
        try:
            parsed = xmltodict.parse(content)
        except ExpatError as parse_error:
            # Separate names for the parsed document and the exception: Python 3
            # unbinds the 'as' target when the handler finishes.
            file_detail = ''
            if filepath is not None:
                file_detail = ' in file \'{0}\''.format(filepath)
            # NOTE(review): die() is expected to terminate the program - confirm
            die("Failed to parse XML{0}: {1}".format(file_detail, parse_error))
        return json.dumps(parsed, sort_keys=True, indent=self.indent)

    def run(self):
        """Validate every argument up front, then convert each one in order."""
        if self.get_opt('pretty'):
            log_option('pretty', True)
            self.indent = 4
        if not self.args:
            # behave like a unix filter: no arguments means read standard input
            self.args.append('-')
        for arg in self.args:
            if arg == '-':
                continue
            if not os.path.exists(arg):
                # stdout carries the JSON output, so diagnostics go to stderr
                print("'%s' not found" % arg, file=sys.stderr)
                sys.exit(ERRORS['WARNING'])
            if os.path.isfile(arg):
                log_option('file', arg)
            elif os.path.isdir(arg):
                log_option('directory', arg)
            else:
                die("path '%s' could not be determined as either a file or directory" % arg)
        for arg in self.args:
            self.process_path(arg)

    def process_path(self, path):
        """Convert a single file / '-' or every '.xml' file under a directory.

        :param path: file path, directory path or '-' for standard input
        """
        if path == '-' or os.path.isfile(path):
            self.process_file(path)
        elif os.path.isdir(path):
            for root, _, files in os.walk(path):
                for filename in files:
                    filepath = os.path.join(root, filename)
                    if self.re_xml_suffix.match(filepath):
                        self.process_file(filepath)
        else:
            die("failed to determine if path '%s' is a file or directory" % path)

    def process_file(self, filepath):
        """Read one XML source and print its JSON conversion to standard output.

        :param filepath: path of the file to convert, or '-' / '<STDIN>' to
                         read the document from standard input
        """
        log.debug('processing filepath \'%s\'', filepath)
        if filepath == '-':
            filepath = '<STDIN>'
        if filepath == '<STDIN>':
            # The converted document used to be discarded here, so piping XML
            # into the program produced no output at all.
            print(self.xml_to_json(sys.stdin.read()))
        else:
            with open(filepath) as file_handle:
                content = file_handle.read()
            print(self.xml_to_json(content, filepath=filepath))
Add comment 125 Plus  
if __name__ == '__main__':
    # Script entry point: build the tool, then hand control to its main()
    # (main() is not defined in this file - presumably inherited from CLI)
    converter = XmlToJson()
    converter.main()
Add comment 128 Plus