""" An XPath for JSON A port of the Perl, and JavaScript versions of JSONPath see http://goessner.net/articles/JsonPath/ Based on on JavaScript version by Stefan Goessner at: http://code.google.com/p/jsonpath/ and Perl version by Kate Rhodes at: http://github.com/masukomi/jsonpath-perl/tree/master """ import re import sys __author__ = "Phil Budne" __revision__ = "$Revision: 1.13 $" __version__ = "0.54" # Copyright (c) 2007 Stefan Goessner (goessner.net) # Copyright (c) 2008 Kate Rhodes (masukomi.org) # Copyright (c) 2008-2012 Philip Budne (ultimate.com) # Licensed under the MIT licence: # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, # copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following # conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # XXX BUGS: # evalx is generally a crock: # handle !@.name.name??? # there are probably myriad unexpected ways to get an exception: # wrap initial "trace" call in jsonpath body in a try/except?? 
# XXX TODO:
# internally keep paths as lists to preserve integer types
#       (instead of as ';' delimited strings)

__all__ = ["jsonpath"]

# XXX precompile RE objects on load???
# re_1 = re.compile(.....)
# re_2 = re.compile(.....)

# NOTE(review): the copy of this module that was reviewed had been flattened
# onto a few physical lines and had lost every span of text between a "<" and
# the following ">" (which ate the regex lookbehinds and the code after
# them).  Spans tagged "reconstructed" below were rebuilt from the names the
# surviving code relies on; confirm them against a pristine jsonpath.py 0.54.


def normalize(x):
    """normalize the path expression; outside jsonpath to allow testing

    Rewrites a JSONPath string into the internal form used by ``jsonpath``:
    path steps separated by ';' (for example ``$.a[0].b`` -> ``$;a;0;b``).

    Bracketed ``(...)`` index expressions and ``?(...)`` filter expressions
    are swapped out for ``[#n]`` placeholders first, so the separator
    rewriting cannot touch the dots and brackets inside them, and are put
    back at the end.

    Args:
        x: the JSONPath expression (a string).

    Returns:
        The normalized, ';'-delimited path string.
    """
    subx = []

    # replace index/filter expressions with placeholders
    # Python anonymous functions (lambdas) are cryptic, hard to debug
    def f1(m):
        n = len(subx)  # before append
        g1 = m.group(1)
        subx.append(g1)
        ret = "[#%d]" % n
        return ret

    x = re.sub(r"[\['](\??\(.*?\))[\]']", f1, x)

    # added the negative lookbehind -krhodes
    # NOTE(review): reconstructed from here to the end of normalize().
    # The lookbehind keeps a '.' that directly follows '@' from being
    # treated as a path separator.
    x = re.sub(r"'?(?<!@)\.'?|\['?", ";", x)

    # an empty step (from "..") becomes an explicit ".." step
    x = re.sub(r";;;|;;", ";..;", x)

    # drop a trailing ';', every closing bracket, and a trailing quote
    x = re.sub(r";$|'?\]|'$", "", x)

    # put expressions back
    def f2(m):
        g1 = m.group(1)
        return subx[int(g1)]

    x = re.sub(r"#([0-9]+)", f2, x)

    return x


def jsonpath(obj, expr, result_type="VALUE", debug=0, use_eval=True):
    """traverse JSON object using jsonpath expr, returning values or paths

    NOTE(review): the ``def`` line and the helpers ``s``, ``isint``,
    ``as_path`` and ``store`` are reconstructed; the positional order
    ``(obj, expr, result_type)`` is what the script section at the bottom of
    this file relies on.

    Args:
        obj: the decoded JSON data (nested dicts / lists / scalars).
        expr: the JSONPath expression string.
        result_type: "VALUE" to collect matched values; "IPATH" to collect
            each match's path as a list of step strings; any other value
            ("PATH") to collect paths in full bracket notation such as
            ``$['store']['book'][0]``.
        debug: non-zero prints tracing output; values above 1 print more.
        use_eval: when false, any ``(...)`` index or ``?(...)`` filter
            expression raises ``Exception("eval disabled")`` instead of
            being evaluated.

    Returns:
        A list of matches, or ``False`` when nothing matched or when
        ``expr`` or ``obj`` is falsy.

    SECURITY: index and filter expressions are passed to ``eval`` with the
    caller's globals.  Do not use this with path expressions that come from
    an untrusted source (or pass ``use_eval=False``).
    """

    def s(x, y):
        """concatenate path elements"""
        return str(x) + ";" + str(y)

    def isint(x):
        """check if argument represents a decimal integer"""
        return x.isdigit()

    def as_path(path):
        """convert internal path representation to
        "full bracket notation" for PATH output"""
        p = "$"
        for piece in path.split(";")[1:]:
            # make a guess on how to index
            # XXX need to apply \ quoting on '!!
            if isint(piece):
                p += "[%s]" % piece
            else:
                p += "['%s']" % piece
        return p

    def store(path, object):
        """record one match in ``result`` in the requested result_type"""
        if result_type == "VALUE":
            result.append(object)
        elif result_type == "IPATH":  # Index format path (Python ext)
            # return list of list of indices -- can be used w/o "eval" or split
            result.append(path.split(";")[1:])
        else:  # PATH
            result.append(as_path(path))
        return path

    def trace(expr, obj, path):
        """consume the first step of ``expr`` against ``obj`` and recurse

        ``expr`` is the remaining ';'-delimited path, ``path`` the
        ';'-delimited path walked so far.  When ``expr`` is exhausted the
        current ``obj`` is a match and is stored.
        """
        if debug:
            print("trace", expr, "/", path)
        if expr:
            x = expr.split(";")
            loc = x[0]
            x = ";".join(x[1:])
            if debug:
                print("\t", loc, type(obj))
            if loc == "*":
                # wildcard: retry with every key/index as the current step

                def f03(key, loc, expr, obj, path):
                    if debug > 1:
                        print("\tf03", key, loc, expr, path)
                    trace(s(key, expr), obj, path)

                walk(loc, x, obj, path, f03)
            elif loc == "..":
                # recursive descent: try the rest of the path here, then
                # again (still prefixed with "..") inside every child
                trace(x, obj, path)

                def f04(key, loc, expr, obj, path):
                    if debug > 1:
                        print("\tf04", key, loc, expr, path)
                    if isinstance(obj, dict):
                        if key in obj:
                            trace(s("..", expr), obj[key], s(path, key))
                    else:
                        if key < len(obj):
                            trace(s("..", expr), obj[key], s(path, key))

                walk(loc, x, obj, path, f04)
            elif loc == "!":
                # Perl jsonpath extension: return keys

                def f06(key, loc, expr, obj, path):
                    if isinstance(obj, dict):
                        trace(expr, key, path)

                walk(loc, x, obj, path, f06)
            elif isinstance(obj, dict) and loc in obj:
                trace(x, obj[loc], s(path, loc))
            elif isinstance(obj, list) and isint(loc):
                iloc = int(loc)
                # strict '>': with the former '>=' an index equal to
                # len(obj) reached obj[iloc] and raised IndexError
                if len(obj) > iloc:
                    trace(x, obj[iloc], s(path, loc))
            else:
                # [(index_expression)]
                if loc.startswith("(") and loc.endswith(")"):
                    if debug > 1:
                        print("index", loc)
                    e = evalx(loc, obj)
                    trace(s(e, x), obj, path)
                    return

                # ?(filter_expression)
                if loc.startswith("?(") and loc.endswith(")"):
                    if debug > 1:
                        print("filter", loc)

                    def f05(key, loc, expr, obj, path):
                        if debug > 1:
                            print("f05", key, loc, expr, path)
                        if isinstance(obj, dict):
                            eval_result = evalx(loc, obj[key])
                        else:
                            eval_result = evalx(loc, obj[int(key)])
                        if eval_result:
                            trace(s(key, expr), obj, path)

                    loc = loc[2:-1]  # strip the "?(" and ")"
                    walk(loc, x, obj, path, f05)
                    return

                # [start:end:step] slice
                m = re.match(r"(-?[0-9]*):(-?[0-9]*):?(-?[0-9]*)$", loc)
                if m:
                    if isinstance(obj, (dict, list)):
                        objlen = len(obj)
                        s0 = m.group(1)
                        s1 = m.group(2)
                        s2 = m.group(3)

                        # XXX int("badstr") raises exception
                        start = int(s0) if s0 else 0
                        end = int(s1) if s1 else objlen
                        step = int(s2) if s2 else 1

                        # negative bounds count from the end; all bounds
                        # are clamped to 0..objlen
                        if start < 0:
                            start = max(0, start + objlen)
                        else:
                            start = min(objlen, start)
                        if end < 0:
                            end = max(0, end + objlen)
                        else:
                            end = min(objlen, end)

                        # range() rather than the Python-2-only xrange()
                        for i in range(start, end, step):
                            trace(s(i, x), obj, path)
                    return

                # after (expr) & ?(expr)
                if loc.find(",") >= 0:
                    # [index,index....]
                    for piece in re.split(r"'?,'?", loc):
                        if debug > 1:
                            print("piece", piece)
                        trace(s(piece, x), obj, path)
        else:
            store(path, obj)

    def walk(loc, expr, obj, path, funct):
        """call ``funct`` once per list index or dict key of ``obj``

        Objects that are neither list nor dict are silently skipped.
        """
        if isinstance(obj, list):
            for i in range(len(obj)):
                funct(i, loc, expr, obj, path)
        elif isinstance(obj, dict):
            for key in obj:
                funct(key, loc, expr, obj, path)

    def evalx(loc, obj):
        """eval expression

        Rewrites the JavaScript-flavoured expression ``loc`` into Python
        (``@`` becomes the local name ``__obj``, bound to ``obj``) and
        evaluates it.  Returns the value of the expression, or ``False`` if
        evaluating it raised any exception.
        """
        if debug:
            print("evalx", loc)

        # a nod to JavaScript. doesn't work for @.name.name.length
        # Write len(@.name.name) instead!!!
        loc = loc.replace("@.length", "len(__obj)")

        loc = loc.replace("&&", " and ").replace("||", " or ")

        # replace !@.name with 'name' not in obj
        # XXX handle !@.name.name.name....
        def notvar(m):
            return "'%s' not in __obj" % m.group(1)

        loc = re.sub(r"!@\.([a-zA-Z@_]+)", notvar, loc)

        # replace @.name.... with __obj['name']....
        # handle @.name[.name...].length
        def varmatch(m):
            def brackets(elts):
                ret = "__obj"
                for e in elts:
                    if isint(e):
                        ret += "[%s]" % e  # ain't necessarily so
                    else:
                        ret += "['%s']" % e  # XXX beware quotes!!!!
                return ret

            g1 = m.group(1)
            elts = g1.split(".")
            if elts[-1] == "length":
                return "len(%s)" % brackets(elts[1:-1])
            return brackets(elts[1:])

        # NOTE(review): reconstructed from here through the final debug
        # print.  Digits are accepted in the name pattern because
        # brackets() above has an explicit integer-element branch; confirm
        # the exact character class against upstream.
        loc = re.sub(r"(?<!\\)(@\.[a-zA-Z@_0-9.]+)", varmatch, loc)

        # removed = -> == translation
        # causes problems if a string contains =

        # replace @  w/ "__obj", but \@ means a literal @
        loc = re.sub(r"(?<!\\)@", "__obj", loc).replace(r"\@", "@")
        if not use_eval:
            if debug:
                print("eval disabled")
            raise Exception("eval disabled")
        if debug:
            print("eval", loc)
        try:
            # eval w/ caller globals, w/ local "__obj"!
            # SECURITY: arbitrary code execution if expr is untrusted.
            v = eval(loc, caller_globals, {"__obj": obj})
        except Exception as e:
            # swallow all exceptions
            if debug:
                print(repr(e), str(e))
            # XXX raise exception??
            return False

        if debug:
            print("->", v)
        return v

    # body of jsonpath()

    # Get caller globals so eval can pick up user functions!!!
    # (must stay directly in jsonpath(): frame 1 is jsonpath's caller)
    caller_globals = sys._getframe(1).f_globals
    result = []
    if expr and obj:
        cleaned_expr = normalize(expr)
        if cleaned_expr.startswith("$;"):
            cleaned_expr = cleaned_expr[2:]

        # XXX wrap this in a try??
        trace(cleaned_expr, obj, "$")

        if len(result) > 0:
            return result
    return False


if __name__ == "__main__":
    try:
        import json  # v2.6
    except ImportError:
        import simplejson as json

    import sys

    # XXX take options for output format, output file, debug level

    if len(sys.argv) < 3 or len(sys.argv) > 4:
        sys.stdout.write("Usage: jsonpath.py FILE PATH [OUTPUT_TYPE]\n")
        sys.exit(1)

    # open() rather than the Python-2-only file(); "with" closes the handle
    with open(sys.argv[1]) as infile:
        data = json.load(infile)
    path = sys.argv[2]
    result_type = "VALUE"

    if len(sys.argv) > 3:
        # XXX verify?
        result_type = sys.argv[3]

    value = jsonpath(data, path, result_type)

    # exit status 1 when nothing matched
    if not value:
        sys.exit(1)

    f = sys.stdout
    json.dump(value, f, sort_keys=True, indent=1)
    f.write("\n")

    sys.exit(0)