Backup-Restore support library + tests
[integration/test.git] / csit / libraries / backuprestore / jsonpathl.py
diff --git a/csit/libraries/backuprestore/jsonpathl.py b/csit/libraries/backuprestore/jsonpathl.py
new file mode 100644 (file)
index 0000000..bf84e80
--- /dev/null
@@ -0,0 +1,367 @@
+"""
+An XPath for JSON
+
+A port of the Perl, and JavaScript versions of JSONPath
+see http://goessner.net/articles/JsonPath/
+
+Based on on JavaScript version by Stefan Goessner at:
+        http://code.google.com/p/jsonpath/
+and Perl version by Kate Rhodes at:
+        http://github.com/masukomi/jsonpath-perl/tree/master
+"""
+
+import re
+import sys
+
+__author__ = "Phil Budne"
+__revision__ = "$Revision: 1.13 $"
+__version__ = '0.54'
+
+#   Copyright (c) 2007 Stefan Goessner (goessner.net)
+#       Copyright (c) 2008 Kate Rhodes (masukomi.org)
+#       Copyright (c) 2008-2012 Philip Budne (ultimate.com)
+#   Licensed under the MIT licence:
+#
+#   Permission is hereby granted, free of charge, to any person
+#   obtaining a copy of this software and associated documentation
+#   files (the "Software"), to deal in the Software without
+#   restriction, including without limitation the rights to use,
+#   copy, modify, merge, publish, distribute, sublicense, and/or sell
+#   copies of the Software, and to permit persons to whom the
+#   Software is furnished to do so, subject to the following
+#   conditions:
+#
+#   The above copyright notice and this permission notice shall be
+#   included in all copies or substantial portions of the Software.
+#
+#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+#   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+#   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+#   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+#   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+#   OTHER DEALINGS IN THE SOFTWARE.
+
+# XXX BUGS:
+# evalx is generally a crock:
+#       handle !@.name.name???
+# there are probably myriad unexpected ways to get an exception:
+#       wrap initial "trace" call in jsonpath body in a try/except??
+
+# XXX TODO:
+# internally keep paths as lists to preserve integer types
+#       (instead of as ';' delimited strings)
+
+__all__ = ['jsonpath']
+
+
+# XXX precompile RE objects on load???
+# re_1 = re.compile(.....)
+# re_2 = re.compile(.....)
+
+def normalize(x):
+    """normalize the path expression; outside jsonpath to allow testing"""
+    subx = []
+
+    # replace index/filter expressions with placeholders
+    # Python anonymous functions (lambdas) are cryptic, hard to debug
+    def f1(m):
+        n = len(subx)  # before append
+        g1 = m.group(1)
+        subx.append(g1)
+        ret = "[#%d]" % n
+        #       print "f1:", g1, ret
+        return ret
+
+    x = re.sub(r"[\['](\??\(.*?\))[\]']", f1, x)
+
+    # added the negative lookbehind -krhodes
+    x = re.sub(r"'?(?<!@)\.'?|\['?", ";", x)
+
+    x = re.sub(r";;;|;;", ";..;", x)
+
+    x = re.sub(r";$|'?\]|'$", "", x)
+
+    # put expressions back
+    def f2(m):
+        g1 = m.group(1)
+        #       print "f2:", g1
+        return subx[int(g1)]
+
+    x = re.sub(r"#([0-9]+)", f2, x)
+
+    return x
+
+
+def jsonpath(obj, expr, result_type='VALUE', debug=0, use_eval=True):
+    """traverse JSON object using jsonpath expr, returning values or paths"""
+
+    def s(x, y):
+        """concatenate path elements"""
+        return str(x) + ';' + str(y)
+
+    def isint(x):
+        """check if argument represents a decimal integer"""
+        return x.isdigit()
+
+    def as_path(path):
+        """convert internal path representation to
+           "full bracket notation" for PATH output"""
+        p = '$'
+        for piece in path.split(';')[1:]:
+            # make a guess on how to index
+            # XXX need to apply \ quoting on '!!
+            if isint(piece):
+                p += "[%s]" % piece
+            else:
+                p += "['%s']" % piece
+        return p
+
+    def store(path, object):
+        if result_type == 'VALUE':
+            result.append(object)
+        elif result_type == 'IPATH':  # Index format path (Python ext)
+            # return list of list of indices -- can be used w/o "eval" or split
+            result.append(path.split(';')[1:])
+        else:  # PATH
+            result.append(as_path(path))
+        return path
+
+    def trace(expr, obj, path):
+        if debug:
+            print "trace", expr, "/", path
+        if expr:
+            x = expr.split(';')
+            loc = x[0]
+            x = ';'.join(x[1:])
+            if debug:
+                print "\t", loc, type(obj)
+            if loc == "*":
+                def f03(key, loc, expr, obj, path):
+                    if debug > 1:
+                        print "\tf03", key, loc, expr, path
+                    trace(s(key, expr), obj, path)
+
+                walk(loc, x, obj, path, f03)
+            elif loc == "..":
+                trace(x, obj, path)
+
+                def f04(key, loc, expr, obj, path):
+                    if debug > 1:
+                        print "\tf04", key, loc, expr, path
+                    if isinstance(obj, dict):
+                        if key in obj:
+                            trace(s('..', expr), obj[key], s(path, key))
+                    else:
+                        if key < len(obj):
+                            trace(s('..', expr), obj[key], s(path, key))
+
+                walk(loc, x, obj, path, f04)
+            elif loc == "!":
+                # Perl jsonpath extension: return keys
+                def f06(key, loc, expr, obj, path):
+                    if isinstance(obj, dict):
+                        trace(expr, key, path)
+
+                walk(loc, x, obj, path, f06)
+            elif isinstance(obj, dict) and loc in obj:
+                trace(x, obj[loc], s(path, loc))
+            elif isinstance(obj, list) and isint(loc):
+                iloc = int(loc)
+                if len(obj) >= iloc:
+                    trace(x, obj[iloc], s(path, loc))
+            else:
+                # [(index_expression)]
+                if loc.startswith("(") and loc.endswith(")"):
+                    if debug > 1:
+                        print "index", loc
+                    e = evalx(loc, obj)
+                    trace(s(e, x), obj, path)
+                    return
+
+                # ?(filter_expression)
+                if loc.startswith("?(") and loc.endswith(")"):
+                    if debug > 1:
+                        print "filter", loc
+
+                    def f05(key, loc, expr, obj, path):
+                        if debug > 1:
+                            print "f05", key, loc, expr, path
+                        if isinstance(obj, dict):
+                            eval_result = evalx(loc, obj[key])
+                        else:
+                            eval_result = evalx(loc, obj[int(key)])
+                        if eval_result:
+                            trace(s(key, expr), obj, path)
+
+                    loc = loc[2:-1]
+                    walk(loc, x, obj, path, f05)
+                    return
+
+                m = re.match(r'(-?[0-9]*):(-?[0-9]*):?(-?[0-9]*)$', loc)
+                if m:
+                    if isinstance(obj, (dict, list)):
+                        def max(x, y):
+                            if x > y:
+                                return x
+                            return y
+
+                        def min(x, y):
+                            if x < y:
+                                return x
+                            return y
+
+                        objlen = len(obj)
+                        s0 = m.group(1)
+                        s1 = m.group(2)
+                        s2 = m.group(3)
+
+                        # XXX int("badstr") raises exception
+                        start = int(s0) if s0 else 0
+                        end = int(s1) if s1 else objlen
+                        step = int(s2) if s2 else 1
+
+                        if start < 0:
+                            start = max(0, start + objlen)
+                        else:
+                            start = min(objlen, start)
+                        if end < 0:
+                            end = max(0, end + objlen)
+                        else:
+                            end = min(objlen, end)
+
+                        for i in xrange(start, end, step):
+                            trace(s(i, x), obj, path)
+                    return
+
+                # after (expr) & ?(expr)
+                if loc.find(",") >= 0:
+                    # [index,index....]
+                    for piece in re.split(r"'?,'?", loc):
+                        if debug > 1:
+                            print "piece", piece
+                        trace(s(piece, x), obj, path)
+        else:
+            store(path, obj)
+
+    def walk(loc, expr, obj, path, funct):
+        if isinstance(obj, list):
+            for i in xrange(0, len(obj)):
+                funct(i, loc, expr, obj, path)
+        elif isinstance(obj, dict):
+            for key in obj:
+                funct(key, loc, expr, obj, path)
+
+    def evalx(loc, obj):
+        """eval expression"""
+
+        if debug:
+            print "evalx", loc
+
+        # a nod to JavaScript. doesn't work for @.name.name.length
+        # Write len(@.name.name) instead!!!
+        loc = loc.replace("@.length", "len(__obj)")
+
+        loc = loc.replace("&&", " and ").replace("||", " or ")
+
+        # replace !@.name with 'name' not in obj
+        # XXX handle !@.name.name.name....
+        def notvar(m):
+            return "'%s' not in __obj" % m.group(1)
+
+        loc = re.sub("!@\.([a-zA-Z@_]+)", notvar, loc)
+
+        # replace @.name.... with __obj['name']....
+        # handle @.name[.name...].length
+        def varmatch(m):
+            def brackets(elts):
+                ret = "__obj"
+                for e in elts:
+                    if isint(e):
+                        ret += "[%s]" % e  # ain't necessarily so
+                    else:
+                        ret += "['%s']" % e  # XXX beware quotes!!!!
+                return ret
+
+            g1 = m.group(1)
+            elts = g1.split('.')
+            if elts[-1] == "length":
+                return "len(%s)" % brackets(elts[1:-1])
+            return brackets(elts[1:])
+
+        loc = re.sub(r'(?<!\\)(@\.[a-zA-Z@_.]+)', varmatch, loc)
+
+        # removed = -> == translation
+        # causes problems if a string contains =
+
+        # replace @  w/ "__obj", but \@ means a literal @
+        loc = re.sub(r'(?<!\\)@', "__obj", loc).replace(r'\@', '@')
+        if not use_eval:
+            if debug:
+                print "eval disabled"
+            raise Exception("eval disabled")
+        if debug:
+            print "eval", loc
+        try:
+            # eval w/ caller globals, w/ local "__obj"!
+            v = eval(loc, caller_globals, {'__obj': obj})
+        except Exception, e:
+            if debug:
+                print e
+            return False
+
+        if debug:
+            print "->", v
+        return v
+
+    # body of jsonpath()
+
+    # Get caller globals so eval can pick up user functions!!!
+    caller_globals = sys._getframe(1).f_globals
+    result = []
+    if expr and obj:
+        cleaned_expr = normalize(expr)
+        if cleaned_expr.startswith("$;"):
+            cleaned_expr = cleaned_expr[2:]
+
+        # XXX wrap this in a try??
+        trace(cleaned_expr, obj, '$')
+
+        if len(result) > 0:
+            return result
+    return False
+
+
+if __name__ == '__main__':
+    try:
+        import json  # v2.6
+    except ImportError:
+        import simplejson as json
+
+    import sys
+
+    # XXX take options for output format, output file, debug level
+
+    if len(sys.argv) < 3 or len(sys.argv) > 4:
+        sys.stdout.write("Usage: jsonpath.py FILE PATH [OUTPUT_TYPE]\n")
+        sys.exit(1)
+
+    object = json.load(file(sys.argv[1]))
+    path = sys.argv[2]
+    format = 'VALUE'
+
+    if len(sys.argv) > 3:
+        # XXX verify?
+        format = sys.argv[3]
+
+    value = jsonpath(object, path, format)
+
+    if not value:
+        sys.exit(1)
+
+    f = sys.stdout
+    json.dump(value, f, sort_keys=True, indent=1)
+    f.write("\n")
+
+    sys.exit(0)