# jsonschema2mentat.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

from sys import argv, stderr
from json import load, dumps
from collections import Sequence
from urllib import unquote
from pprint import pprint


def flee(s):
    """
    Report a fatal error and terminate the script.

    Writes the string form of ``s`` followed by a newline to standard error
    and then exits with status 1. This function never returns.
    """
    # Plain write() instead of the Python 2 only "print >>" statement, so the
    # function parses on Python 2 and Python 3 alike. The tuple wrapping keeps
    # the formatting correct even if ``s`` itself is a tuple.
    stderr.write("%s\n" % (s,))
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # "site" module and is not available e.g. under "python -S".
    raise SystemExit(1)


def loadJSON(p):
    """
    Load and parse the JSON file at path ``p``.

    Returns the decoded JSON document. If the file cannot be opened or read,
    or if its content is not valid JSON, the error is reported through
    flee(), which terminates the script.
    """
    try:
        with open(str(p), "r") as f:
            try:
                document = load(f)
            except ValueError as err:
                # Prefix parse errors with the file name, the decoder's
                # message alone does not say which file was broken
                flee("%s: %s" % (p, err))
    except IOError as err:
        # The I/O error text already carries the file name
        flee(err)

    return document


def resolve_ref(document, fragment):
    """
    Look up the target of a $ref inside ``document``.

    ``fragment`` is treated as a local JSON pointer: it is percent-decoded,
    leading "#" and "/" characters are dropped and the rest is walked
    segment by segment, starting at ``document``. An empty pointer yields
    ``document`` itself. Segments that cannot be followed are fatal and are
    reported through flee().
    """
    pointer = unquote(fragment).lstrip("#").lstrip("/")
    if pointer:
        tokens = pointer.split("/")
    else:
        tokens = []

    node = document
    for token in tokens:
        # Undo JSON pointer escaping ("~1" is "/", "~0" is "~")
        key = token.replace("~1", "/").replace("~0", "~")

        # Sequences are indexed by integers; a token that is not a number
        # is kept as is and left to fail in the lookup below
        if isinstance(node, Sequence):
            try:
                key = int(key)
            except ValueError:
                pass

        try:
            node = node[key]
        except (TypeError, LookupError):
            flee("Unresolvable JSON pointer: %r" % pointer)

    return node


# Type name translation table: process_simple() looks up the lowercased
# type name here and, when present, uses the mapped name instead.
xlattype = {
    "duration": "timedelta",
    "number": "real",
    "object": "document",
}


def process_simple(sch, name, parent, ordlist, card, whole, res):
    """
    Build one Mentat schema entry from ``sch`` and store it in ``res``.

    The entry carries the type name, ordinality ("mandatory" when ``name`` is
    listed in ``ordlist``, otherwise "optional"), cardinality ("multi" when
    ``card`` is true, otherwise "single") and the description, plus the enum
    values and the parent name when those are present. ``whole`` is unused
    here; it is accepted so all handlers share one signature.
    """
    # A reference into "#/definitions/" names the type by its last path
    # segment; anything else falls back to the basic JSON schema type.
    points_to_definition = (
        "$ref" in sch and sch["$ref"].startswith("#/definitions/"))
    if points_to_definition:
        typename = sch["$ref"].rsplit("/", 1)[-1]
    else:
        typename = sch["type"]
    # Type names are lowercased and possibly renamed
    typename = typename.lower()
    typename = xlattype.get(typename, typename)

    if name is not None and name in ordlist:
        ordinality = "mandatory"
    else:
        ordinality = "optional"

    # Basic Mentat schema data
    entry = {
        "type": typename,
        "ordinality": ordinality,
        "cardinality": "multi" if card else "single",
        "description": sch["description"],
    }
    if "enum" in sch:
        entry["values"] = sch["enum"]
    if parent:
        entry["parents"] = [parent]

    # On a name clash no attempt is made to compare the entries; a duplicate
    # is always stored under the name extended by as many "A" as needed,
    # and it remembers the original name in its "name" field.
    key = name
    if key in res:
        entry["name"] = name
        while key in res:
            key = key + "A"

    res[key] = entry


def recurse(sch, name, parent, ordlist, card, whole, res):
    """
    Recurse into subtrees based on type.

    A "$ref" in ``sch`` is crudely resolved against the ``whole`` document by
    merging the referenced schema into a copy of the current one; keys given
    directly in ``sch`` win over the referenced ones. The resulting schema is
    then handed, together with all other arguments unchanged, to the handler
    registered in ``fork`` for its "type". A missing or unsupported type is
    fatal and is reported through flee().
    """
    from pprint import pformat

    # Crudely resolve references (by merging into current tree)
    if "$ref" in sch:
        merged = dict(resolve_ref(whole, sch["$ref"]))
        merged.update(sch)
        sch = merged

    if "type" not in sch:
        # pformat() returns the dump as a string for the message, whereas
        # pprint() would print it to stdout and return None
        flee("Basic type not defined: %s" % pformat(sch))

    # Jumptable recursion
    try:
        handler = fork[sch["type"]]
    except (KeyError, TypeError):
        # Unknown type name, or an unhashable value such as a list of types
        flee("Unsupported basic type %r: %s" % (sch["type"], pformat(sch)))
    handler(sch, name, parent, ordlist, card, whole, res)


def process_object(sch, name, parent, ordlist, card, whole, res):
    """
    Process object type and recurse for children.

    For a named (non root) object an entry is generated through
    process_simple(). Every schema under "properties" is then processed by
    recurse() with this object's name as its parent, this object's
    "required" list as its ordinality list and cardinality False. An object
    without "properties" simply has no children. Results are added to
    ``res``; nothing is returned.
    """
    # Generate data for non root objects only
    if name:
        process_simple(sch, name, parent, ordlist, card, whole, res)

    # Set ordinality list for direct subobjects of this object
    neword = sch.get("required", [])

    # Process each child, with False cardinality. "properties" is optional
    # in JSON schema, so a missing key means there is nothing to descend into.
    for n, subsch in sch.get("properties", {}).items():
        recurse(subsch, n, name, neword, False, whole, res)


def process_array(sch, name, parent, ordlist, card, whole, res):
    """
    Process array type: recurse into the single "items" subschema, keeping
    the array's own name, parent and ordinality list, and indicate multiple
    cardinality. The incoming ``card`` is not used.
    """
    element_schema = sch["items"]
    recurse(element_schema, name, parent, ordlist, True, whole, res)


# Jumptable used by recurse(): JSON schema "type" value -> handler function.
fork = {
    # Container types descend into their subschemas
    "array": process_array,
    "object": process_object,
    # Scalar types directly produce an entry
    "boolean": process_simple,
    "integer": process_simple,
    "number": process_simple,
    "string": process_simple,
}


def main():
    """
    Command line entry point.

    Expects exactly one argument, the path of a JSON schema file. The schema
    is loaded, converted starting from its root object, and the result is
    written to standard output as JSON with sorted keys and an indent of 8.
    Wrong usage is reported through flee().
    """
    from os.path import basename
    from sys import stdout

    if len(argv) != 2:
        # Show only the script's file name, not the path it was called by
        flee("Usage: %s <jsonschemafile>" % basename(argv[0]))

    schema = loadJSON(argv[1])

    res = {}
    # First structure of JSON schema must be object
    process_object(schema, None, "", [""], False, schema, res)

    # write() instead of the Python 2 only print statement
    stdout.write(dumps(res, indent=8, sort_keys=True) + "\n")


# Run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
    main()