JSON Parser/Encoder for Rebol 2

Rebol [
    Title: "JSON Parser/Encoder for Rebol 2"
    Author: "Christopher Ross-Gill"
    Date: 11-Feb-2017
    Home: http://www.ross-gill.com/page/JSON_and_Rebol
    File: %altjson.r
    Version: 0.3.9
    Purpose: "De/Serialize a JSON string to Rebol data."
    Rights: http://opensource.org/licenses/Apache-2.0
    Type: 'module
    Name: 'rgchris.altjson
    Exports: [load-json to-json]
    History: [
        11-Feb-2017 0.3.9 "Include support for decoding surrogate pairs (emoji)"
        06-Feb-2017 0.3.8 "Fix Unicode -> UTF-8 decoding"
        02-May-2016 0.3.7 "Support for /, : and , characters in JSON object keys"
        22-Sep-2015 0.3.6 "Sync with v0.3.6 for Rebol 3"
        07-Jul-2014 0.3.0 "Initial support for JSONP"
        15-Jul-2011 0.2.6 "Flattens Flickr '_content' objects"
        02-Dec-2010 0.2.5 "Support for time! added"
        28-Aug-2010 0.2.4 "Encodes tag! any-type! paired blocks as an object"
        06-Aug-2010 0.2.2 "Issue! composed of digits encoded as integers"
        22-May-2005 0.1.0 "Original Version"
    ]
    Notes: {
        - Simple Escaping
        - Converts date! to RFC 3339 Date String
    }
]

load-json: use [
    tree branch here val is-flat emit new-child to-parent neaten resolutions
    space word comma number string block object _content value ident
][
    branch: make block! 10

    emit: func [val][here: insert/only here val]
    new-child: [(insert/only branch insert/only here here: copy [])]
    to-parent: [(here: take branch)]
    neaten: [
        (new-line/all head here true)
        (new-line/all/skip head here true 2)
    ]
    resolutions: [
        []
        [end skip]
    ]

    space: use [space][
        space: charset " ^-^/^M"
        [any space]
    ]

    comma: [space #"," space]

    number: use [dg ex nm as-num][
        dg: charset "0123456789"
        ex: [[#"e" | #"E"] opt [#"+" | #"-"] some dg]
        nm: [opt #"-" some dg opt [#"." some dg] opt ex]

        as-num: func [val /num][
            num: load val

            all [
                parse val [opt "-" some dg]
                decimal? num
                num: to issue! val
            ]

            num
        ]

        [copy val nm (val: as-num val)]
    ]

    string: use [ch dq es hx mp decode][
        ch: complement charset {\"}
        es: charset {"\/bfnrt}
        hx: charset "0123456789ABCDEFabcdef"
        mp: [#"^"" "^"" #"\" "\" #"/" "/" #"b" "^H" #"f" "^L" #"r" "^M" #"n" "^/" #"t" "^-"]

        decode: use [ch mk escape encode-utf8 decode-surrogate][
            encode-utf8: func [
                "Encode a code point in UTF-8 format" 
                char [integer!] "Unicode code point"
            ][
                as-string to binary! reduce case [
                    char <= 127 [[char]]

                    ; U+0080 - U+07FF
                    char <= 2047 [[
                        char and 1984 / 64 + 192 
                        char and 63 + 128
                    ]]

                    any [
                        ; http://www.unicode.org/faq/private_use.html#nonchar4
                        ; invalid U+D800 - U+DFFF ; UTF-16 Surrogates
                        all [char >= 55296 char <= 57343]
                        ; invalid U+FDD0 - U+FDEF ; Noncharacters
                        all [char >= 64976 char <= 65007]
                        ; invalid U+nFFFE - U+nFFFF ; Noncharacters
                        equal? 65534 char and 65534
                        equal? 65535 char and 65535
                    ][
                        [239 191 189]
                    ]

                    ; U+0800 - U+FFFD ; upper U+FFFF for tests
                    char <= 65535 [[
                        char and 61440 / 4096 + 224 
                        char and 4032 / 64 + 128 
                        char and 63 + 128
                    ]]

                    ; U+010000 - U+10FFFF
                    char <= 1114111 [[
                        char and 1835008 / 262144 + 240 
                        char and 258048 / 4096 + 128 
                        char and 4032 / 64 + 128 
                        char and 63 + 128
                    ]]

                    true [[239 191 189]] ; Unknown codepoint
                ]
            ]

            decode-surrogate: func [char [string!]][
                char: debase/base char 16
                encode-utf8 65536
                    + (shift/left 1023 and to integer! take/part char 2 10)
                    + (1023 and to integer! char)
            ]

            escape: [
                mk: #"\" [
                      es (mk: change/part mk select mp mk/2 2)
                    | #"u" copy ch [
                        #"d" [#"8" | #"9" | #"a" | #"b"] 2 hx
                        "\u" #"d" [#"c" | #"d" | #"e" | #"f"] 2 hx
                    ] (
                        remove remove skip ch 4
                        mk: change/part mk decode-surrogate ch 12
                    ) :mk
                    | #"u" copy ch 4 hx (
                        mk: change/part mk encode-utf8 to integer! to issue! ch 6
                    )
                ] :mk
            ]

            func [text [string! none!] /mk][
                either none? text [copy ""][
                    all [parse/all text [any [to "\" escape] to end] text]
                ]
            ]
        ]

        [#"^"" copy val [any [some ch | #"\" [#"u" 4 hx | es]]] #"^"" (val: decode val)]
    ]

    word: use [word1 word+ special escapes escape wordify is-word][
        word1: charset ["!&*=?_|~" #"A" - #"Z" #"a" - #"z"]
        word+: charset ["!&'*+-.0123456789=?_|~" #"A" - #"Z" #"a" - #"z"]
        special: charset "^-^/ '+,-./0123456789:^^"

        escapes: [
            #"^-" "^^t" #"^/" "^^n"
            #" "  "^^_" #"/"  "^^|"
            #","  "^^&" #":"  "^^!"
            #"^^" "^^^^"
        ]

        escape: use [mk][
            [
                mk: special (
                    mk: change/part mk any [
                        select escapes mk/1
                        join "^^" mk/1
                    ] 1
                ) :mk
            ]
        ]

        wordify: func [value /local mk][
            if parse/all value: copy value [
                [some word1 | escape]
                any [some word+ | escape]
            ][
                if value = "self" [value: "^^*self"]
                to set-word! value
            ]
        ]

        [
            string (is-word: pick resolutions set-word? val: wordify val)
            is-word
        ]
    ]

    block: use [list][
        list: [space opt [value any [comma value]] space]

        [#"[" new-child list #"]" neaten/1 to-parent]
    ]

    _content: [#"{" space {"_content"} space #":" space value space "}"] ; Flickr

    object: use [name list as-object token][
        name: [
            token space #":" space
            (emit either is-flat [to tag! val][val])
        ]
        list: [
            (token: either is-flat [string][word])
            space opt [name value any [comma name value]] space
        ]
        as-object: [(unless is-flat [here: change back here make object! here/-1])]

        [#"{" new-child list #"}" neaten/2 to-parent as-object]
    ]

    ident: use [initial ident][
        initial: charset ["$_" #"a" - #"z" #"A" - #"Z"]
        ident: union initial charset [#"0" - #"9"]

        [initial any ident]
    ]

    value: [
          "null" (emit none)
        | "true" (emit true)
        | "false" (emit false)
        | number (emit val)
        | string (emit val)
        | _content
        | object | block
    ]

    func [
        [catch] "Convert a JSON string to Rebol data"
        json [string! binary! file! url!] "JSON string"
        /flat "Objects are imported as tag-value pairs"
        /padded "Loads JSON data wrapped in a JSONP envelope"
    ][
        is-flat: :flat
        tree: here: copy []
        if any [file? json url? json][
            if error? json: try [read (json)][
                throw :json
            ]
        ]
        unless parse/all json either padded [
            [space ident space "(" space opt value space ")" opt ";" space]
        ][
            [space opt value space]
        ][
            throw make error! either is-flat [
                "Not a valid JSON string"
            ][
                "Cannot load JSON string, try in /FLAT mode"
            ]
        ]
        pick tree 1
    ]
]

to-json: use [
    json emit emits escape to-token emit-issue emit-date
    here lookup comma block block-of-pairs object value
][
    emit: func [data][repend json data]
    emits: func [data][emit {"} emit data emit {"}]

    escape: use [mp ch encode utf-8 decode-utf8 encode-unicode][
        mp: [#"^/" "\n" #"^M" "\r" #"^-" "\t" #"^"" "\^"" #"\" "\\" #"/" "\/"]
        ch: complement charset compose [
            {^@^A^B^C^D^E^F^G^H^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^!^_}
            (extract mp 2) #"^(80)" - #"^(FF)"
        ]

        encode: func [here /local char][
            char: any [
                select mp here/1
                either here/1 > 127 ["\uFFFD"][
                    join "\u" skip tail mold to-hex to integer! char -4
                ]
            ]
            change/part here char 1
        ]

        utf-8: use [utf-2 utf-3 utf-4 utf-b][
            ; probably need to adapt the rule from below...
            utf-2: charset [#"^(C2)" - #"^(DF)"]
            utf-3: charset [#"^(E0)" - #"^(EF)"]
            utf-4: charset [#"^(F0)" - #"^(F4)"]
            utf-b: charset [#"^(80)" - #"^(BF)"]

            [utf-2 utf-b | utf-3 2 utf-b | utf-4 3 utf-b]
        ]

        decode-utf8: use [
            utf-2 utf-3 utf-3-low utf-4 utf-4-low utf-4-high utf-b
            utf-x1 utf-x2 utf-x3 bounds out
        ][
            ; U+000080..U+0007FF _____________ C2..DF 80..BF
            ; U+000800..U+000FFF __________ E0 A0..BF 80..BF
            ; U+001000..U+00FFFF ______ E1..EF 80..BF 80..BF
            ; U+010000..U+03FFFF ___ F0 90..BF 80..BF 80..BF
            ; U+040000..U+0FFFFF F1..F3 80..BF 80..BF 80..BF
            ; U+100000..U+10FFFF ___ F4 80..8F 80..BF 80..BF
            utf-2: charset [#"^(C2)" - #"^(DF)"]
            utf-3-low: charset [#"^(A0)" - #"^(BF)"]
            utf-3: charset [#"^(E1)" - #"^(EF)"]
            utf-4-low: charset [#"^(90)" - #"^(BF)"]
            utf-4-high: charset [#"^(80)" - #"^(8F)"]
            utf-4: charset [#"^(F1)" - #"^(F3)"]
            utf-b: charset [#"^(80)" - #"^(BF)"]

            utf-x1: charset [#"^(A0)" - #"^(BF)"]
            utf-x2: charset [#"^(90)" - #"^(AF)"]
            utf-x3: charset [#"^(8F)" #"^(9F)" #"^(AF)" #"^(BF)"]

            func [char [string! binary!] /strict][
                bounds: [0 0]
                out: -1
                any [
                    all [
                        any [
                            parse/all char: as-binary char [
                                ; Test for invalid sequences first
                                [
                                    ; invalid U+D800 - U+DFFF ; UTF-8 Surrogates
                                    #"^(ED)" utf-x1 utf-b
                                    |
                                    ; invalid U+FDD0 - U+FDEF ; ???
                                    #"^(EF)" #"^(B7)" utf-x2
                                    |
                                    ; invalid U+nFFFE - U+nFFFF ; Troublesome UTF-16 sequences
                                    [#"^(EF)" | [#"^(F0)" | utf-4] utf-x3] #"^(BF)" [#"^(BE)" | #"^(BF)"]
                                ]
                                |
                                utf-2 utf-b (
                                    bounds: [127 2048]
                                    out: char/1 xor 192 * 64
                                    + (char/2 xor 128)
                                )
                                | [
                                      #"^(E0)" utf-3-low utf-b (bounds: [2047 4096])
                                    | utf-3 2 utf-b (bounds: [4095 65534])
                                ] (
                                    out: char/1 xor 224 * 4096
                                    + (char/2 xor 128 * 64)
                                    + (char/3 xor 128)
                                )
                                | [
                                      #"^(F0)" utf-4-low 2 utf-b (bounds: [65535 262144])
                                    | utf-4 3 utf-b (bounds: [262143 1048576])
                                    | #"^(F4)" utf-4-high 2 utf-b (bounds: [1048575 1114112])
                                ] (
                                    out: char/1 xor 240 * 262144
                                    + (char/2 xor 128 * 4096)
                                    + (char/3 xor 128 * 64)
                                    + (char/4 xor 128)
                                )
                            ]
                            not strict
                        ]
                        out > bounds/1
                        out < bounds/2
                        out
                    ]
                    65533 ; Unknown character
                ]
            ]
        ]

        encode-unicode: use [to-uchar][
            to-uchar: func [char [integer!]][
                rejoin ["\u" skip tail mold to-hex char -4]
            ]

            func [mark [string!] ext [string!] /local char][
                change/part mark case [
                    65535 > char: decode-utf8 mark [
                        to-uchar char
                    ]

                    ; Surrogate Pairs (emoji)
                    1114111 > char [
                        rejoin [
                            to-uchar 55296 or shift char and 65280 10
                            to-uchar char and 1023 or 56320
                        ]
                    ]

                    /else ["\uFFFD"] ; unknown char
                ] ext
            ]
        ]

        func [txt [string! binary!] /local ext][
            parse/all txt [
                any [
                      txt: some ch
                    | utf-8 ext: (txt: encode-unicode txt ext) :txt
                    | skip (txt: encode txt) :txt
                ]
            ]
            head txt
        ]
    ]

    to-token: use [special escapes mark][
        special: charset "!&'+-.0123456789nt^^_|"
        escapes: [
            #"t" "^-" #"n" "^/"
            #"_" " "  #"|" "/"
            #"&" ","  #"!" ":"
        ]

        func [name [set-word!]][
            if parse/all name: to string! name [
                "^^*self" end (remove remove name)
                | some [
                    mark: "^^" special (
                        mark: change/part mark any [
                            select escapes mark/2
                            mark/2
                        ] 2
                    ) :mark
                    | skip
                ]
            ][
                name
            ]
        ]
    ]

    emit-issue: use [dg nm][
        dg: charset "0123456789"
        nm: [opt "-" some dg]

        [(either parse/all here/1 nm [emit here/1][emits here/1])]
    ]

    emit-date: use [pad second][
        pad: func [part length][
            part: to string! part head insert/dup part "0" length - length? part
        ]

        [(
            emits rejoin collect [
                keep reduce [pad here/1/year 4 "-" pad here/1/month 2 "-" pad here/1/day 2]
                if here/1/time [
                    keep reduce ["T" pad here/1/time/hour 2 ":" pad here/1/time/minute 2 ":"]
                    second: parse/all to string! here/1/time/second "."
                    keep pad second/1 2
                    unless second/2 = "0" [keep join "." second/2]

                    keep either any [
                        none? here/1/zone
                        zero? here/1/zone
                    ]["Z"][
                        reduce [
                            either here/1/zone/hour < 0 ["-"]["+"]
                            pad abs here/1/zone/hour 2 ":" pad here/1/zone/minute 2
                        ]
                    ]
                ]
            ]
        )]
    ]

    lookup: [
        here: [get-word! | get-path!]
        (change here reduce reduce [here/1])
        end skip ; fail
    ]

    comma: [(if not tail? here [emit ","])]

    block: [(emit "[") any [here: value here: comma] (emit "]")]

    block-of-pairs: [
          some [set-word! skip]
        | some [tag! skip]
    ]

    object: [
        (emit "{")
        any [
            here: [set-word! (change here to-token here/1) | tag!]
            (emit [{"} escape to string! here/1 {":}])
            here: value here: comma
        ]
        (emit "}")
    ]

    value: [
          lookup
        | number! (emit here/1)
        | [logic! | 'true | 'false] (emit to string! here/1)
        | [none! | 'none] (emit 'null)
        | date! emit-date
        | issue! emit-issue
        | [
            any-string! | word! | lit-word! | tuple! | pair! | money! | time!
        ] (emits escape form here/1)
        | any-word! (emits escape form to word! here/1)

        | object! :here (change/only here body-of first here) into object
        | into block-of-pairs :here (change/only here copy first here) into object
        | any-block! :here (change/only here copy first here) into block

        | any-type! (emits to tag! type?/word first here)
    ]

    func [
        "Convert a Rebol value to JSON string"
        item [any-type!] "Rebol value to convert"
    ][
        json: make string! 1024
        if parse compose/only [(item)][here: value][json]
    ]
]