Rebol [ Title: "JSON Parser/Encoder for Rebol 2" Author: "Christopher Ross-Gill" Date: 11-Feb-2017 Home: http://www.ross-gill.com/page/JSON_and_Rebol File: %altjson.r Version: 0.3.9 Purpose: "De/Serialize a JSON string to Rebol data." Rights: http://opensource.org/licenses/Apache-2.0 Type: 'module Name: 'rgchris.altjson Exports: [load-json to-json] History: [ 11-Feb-2017 0.3.9 "Include support for decoding surrogate pairs (emoji)" 06-Feb-2017 0.3.8 "Fix Unicode -> UTF-8 decoding" 02-May-2016 0.3.7 "Support for /, : and , characters in JSON object keys" 22-Sep-2015 0.3.6 "Sync with v0.3.6 for Rebol 3" 07-Jul-2014 0.3.0 "Initial support for JSONP" 15-Jul-2011 0.2.6 "Flattens Flickr '_content' objects" 02-Dec-2010 0.2.5 "Support for time! added" 28-Aug-2010 0.2.4 "Encodes tag! any-type! paired blocks as an object" 06-Aug-2010 0.2.2 "Issue! composed of digits encoded as integers" 22-May-2005 0.1.0 "Original Version" ] Notes: { - Simple Escaping - Converts date! to RFC 3339 Date String } ] load-json: use [ tree branch here val is-flat emit new-child to-parent neaten resolutions space word comma number string block object _content value ident ][ branch: make block! 10 emit: func [val][here: insert/only here val] new-child: [(insert/only branch insert/only here here: copy [])] to-parent: [(here: take branch)] neaten: [ (new-line/all head here true) (new-line/all/skip head here true 2) ] resolutions: [ [] [end skip] ] space: use [space][ space: charset " ^-^/^M" [any space] ] comma: [space #"," space] number: use [dg ex nm as-num][ dg: charset "0123456789" ex: [[#"e" | #"E"] opt [#"+" | #"-"] some dg] nm: [opt #"-" some dg opt [#"." some dg] opt ex] as-num: func [val /num][ num: load val all [ parse val [opt "-" some dg] decimal? num num: to issue! val ] num ] [copy val nm (val: as-num val)] ] string: use [ch dq es hx mp decode][ ch: complement charset {\"} es: charset {"\/bfnrt} hx: charset "0123456789ABCDEFabcdef" mp: [#"^"" "^"" #"\" "\" #"/" "/" #"b" "^H" #"f" "^L" #"r" "^M" #"n" "^/" #"t" "^-"] decode: use [ch mk escape encode-utf8 decode-surrogate][ encode-utf8: func [ "Encode a code point in UTF-8 format" char [integer!] "Unicode code point" ][ as-string to binary! reduce case [ char <= 127 [[char]] ; U+0080 - U+07FF char <= 2047 [[ char and 1984 / 64 + 192 char and 63 + 128 ]] any [ ; http://www.unicode.org/faq/private_use.html#nonchar4 ; invalid U+D800 - U+DFFF ; UTF-16 Surrogates all [char >= 55296 char <= 57343] ; invalid U+FDD0 - U+FDEF ; Noncharacters all [char >= 64976 char <= 65007] ; invalid U+nFFFE - U+nFFFF ; Noncharacters equal? 65534 char and 65534 equal? 65535 char and 65535 ][ [239 191 189] ] ; U+0800 - U+FFFD ; upper U+FFFF for tests char <= 65535 [[ char and 61440 / 4096 + 224 char and 4032 / 64 + 128 char and 63 + 128 ]] ; U+010000 - U+10FFFF char <= 1114111 [[ char and 1835008 / 262144 + 240 char and 258048 / 4096 + 128 char and 4032 / 64 + 128 char and 63 + 128 ]] true [[239 191 189]] ; Unknown codepoint ] ] decode-surrogate: func [char [string!]][ char: debase/base char 16 encode-utf8 65536 + (shift/left 1023 and to integer! take/part char 2 10) + (1023 and to integer! char) ] escape: [ mk: #"\" [ es (mk: change/part mk select mp mk/2 2) | #"u" copy ch [ #"d" [#"8" | #"9" | #"a" | #"b"] 2 hx "\u" #"d" [#"c" | #"d" | #"e" | #"f"] 2 hx ] ( remove remove skip ch 4 mk: change/part mk decode-surrogate ch 12 ) :mk | #"u" copy ch 4 hx ( mk: change/part mk encode-utf8 to integer! to issue! ch 6 ) ] :mk ] func [text [string! none!] /mk][ either none? text [copy ""][ all [parse/all text [any [to "\" escape] to end] text] ] ] ] [#"^"" copy val [any [some ch | #"\" [#"u" 4 hx | es]]] #"^"" (val: decode val)] ] word: use [word1 word+ special escapes escape wordify is-word][ word1: charset ["!&*=?_|~" #"A" - #"Z" #"a" - #"z"] word+: charset ["!&'*+-.0123456789=?_|~" #"A" - #"Z" #"a" - #"z"] special: charset "^-^/ '+,-./0123456789:^^" escapes: [ #"^-" "^^t" #"^/" "^^n" #" " "^^_" #"/" "^^|" #"," "^^&" #":" "^^!" #"^^" "^^^^" ] escape: use [mk][ [ mk: special ( mk: change/part mk any [ select escapes mk/1 join "^^" mk/1 ] 1 ) :mk ] ] wordify: func [value /local mk][ if parse/all value: copy value [ [some word1 | escape] any [some word+ | escape] ][ if value = "self" [value: "^^*self"] to set-word! value ] ] [ string (is-word: pick resolutions set-word? val: wordify val) is-word ] ] block: use [list][ list: [space opt [value any [comma value]] space] [#"[" new-child list #"]" neaten/1 to-parent] ] _content: [#"{" space {"_content"} space #":" space value space "}"] ; Flickr object: use [name list as-object token][ name: [ token space #":" space (emit either is-flat [to tag! val][val]) ] list: [ (token: either is-flat [string][word]) space opt [name value any [comma name value]] space ] as-object: [(unless is-flat [here: change back here make object! here/-1])] [#"{" new-child list #"}" neaten/2 to-parent as-object] ] ident: use [initial ident][ initial: charset ["$_" #"a" - #"z" #"A" - #"Z"] ident: union initial charset [#"0" - #"9"] [initial any ident] ] value: [ "null" (emit none) | "true" (emit true) | "false" (emit false) | number (emit val) | string (emit val) | _content | object | block ] func [ [catch] "Convert a JSON string to Rebol data" json [string! binary! file! url!] "JSON string" /flat "Objects are imported as tag-value pairs" /padded "Loads JSON data wrapped in a JSONP envelope" ][ is-flat: :flat tree: here: copy [] if any [file? json url? json][ if error? json: try [read (json)][ throw :json ] ] unless parse/all json either padded [ [space ident space "(" space opt value space ")" opt ";" space] ][ [space opt value space] ][ throw make error! either is-flat [ "Not a valid JSON string" ][ "Cannot load JSON string, try in /FLAT mode" ] ] pick tree 1 ] ] to-json: use [ json emit emits escape to-token emit-issue emit-date here lookup comma block block-of-pairs object value ][ emit: func [data][repend json data] emits: func [data][emit {"} emit data emit {"}] escape: use [mp ch encode utf-8 decode-utf8 encode-unicode][ mp: [#"^/" "\n" #"^M" "\r" #"^-" "\t" #"^"" "\^"" #"\" "\\" #"/" "\/"] ch: complement charset compose [ {^@^A^B^C^D^E^F^G^H^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\^]^!^_} (extract mp 2) #"^(80)" - #"^(FF)" ] encode: func [here /local char][ char: any [ select mp here/1 either here/1 > 127 ["\uFFFD"][ join "\u" skip tail mold to-hex to integer! char -4 ] ] change/part here char 1 ] utf-8: use [utf-2 utf-3 utf-4 utf-b][ ; probably need to adapt the rule from below... utf-2: charset [#"^(C2)" - #"^(DF)"] utf-3: charset [#"^(E0)" - #"^(EF)"] utf-4: charset [#"^(F0)" - #"^(F4)"] utf-b: charset [#"^(80)" - #"^(BF)"] [utf-2 utf-b | utf-3 2 utf-b | utf-4 3 utf-b] ] decode-utf8: use [ utf-2 utf-3 utf-3-low utf-4 utf-4-low utf-4-high utf-b utf-x1 utf-x2 utf-x3 bounds out ][ ; U+000080..U+0007FF _____________ C2..DF 80..BF ; U+000800..U+000FFF __________ E0 A0..BF 80..BF ; U+001000..U+00FFFF ______ E1..EF 80..BF 80..BF ; U+010000..U+03FFFF ___ F0 90..BF 80..BF 80..BF ; U+040000..U+0FFFFF F1..F3 80..BF 80..BF 80..BF ; U+100000..U+10FFFF ___ F4 80..8F 80..BF 80..BF utf-2: charset [#"^(C2)" - #"^(DF)"] utf-3-low: charset [#"^(A0)" - #"^(BF)"] utf-3: charset [#"^(E1)" - #"^(EF)"] utf-4-low: charset [#"^(90)" - #"^(BF)"] utf-4-high: charset [#"^(80)" - #"^(8F)"] utf-4: charset [#"^(F1)" - #"^(F3)"] utf-b: charset [#"^(80)" - #"^(BF)"] utf-x1: charset [#"^(A0)" - #"^(BF)"] utf-x2: charset [#"^(90)" - #"^(AF)"] utf-x3: charset [#"^(8F)" #"^(9F)" #"^(AF)" #"^(BF)"] func [char [string! binary!] /strict][ bounds: [0 0] out: -1 any [ all [ any [ parse/all char: as-binary char [ ; Test for invalid sequences first [ ; invalid U+D800 - U+DFFF ; UTF-8 Surrogates #"^(ED)" utf-x1 utf-b | ; invalid U+FDD0 - U+FDEF ; ??? #"^(EF)" #"^(B7)" utf-x2 | ; invalid U+nFFFE - U+nFFFF ; Troublesome UTF-16 sequences [#"^(EF)" | [#"^(F0)" | utf-4] utf-x3] #"^(BF)" [#"^(BE)" | #"^(BF)"] ] | utf-2 utf-b ( bounds: [127 2048] out: char/1 xor 192 * 64 + (char/2 xor 128) ) | [ #"^(E0)" utf-3-low utf-b (bounds: [2047 4096]) | utf-3 2 utf-b (bounds: [4095 65534]) ] ( out: char/1 xor 224 * 4096 + (char/2 xor 128 * 64) + (char/3 xor 128) ) | [ #"^(F0)" utf-4-low 2 utf-b (bounds: [65535 262144]) | utf-4 3 utf-b (bounds: [262143 1048576]) | #"^(F4)" utf-4-high 2 utf-b (bounds: [1048575 1114112]) ] ( out: char/1 xor 240 * 262144 + (char/2 xor 128 * 4096) + (char/3 xor 128 * 64) + (char/4 xor 128) ) ] not strict ] out > bounds/1 out < bounds/2 out ] 65533 ; Unknown character ] ] ] encode-unicode: use [to-uchar][ to-uchar: func [char [integer!]][ rejoin ["\u" skip tail mold to-hex char -4] ] func [mark [string!] ext [string!] /local char][ change/part mark case [ 65535 > char: decode-utf8 mark [ to-uchar char ] ; Surrogate Pairs (emoji) 1114111 > char [ rejoin [ to-uchar 55296 or shift char and 65280 10 to-uchar char and 1023 or 56320 ] ] /else ["\uFFFD"] ; unknown char ] ext ] ] func [txt [string! binary!] /local ext][ parse/all txt [ any [ txt: some ch | utf-8 ext: (txt: encode-unicode txt ext) :txt | skip (txt: encode txt) :txt ] ] head txt ] ] to-token: use [special escapes mark][ special: charset "!&'+-.0123456789nt^^_|" escapes: [ #"t" "^-" #"n" "^/" #"_" " " #"|" "/" #"&" "," #"!" ":" ] func [name [set-word!]][ if parse/all name: to string! name [ "^^*self" end (remove remove name) | some [ mark: "^^" special ( mark: change/part mark any [ select escapes mark/2 mark/2 ] 2 ) :mark | skip ] ][ name ] ] ] emit-issue: use [dg nm][ dg: charset "0123456789" nm: [opt "-" some dg] [(either parse/all here/1 nm [emit here/1][emits here/1])] ] emit-date: use [pad second][ pad: func [part length][ part: to string! part head insert/dup part "0" length - length? part ] [( emits rejoin collect [ keep reduce [pad here/1/year 4 "-" pad here/1/month 2 "-" pad here/1/day 2] if here/1/time [ keep reduce ["T" pad here/1/time/hour 2 ":" pad here/1/time/minute 2 ":"] second: parse/all to string! here/1/time/second "." keep pad second/1 2 unless second/2 = "0" [keep join "." second/2] keep either any [ none? here/1/zone zero? here/1/zone ]["Z"][ reduce [ either here/1/zone/hour < 0 ["-"]["+"] pad abs here/1/zone/hour 2 ":" pad here/1/zone/minute 2 ] ] ] ] )] ] lookup: [ here: [get-word! | get-path!] (change here reduce reduce [here/1]) end skip ; fail ] comma: [(if not tail? here [emit ","])] block: [(emit "[") any [here: value here: comma] (emit "]")] block-of-pairs: [ some [set-word! skip] | some [tag! skip] ] object: [ (emit "{") any [ here: [set-word! (change here to-token here/1) | tag!] (emit [{"} escape to string! here/1 {":}]) here: value here: comma ] (emit "}") ] value: [ lookup | number! (emit here/1) | [logic! | 'true | 'false] (emit to string! here/1) | [none! | 'none] (emit 'null) | date! emit-date | issue! emit-issue | [ any-string! | word! | lit-word! | tuple! | pair! | money! | time! ] (emits escape form here/1) | any-word! (emits escape form to word! here/1) | object! :here (change/only here body-of first here) into object | into block-of-pairs :here (change/only here copy first here) into object | any-block! :here (change/only here copy first here) into block | any-type! (emits to tag! type?/word first here) ] func [ "Convert a Rebol value to JSON string" item [any-type!] "Rebol value to convert" ][ json: make string! 1024 if parse compose/only [(item)][here: value][json] ] ]