CSS

REBOL [
    Title: "CSS"
    Date: 18-Aug-2011
    Author: "Christopher Ross-Gill"
    Comment: {
        Based on: http://www.w3.org/TR/css3-syntax/
        Move to: http://www.w3.org/TR/CSS21/syndata.html#syntax
    }
    Purpose: "Describes CSS in Rebol"
    Version: 0.0.0.0.0.1
]

demo: https://raw.github.com/stubbornella/csslint/master/demos/demo.css


_p_: func [str][print [index? str mold copy/part str 24]]

css-rules: context [
    _*_: [progress:]
    _?_: use [mk][[mk: (_p_ mk)]]
    _??_: [(probe #??) _?_]
    _???_: [(probe #???) _?_]
    _????_: [(probe #????) _?_]
    progress: ""

    comment [
        ascii: charset [#"^(20)" - #"^(7E)"]
        alpha: charset [#"a" - #"z" #"A" - #"Z"]
        digit: charset "0123456789"
        alphanum: union alpha digit
    
        string: ["^"" any [some stringchar | "'"] "^"" | "'" any [stringchar | "^""] "'"]
        stringchar: [urlchar | "^(20)" | "\" nl]
        urlchar: [urlc | nonascii | escape]
        urlc: charset [#"^(09)" #"^(21)" #"^(23)" - #"^(26)" #"^(2A)" - #"^(7E)"]
        ; urlc: charset "!#$%&*-~"
        nl: [newline | crlf | "^M" | "^L"]
        w: [any wc]
        wc: charset "^-^/^L^M "
    
    
        byte-order-mark: "^(FE)^(FF)" ; "^(FEFF)"
    ]

    with: func [context [word! object!] target [block!]][bind target :context]

    letters: context [
        esc: ["\" 0 4 "0"]
        a-o: ["4" | "6"]
        p-z: ["5" | "7"]
        trm: ["^M^/" | " " | "^-" | "^M" | "^/" | "^L"]
        a: ["a" | esc a-o "1" trm]
        c: ["c" | esc a-o "3" trm]
        d: ["d" | esc a-o "4" trm]
        e: ["e" | esc a-o "5" trm]
        g: ["g" | esc a-o "7" trm | "\g"]
        h: ["h" | esc a-o "8" trm | "\h"]
        i: ["i" | esc a-o "9" trm | "\i"]
        k: ["k" | esc a-o "b" trm | "\k"]
        l: ["l" | esc a-o "c" trm | "\l"]
        m: ["m" | esc a-o "d" trm | "\m"]
        n: ["n" | esc a-o "e" trm | "\n"]
        o: ["o" | esc a-o "f" trm | "\o"]
        p: ["p" | esc p-z "0" trm | "\p"]
        r: ["r" | esc p-z "2" trm | "\r"]
        s: ["s" | esc p-z "3" trm | "\s"]
        t: ["t" | esc p-z "4" trm]
        u: ["u" | esc p-z "5" trm | "\u"]
        x: ["x" | esc p-z "8" trm | "\x"]
        y: ["y" | esc p-z "9" trm | "\y"]
        z: ["z" | esc p-z "a" trm | "\z"]
    ]

    macros: context [
        ; ident: [-]?{nmstart}{nmchar}*
        ident: [opt "-" nmstart any nmchar]

        ; name: {nmchar}+
        name: [some nmchar]
        
        ; nmstart: [_a-z]|{nonascii}|{escape}
        nmstart: use [chars][
            chars: charset [#"_" #"a" - #"z" #"A" - #"Z"]
            [chars | nonascii | escape]
        ]

        ; nonascii: [^\0-\237]
        nonascii: charset [#"^(80)" - #"^(FF)"]
        ; pending UCS [#"^(80)" - #"^(D7FF)" #"(^E000)" - #"^(FFFD)" #"^(010000)" - #"^(10FFFF)"]

        ; unicode: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
        unicode: use [hex term][
            hex: charset "0123456789abcdefABCDEF"
            term: charset " ^/^M^L^T"
            ["\" 1 6 hex opt ["^M^/" | term]]
        ]

        ; escape: {unicode}|\\[^\n\r\f0-9a-f]
        escape: [unicode | "\" [ascii | nonascii]]

        ; nmchar: [_a-z0-9-]|{nonascii}|{escape}
        nmchar: use [chars][
            chars: charset ["-_" #"0" - #"9" #"a" - #"z" #"A" - #"Z"]
            [chars | nonascii | escape]
        ]

        ; num: [0-9]+|[0-9]*\.[0-9]+
        num: use [digit][
            digit: charset "0123456789"
            [opt [any digit "."] some digit]
        ]

        ; string: {string1}|{string2}
        ; string1: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
        ; string2: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
        string: use [ssq sdq][
            sdq: complement charset {^/^L^M"\}
            ssq: complement charset {^/^L^M'\}
            [
                  {"} any [sdq | "\" nl | escape] {"}
                | {'} any [ssq | "\" nl | escape] {'}
            ]
        ]

        ; badstring  {badstring1}|{badstring2}
        ; badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
        ; badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
        badstring: use [ssq sdq][
            sdq: complement charset {^/^L^M"\}
            ssq: complement charset {^/^L^M'\}
            [
                  {"} any [sdq | "\" nl | escape] opt "\"
                | {'} any [ssq | "\" nl | escape] opt "\"
            ]
        ]

        ; badcomment: {badcomment1}|{badcomment2}
        ; badcomment1: \/\*[^*]*\*+([^/*][^*]*\*+)*
        ; badcomment2: \/\*[^*]*(\*+[^/*][^*]*)*
        badcomment: use [!star !delim][
            !star: complement charset "*"
            !delim: complement charset "*/"
            [
                  "/*" any !star some "*" any [!delim any !star some "*"]
                | "/*" any !star any [some "*" !delim any !star]
            ]
        ]

        ; baduri:  {baduri1}|{baduri2}|{baduri3}
        ; baduri1: url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
        ; baduri2: url\({w}{string}{w}
        ; baduri3: url\({w}{badstring}
        baduri: use [symbol][
            symbol: charset "[!#$%&*-~]"
            [
                "url(" w [
                      any [symbol | nonascii | escape] w
                    | string w
                    | badstring
                ]
            ]
        ]

        ; nl: \n|\r\n|\r|\f
        nl: ["^/" | "^M^/" | "^M" | "^L"]

        ; w: [ \t\r\n\f]*
        w: use [space][
            space: charset "^-^/^L^L^M "
            [any space]
        ]
    ]

    tokens: context [
        value: none
        ; ; The following productions are the complete list of tokens in CSS3:

        ; IDENT {ident}
        IDENT: [
            copy value [macros/ident]
            (emit ident value)
        ]

        ; ATKEYWORD @{ident}
        ATKEYWORD: [
            copy value ["@" macros/ident]
            (emit atkeyword value)
        ]

        CHARSET-SYM: ["@charset " (emit charset-sym "@charset ")]
        IMPORT-SYM: with letters ["@" i m p o r t (emit import-sym "@import")]
        PAGE-SYM: with letters ["@" p a g e (emit page-sym "@page")]
        MEDIA-SYM: with letters ["@" m e d i a (emit media-sym "@media")]

        ; STRING    {string}
        STRING: [
            copy value [macros/string]
            (emit string value)
        ]

        ; BAD_STRING    {badstring}
        BAD-STRING: [
            copy value [macros/badstring]
            (emit bad-string value)
        ]

        ; BAD_URI   {baduri}
        BAD-URI: [
            copy value [macros/baduri]
            (emit bad-uri value)
        ]

        ; BAD_COMMENT   {badcomment}
        BAD-COMMENT: [
            copy value [macros/badcomment]
            (emit bad-comment value)
        ]

        ; HASH  #{name}
        HASH: [
            copy value ["#" macros/name]
            (emit hash value)
        ]

        EMS: with letters [copy value [macros/num e m] (emit ems value)]
        EXS: with letters [copy value [macros/num e x] (emit exs value)]
        TIME: with letters [copy value [macros/num [opt m s]] (emit time value)]
        FREQ: with letters [copy value [macros/num [opt h z]] (emit freq value)]

        LENGTH: with letters [
            copy value [
                macros/num [p x | c m | m m | i n | p t | p c]
            ] (emit length value)
        ]

        ANGLE: with letters [
            copy value [macros/num [d e g | opt g r a d]]
            (emit angle value)
        ]

        RESOLUTION: with letters [
            copy value [macros/num [d p i | d p c m]]
            (emit resolution value)
        ]

        PERCENTAGE: [
            copy value [macros/num "%"]
            (emit percentage value)
        ]

        DIMENSION: [
            copy value [macros/num macros/ident]
            (emit dimension value)
        ]

        NUMBER: [
            copy value [macros/num]
            (emit number value)
        ]

        URI: use [urlchar][
            urlchar: charset "!#$%&*-[]_~" ; []_ not included in spec, but I think they meant to?
            [
                copy value [
                    "url(" macros/w [
                        macros/string | any [urlchar | macros/nonascii | macros/escape]
                    ] macros/w ")"
                ] (emit uri value)
            ]
        ]

        ; UNICODE-RANGE u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
        UNICODE-RANGE: use [hex][
            hex: charset "0123456789abcdefABCDEF"
            [
                copy value ["u+" 1 6 [hex | "?"] opt ["-" 1 6 hex]]
                (emit unicode-range value)
            ]
        ]

        ; CDO   <!--
        CDO: ["<!--" (emit cdo "<!--")]

        ; CDC   -->
        CDC: ["-->" (emit cdc value "-->")]

        COLON: [":" (emit colon ":")]
        SEMICOLON: [";" (emit semi ";")]
        LCURLY: ["{" (emit lcurly "{")]
        RCURLY: ["}" (emit rcurly "}")]
        LPAREN: ["(" (emit lparen "(")]
        RPAREN:  [")" (emit rparen ")")]
        LSQUARE: ["[" (emit lsquare "[")]
        RSQUARE: ["]" (emit rsquare "]")]
        LCHEVRON: ["<" (emit delim "<")]
        RCHEVRON: [">" (emit delim ">")]
        COMMA: ["," (emit delim ",")]
        PLUS: ["+" (emit delim "+")]
        MINUS: ["-" (emit delim "-")]
        SLASH: ["/" (emit delim "/")]
        PERIOD: ["." (emit delim ".")]
        ASTERISK: ["*" (emit delim "*")]
        EQUALS: ["=" (emit delim "=")]
        ; S [ \t\r\n\f]+
        S: use [space][
            space: charset " ^-^/^L^M"
            [copy value some space (emit space value)]
        ]

        COMMENT: use [!star !delim][
            !star: complement charset "*"
            !delim: complement charset "*/"
            comment: [
                copy value [
                    "/*" _*_ any !star some "*" _*_ any [!delim any !star some "*"] "/"
                ] (emit comment value)
            ]
        ]

        ; FUNCTION  {ident}\(
        FUNCTION: [
            copy value [macros/ident "("]
            (emit function value)
        ]

        ; INCLUDES  ~=
        INCLUDES: ["~=" (emit includes "~=")]

        ; DASHMATCH |=
        DASHMATCH: ["|=" (emit dashmatch "|=")]

        ONLY: with letters [copy value [o n l y] (emit only value)]
        NOT: with letters [copy value [n o t] (emit not value)]
        AND: with letters [copy value [a n d] (emit and value)]

        IMPORTANT-SYM: with letters [
            copy value ["!" macros/w i m p o r t a n t]
            (emit important-sym value)
        ]

        ; DELIM any other character not matched by the above rules, and neither a single nor a double quote
        DELIM: use [char][
            ; char: complement charset {"'}
            char: charset "*+,-./=|~"
            [copy value char (emit delim value)]
        ]
        ; any other character not matched by the above rules, and neither a single nor a double quote

        ; {skip}            {return MISC;}
    ]

    rules: make macros [
        ; attribute: http://www.w3.org/TR/css-style-attr/#syntax
        ; media queries: http://www.w3.org/TR/css3-mediaqueries/#syntax

        space: [tokens/S | tokens/COMMENT]
        space*: [any space]

        ; stylesheet  : [ CDO | CDC | S | statement ]*;

        ; statement   : ruleset | at-rule;
        ; at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
        ; block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
        ; ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
        ; selector    : any+;
        ; declaration : property S* ':' S* value;
        ; property    : IDENT;
        ; value       : [ any | block | ATKEYWORD S* ]+;
        ; any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
        ;               | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
        ;               | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
        ;               | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
        ;               ] S*;
        ; unused      : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;

        stylesheet: [
            opt [tokens/CHARSET-SYM tokens/STRING tokens/SEMICOLON]
            _*_ (probe "Import")
            any [() import | tokens/CDO | tokens/CDC | space]
            
            any [_*_ (print "===NEW STATEMENT")() statement | tokens/CDO | tokens/CDC | space]
        ]

        declaration-list: [any [_*_ space* opt declaration any [tokens/SEMICOLON space* opt declaration]]]

        import: [
            tokens/IMPORT-SYM space* [tokens/STRING | tokens/URI]
            space* opt media-query-list tokens/SEMICOLON
        ]

        media: [
            tokens/MEDIA-SYM space* media-query-list
            tokens/LCURLY space* any ruleset tokens/RCURLY space*
        ]

        ; media-query-list: S* [media_query [ ',' S* media_query ]* ]?
        media-query-list: [space* opt [media-query any [tokens/COMMA space* media-query]]]

        ; media-query: [ONLY | NOT]? S* media_type S* [ AND S* expression ]*
        ; | expression [ AND S* expression ]*
        media-query: [
            opt [tokens/ONLY | tokens/NOT] space*
            media-type space* any [tokens/AND space* expression]
            |
            expression any [tokens/AND space* expression]
        ]

        ; media_type: IDENT
        media-type: [tokens/IDENT]

        ; expression: '(' S* media_feature S* [ ':' S* expr ]? ')' S*
        expression: [tokens/LPAREN space* media-feature space* opt [tokens/COLON space* expr] tokens/RPAREN space*]

        ; media_feature: IDENT
        media-feature: [tokens/IDENT]

        medium: [tokens/IDENT space*]

        page: [
            tokens/PAGE-SYM space* opt pseudo-page
            tokens/LCURLY space* opt declaration any [
                tokens/SEMICOLON space* opt declaration
            ] tokens/RCURLY space*
        ]

        pseudo-page: [tokens/COLON tokens/IDENT space*]

        operator: [[tokens/SLASH | tokens/COMMA] space*]

        combinator: [[tokens/PLUS | tokens/RCHEVRON] space*]

        unary-operator: [tokens/MINUS | tokens/PLUS]

        statement: [ruleset | at-rule]

        ; at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
        at-rule: [media | page | (print "---Start @Keyword") tokens/ATKEYWORD space* any part [block (print "End Block") | tokens/SEMICOLON space*]]

        ; block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
        block: [(print "---Start Block") tokens/LCURLY space* any [part | block | tokens/ATKEYWORD space* | tokens/SEMICOLON space*] tokens/RCURLY space*]

        ruleset: [
            _*_
            selector any [tokens/COMMA space* selector]
            tokens/LCURLY space* _*_ opt declaration any [
                _*_ tokens/SEMICOLON space* opt declaration
            ] tokens/RCURLY space*
        ]

        selector: [
            simple-selector [combinator selector | space* opt [opt combinator selector]]
        ]

        simple-selector: [
            element-name any [tokens/HASH | class | attrib | pseudo]
            | some [tokens/HASH | class | attrib | pseudo]
        ]

        class: [tokens/PERIOD tokens/IDENT]

        element-name: [tokens/IDENT | tokens/ASTERISK]

        comment {
            selectors_group: [selector any ["," space* selector]]
            selector: [space* atom any [combinator atom] opt pseudo_element]
            combinator: [space* ["+" | ">" | "~" | ] space*]
            atom: [
                opt element some [HASH | class | attrib | pseudo_class | negation]
                | element
            ]

            namespace_prefix: [opt [IDENT | "*"] "|"]
            element_name: [IDENT]
            type_selector: [opt namespace_prefix element_name]
            universal: [opt namespace_prefix "*"]
            element: [type_selector | universal]

            class: ["." IDENT]


            ; functional_pseudo: [_*_ FUNCTION space* [STRING | expression | negation_arg | IDENT | NUMBER] space* ")"]
            pseudo_class: [":" [functional_pseudo | IDENT]]
            negation: ["not(" space* opt negation_arg space* ")"]
            negation_arg: [element | HASH | class | attrib | pseudo_class]
            pseudo_element: [opt ":" ":" IDENT]
        }

        attrib: [
            tokens/LSQUARE space* tokens/IDENT space*
            opt [
                [tokens/EQUALS | tokens/INCLUDES | tokens/DASHMATCH] space*
                [tokens/IDENT | tokens/STRING] space*
            ] tokens/RSQUARE
        ]

        pseudo: [
            tokens/COLON opt [
                  tokens/IDENT
                | tokens/FUNCTION space* opt [tokens/IDENT space*] tokens/RPAREN
            ]
        ]

        declaration: [(print "---Start Declaration") property tokens/COLON space* expr opt prio]

        prio: [(print "---Start Priority") tokens/IMPORTANT-SYM space*]

        expr: [term any [opt operator term]]

        ; expression: use [integer signed-integer][
        ;   integer: :num
        ;   signed-integer: ["-" integer]
        ;   [opt ["-" | integer] "n" opt signed-integer | integer]
        ; ]

        term: [
            opt unary-operator [
                  tokens/PERCENTAGE | tokens/LENGTH | tokens/EMS | tokens/EXS
                | tokens/ANGLE | tokens/TIME | tokens/FREQ | tokens/NUMBER
            ] space*
            | tokens/STRING space* | tokens/URI space* | hexcolor | function | tokens/IDENT space*
        ]
        
        function: [tokens/FUNCTION space* expr tokens/RPAREN space*]

        hexcolor: [tokens/HASH space*]

        property: [tokens/IDENT space*]

        ; part: [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
        ;     | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
        ;     | DASHMATCH | COLON | FUNCTION S* [any|unused]* ')'
        ;     | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
        ;     ] S*;
        part: [
            [
                  tokens/PERCENTAGE | tokens/DIMENSION | tokens/STRING
                | tokens/DELIM | tokens/URI | tokens/HASH | tokens/UNICODE-RANGE | tokens/INCLUDES
                | tokens/DASHMATCH | tokens/COLON | tokens/FUNCTION space* any [part | unused] tokens/RPAREN
                | tokens/IDENT | tokens/NUMBER
                | tokens/LPAREN space* any [part | unused] tokens/RPAREN
                | tokens/LSQUARE space* any [part | unused] tokens/RSQUARE
            ] space*
        ]

        part: [
            (print "---Start Part")
            [
                  tokens/PERCENTAGE | tokens/DIMENSION | tokens/STRING
                | tokens/URI | tokens/HASH | tokens/UNICODE-RANGE | tokens/INCLUDES
                | tokens/DASHMATCH | tokens/COLON | tokens/FUNCTION space* any [part | unused] tokens/RPAREN
                | tokens/IDENT | tokens/NUMBER
                | tokens/LPAREN space* any [part | unused] tokens/RPAREN
                | tokens/LSQUARE space* any [part | unused] tokens/RSQUARE
                | tokens/DELIM
            ] space*
        ]

        ; unused      : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
        unused: [block | tokens/ATKEYWORD space* | tokens/SEMICOLON space* | tokens/CDO space* | tokens/CDC space*]

        comment {
        ; value       : [ any | block | ATKEYWORD S* ]+;
        ; value: [some [_*_ part | block | tokens/ATKEYWORD space*]]


        ; part: [[
        ;     tokens/PERCENTAGE | tokens/DIMENSION | tokens/STRING
        ;   | tokens/DELIM | tokens/URI | tokens/HASH | tokens/UNICODE-RANGE | tokens/INCLUDES
        ;   | tokens/DASHMATCH | tokens/COLON | tokens/FUNCTION space* any [part | unused] tokens/RPAREN
        ;   | tokens/IDENT | tokens/NUMBER
        ;   | tokens/LPAREN space* any [part | unused] tokens/RPAREN
        ;   | tokens/LSQUARE space* any [part | unused] tokens/RSQUARE
        ; ] space*]
        }
    ]

    emitter: context [
        out: none
        emit: func ['token [word!] value [string!] /end /w /v][
            end: tail out
            append out w: to word! uppercase form token
            append out v: value
            print [form w mold v]
            new-line end true
        ]
        rule: [
            (out: copy [])
            rules/stylesheet
        ]
    ]

    emit: get in emitter 'emit
]

parse-css: func [css [string! file! url!]][
    case/all [
        file? css [css: attempt [read css]]
        url? css [css: attempt [read css]]
        not string? css [return none]
        not css: parse/all css css-rules/emitter/rule [
            print "^/Fail:"
            _p_ css-rules/progress
        ]
    ]
    css-rules/emitter/out
]