Extract URLs

REBOL [
    Title: "Extract URLs"
    File: %extract-urls.r
    Version: 2.0.0
    Home: http://www.ross-gill.com/page/Beyond_Regular_Expressions
    Date: 31-Jul-2010
    Purpose: "To identify and extract URIs from plain text"
    Author: "Christopher Ross-Gill"
    Exports: [extract-urls]
    Reference: http://daringfireball.net/2010/07/improved_regex_for_matching_urls
]

extract-urls: use [out text uri letter digit word space punct chars paren][
    letter: charset [#"a" - #"z"]
    digit: charset [#"0" - #"9"]
    word: charset [#"_" #"0" - #"9" #"A" - #"Z" #"a" - #"z"] ; per regex
    space: charset "^/^- ()<>^"'" ; for curly quotes, need unicode (R3)
    punct: charset "!'#$%&`*+,-./:;=?@[/]^^{|}~" ; regex 'punct without ()<>
    chars: complement union space punct
    paren: ["(" some [chars | punct | "(" some [chars | punct] ")"]")"]

    uri: [
        [
              letter some [word | "-"] ":" [1 3 "/" | letter | digit | "%"]
            | "www" 0 3 digit "."
            | some [letter | digit] "." 2 4 letter
        ]
        some [opt [some punct] some [chars | paren] opt "/"]
    ]

    text: use [emit-link emit-text link mk ex][
        emit-link: [(append out to-url link)]
        emit-text: [(unless mk = ex [append out copy/part mk ex])]

        [
            mk: any [
                ex: copy link uri emit-text emit-link mk:
                | some [chars | punct] some space ; non-uri words, line not required
                | skip
            ]
            ex: emit-text
        ]
    ]

    func [
        "Separates URLs from plain text"
        txt [string!] "Text to be split"
    ][
        out: copy []
        if parse/all txt text [out]
    ]
]