|
Neohapsis is currently accepting applications for employment. For more information, please visit our website www.neohapsis.com or email hr@neohapsis.com |
From: dwg@owasp.org
Date: Wed Dec 12 2001 - 10:09:27 CST
No more parameter tampering?
Given the following BNF for URLs:
> RFC 1738: Uniform Resource Locators (URL)
>
> RFC 1738 [BLMaMC 94a] defines a pseudo-BNF for URLs. The ftp, file, http,
> gopher, mailto, news, nntp, telnet, wais, and prospero URLs are defined in
> this RFC.
>
> The pseudo-BNF is pretty straightforward:
>
>
>
> ; The generic form of a URL is:
>
> genericurl = scheme ":" schemepart
>
> ; Specific predefined schemes are defined here; new schemes
> ; may be registered with IANA
>
> url = httpurl | ftpurl | newsurl |
> nntpurl | telneturl | gopherurl |
> waisurl | mailtourl | fileurl |
> prosperourl | otherurl
>
> ; new schemes follow the general syntax
> otherurl = genericurl
>
> ; the scheme is in lower case; interpreters should use case-ignore
> scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
> schemepart = *xchar | ip-schemepart
>
>
> ; URL schemeparts for ip based protocols:
>
> ip-schemepart = "//" login [ "/" urlpath ]
>
> login = [ user [ ":" password ] "@" ] hostport
> hostport = host [ ":" port ]
> host = hostname | hostnumber
> hostname = *[ domainlabel "." ] toplabel
> domainlabel = alphadigit | alphadigit *[ alphadigit | "-" ] alphadigit
> toplabel = alpha | alpha *[ alphadigit | "-" ] alphadigit
> alphadigit = alpha | digit
> hostnumber = digits "." digits "." digits "." digits
> port = digits
> user = *[ uchar | ";" | "?" | "&" | "=" ]
> password = *[ uchar | ";" | "?" | "&" | "=" ]
> urlpath = *xchar ; depends on protocol see section 3.1
>
> ; The predefined schemes:
>
> ; FTP (see also RFC959)
>
> ftpurl = "ftp://" login [ "/" fpath [ ";type=" ftptype ]]
> fpath = fsegment *[ "/" fsegment ]
> fsegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
> ftptype = "A" | "I" | "D" | "a" | "i" | "d"
>
> ; FILE
>
> fileurl = "file://" [ host | "localhost" ] "/" fpath
>
> ; HTTP
>
> httpurl = "http://" hostport [ "/" hpath [ "?" search ]]
> hpath = hsegment *[ "/" hsegment ]
> hsegment = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
> search = *[ uchar | ";" | ":" | "@" | "&" | "=" ]
>
> ; GOPHER (see also RFC1436)
>
> gopherurl = "gopher://" hostport [ / [ gtype [ selector
> [ "%09" search [ "%09" gopher+_string ] ] ] ] ]
> gtype = xchar
> selector = *xchar
> gopher+_string = *xchar
>
> ; MAILTO (see also RFC822)
>
> mailtourl = "mailto:" encoded822addr
> encoded822addr = 1*xchar ; further defined in RFC822
>
> ; NEWS (see also RFC1036)
>
> newsurl = "news:" grouppart
> grouppart = "*" | group | article
> group = alpha *[ alpha | digit | "-" | "." | "+" | "_" ]
> article = 1*[ uchar | ";" | "/" | "?" | ":" | "&" | "=" ] "@" host
>
> ; NNTP (see also RFC977)
>
> nntpurl = "nntp://" hostport "/" group [ "/" digits ]
>
> ; TELNET
>
> telneturl = "telnet://" login [ "/" ]
>
> ; WAIS (see also RFC1625)
>
> waisurl = waisdatabase | waisindex | waisdoc
> waisdatabase = "wais://" hostport "/" database
> waisindex = "wais://" hostport "/" database "?" search
> waisdoc = "wais://" hostport "/" database "/" wtype "/" wpath
> database = *uchar
> wtype = *uchar
> wpath = *uchar
>
> ; PROSPERO
>
> prosperourl = "prospero://" hostport "/" ppath *[ fieldspec ]
> ppath = psegment *[ "/" psegment ]
> psegment = *[ uchar | "?" | ":" | "@" | "&" | "=" ]
> fieldspec = ";" fieldname "=" fieldvalue
> fieldname = *[ uchar | "?" | ":" | "@" | "&" ]
> fieldvalue = *[ uchar | "?" | ":" | "@" | "&" ]
>
> ; Miscellaneous definitions
>
> lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
> "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
> "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
> "y" | "z"
> hialpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
> "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
> "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
> alpha = lowalpha | hialpha
> digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
> "8" | "9"
> safe = "$" | "-" | "_" | "." | "+"
> extra = "!" | "*" | "'" | "(" | ")" | ","
> national = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
> punctuation = "<" | ">" | "#" | "%" | <">
>
>
> reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="
> hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
> "a" | "b" | "c" | "d" | "e" | "f"
> escape = "%" hex hex
>
> unreserved = alpha | digit | safe | extra
> uchar = unreserved | escape
> xchar = unreserved | reserved | escape
> digits = 1*digit
Then it is easy to create the following regular expression in Perl that
matches a valid URL:
> (?:http://(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.
> )*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)
> ){3}))(?::(?:\d+))?)(?:/(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F
> \d]{2}))|[;:@&=])*)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{
> 2}))|[;:@&=])*))*)(?:\?(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{
> 2}))|[;:@&=])*))?)?)|(?:ftp://(?:(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?
> :%[a-fA-F\d]{2}))|[;?&=])*)(?::(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-
> fA-F\d]{2}))|[;?&=])*))?@)?(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-
> )*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?
> :\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?))(?:/(?:(?:(?:(?:[a-zA-Z\d$\-_.+!
> *'(),]|(?:%[a-fA-F\d]{2}))|[?:@&=])*)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'()
> ,]|(?:%[a-fA-F\d]{2}))|[?:@&=])*))*)(?:;type=[AIDaid])?)?)|(?:news:(?:
> (?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[;/?:&=])+@(?:(?:(
> ?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[
> a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3})))|(?:[a-zA-Z](
> ?:[a-zA-Z\d]|[_.+-])*)|\*))|(?:nntp://(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[
> a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d
> ])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?)/(?:[a-zA-Z](?:[a-zA-Z
> \d]|[_.+-])*)(?:/(?:\d+))?)|(?:telnet://(?:(?:(?:(?:(?:[a-zA-Z\d$\-_.+
> !*'(),]|(?:%[a-fA-F\d]{2}))|[;?&=])*)(?::(?:(?:(?:[a-zA-Z\d$\-_.+!*'()
> ,]|(?:%[a-fA-F\d]{2}))|[;?&=])*))?@)?(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a
> -zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d]
> )?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?))/?)|(?:gopher://(?:(?:
> (?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:
> (?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+
> ))?)(?:/(?:[a-zA-Z\d$\-_.+!*'(),;/?:@&=]|(?:%[a-fA-F\d]{2}))(?:(?:(?:[
> a-zA-Z\d$\-_.+!*'(),;/?:@&=]|(?:%[a-fA-F\d]{2}))*)(?:%09(?:(?:(?:[a-zA
> -Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[;:@&=])*)(?:%09(?:(?:[a-zA-Z\d$
> \-_.+!*'(),;/?:@&=]|(?:%[a-fA-F\d]{2}))*))?)?)?)?)|(?:wais://(?:(?:(?:
> (?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:
> [a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?
> )/(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))*)(?:(?:/(?:(?:[a-zA
> -Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))*)/(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(
> ?:%[a-fA-F\d]{2}))*))|\?(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]
> {2}))|[;:@&=])*))?)|(?:mailto:(?:(?:[a-zA-Z\d$\-_.+!*'(),;/?:@&=]|(?:%
> [a-fA-F\d]{2}))+))|(?:file://(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]
> |-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:
> (?:\d+)(?:\.(?:\d+)){3}))|localhost)?/(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'()
> ,]|(?:%[a-fA-F\d]{2}))|[?:@&=])*)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(
> ?:%[a-fA-F\d]{2}))|[?:@&=])*))*))|(?:prospero://(?:(?:(?:(?:(?:[a-zA-Z
> \d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)
> *[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?)/(?:(?:(?:(?
> :[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[?:@&=])*)(?:/(?:(?:(?:[a-
> zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[?:@&=])*))*)(?:(?:;(?:(?:(?:[
> a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[?:@&])*)=(?:(?:(?:[a-zA-Z\d
> $\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[?:@&])*)))*)|(?:ldap://(?:(?:(?:(?:
> (?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?:
> [a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))?
> ))?/(?:(?:(?:(?:(?:(?:(?:[a-zA-Z\d]|%(?:3\d|[46][a-fA-F\d]|[57][Aa\d])
> )|(?:%20))+|(?:OID|oid)\.(?:(?:\d+)(?:\.(?:\d+))*))(?:(?:%0[Aa])?(?:%2
> 0)*)=(?:(?:%0[Aa])?(?:%20)*))?(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F
> \d]{2}))*))(?:(?:(?:%0[Aa])?(?:%20)*)\+(?:(?:%0[Aa])?(?:%20)*)(?:(?:(?
> :(?:(?:[a-zA-Z\d]|%(?:3\d|[46][a-fA-F\d]|[57][Aa\d]))|(?:%20))+|(?:OID
> |oid)\.(?:(?:\d+)(?:\.(?:\d+))*))(?:(?:%0[Aa])?(?:%20)*)=(?:(?:%0[Aa])
> ?(?:%20)*))?(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))*)))*)(?:(
> ?:(?:(?:%0[Aa])?(?:%20)*)(?:[;,])(?:(?:%0[Aa])?(?:%20)*))(?:(?:(?:(?:(
> ?:(?:[a-zA-Z\d]|%(?:3\d|[46][a-fA-F\d]|[57][Aa\d]))|(?:%20))+|(?:OID|o
> id)\.(?:(?:\d+)(?:\.(?:\d+))*))(?:(?:%0[Aa])?(?:%20)*)=(?:(?:%0[Aa])?(
> ?:%20)*))?(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))*))(?:(?:(?:
> %0[Aa])?(?:%20)*)\+(?:(?:%0[Aa])?(?:%20)*)(?:(?:(?:(?:(?:[a-zA-Z\d]|%(
> ?:3\d|[46][a-fA-F\d]|[57][Aa\d]))|(?:%20))+|(?:OID|oid)\.(?:(?:\d+)(?:
> \.(?:\d+))*))(?:(?:%0[Aa])?(?:%20)*)=(?:(?:%0[Aa])?(?:%20)*))?(?:(?:[a
> -zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))*)))*))*(?:(?:(?:%0[Aa])?(?:%2
> 0)*)(?:[;,])(?:(?:%0[Aa])?(?:%20)*))?)(?:\?(?:(?:(?:(?:[a-zA-Z\d$\-_.+
> !*'(),]|(?:%[a-fA-F\d]{2}))+)(?:,(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-f
> A-F\d]{2}))+))*)?)(?:\?(?:base|one|sub)(?:\?(?:((?:[a-zA-Z\d$\-_.+!*'(
> ),;/?:@&=]|(?:%[a-fA-F\d]{2}))+)))?)?)?)|(?:(?:z39\.50[rs])://(?:(?:(?
> :(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?)\.)*(?:[a-zA-Z](?:(?
> :[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:\d+)){3}))(?::(?:\d+))
> ?)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))+)(?:\+(?:(?:
> [a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))+))*(?:\?(?:(?:[a-zA-Z\d$\-_
> .+!*'(),]|(?:%[a-fA-F\d]{2}))+))?)?(?:;esn=(?:(?:[a-zA-Z\d$\-_.+!*'(),
> ]|(?:%[a-fA-F\d]{2}))+))?(?:;rs=(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA
> -F\d]{2}))+)(?:\+(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))+))*)
> ?))|(?:cid:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[;?:@&=
> ])*))|(?:mid:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[;?:@
> &=])*)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[;?:@&=]
> )*))?)|(?:vemmi://(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z
> \d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\
> .(?:\d+)){3}))(?::(?:\d+))?)(?:/(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a
> -fA-F\d]{2}))|[/?:@&=])*)(?:(?:;(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a
> -fA-F\d]{2}))|[/?:@&])*)=(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d
> ]{2}))|[/?:@&])*))*))?)|(?:imap://(?:(?:(?:(?:(?:(?:(?:[a-zA-Z\d$\-_.+
> !*'(),]|(?:%[a-fA-F\d]{2}))|[&=~])+)(?:(?:;[Aa][Uu][Tt][Hh]=(?:\*|(?:(
> ?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[&=~])+))))?)|(?:(?:;[
> Aa][Uu][Tt][Hh]=(?:\*|(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2
> }))|[&=~])+)))(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[
> &=~])+))?))@)?(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])
> ?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:\.(?:
> \d+)){3}))(?::(?:\d+))?))/(?:(?:(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:
> %[a-fA-F\d]{2}))|[&=~:@/])+)?;[Tt][Yy][Pp][Ee]=(?:[Ll](?:[Ii][Ss][Tt]|
> [Ss][Uu][Bb])))|(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))
> |[&=~:@/])+)(?:\?(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[
> &=~:@/])+))?(?:(?:;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=(?:[1-
> 9]\d*)))?)|(?:(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[&=~
> :@/])+)(?:(?:;[Uu][Ii][Dd][Vv][Aa][Ll][Ii][Dd][Ii][Tt][Yy]=(?:[1-9]\d*
> )))?(?:/;[Uu][Ii][Dd]=(?:[1-9]\d*))(?:(?:/;[Ss][Ee][Cc][Tt][Ii][Oo][Nn
> ]=(?:(?:(?:[a-zA-Z\d$\-_.+!*'(),]|(?:%[a-fA-F\d]{2}))|[&=~:@/])+)))?))
> )?)|(?:nfs:(?:(?://(?:(?:(?:(?:(?:[a-zA-Z\d](?:(?:[a-zA-Z\d]|-)*[a-zA-
> Z\d])?)\.)*(?:[a-zA-Z](?:(?:[a-zA-Z\d]|-)*[a-zA-Z\d])?))|(?:(?:\d+)(?:
> \.(?:\d+)){3}))(?::(?:\d+))?)(?:(?:/(?:(?:(?:(?:(?:[a-zA-Z\d\$\-_.!~*'
> (),])|(?:%[a-fA-F\d]{2})|[:@&=+])*)(?:/(?:(?:(?:[a-zA-Z\d\$\-_.!~*'(),
> ])|(?:%[a-fA-F\d]{2})|[:@&=+])*))*)?)))?)|(?:/(?:(?:(?:(?:(?:[a-zA-Z\d
> \$\-_.!~*'(),])|(?:%[a-fA-F\d]{2})|[:@&=+])*)(?:/(?:(?:(?:[a-zA-Z\d\$\
> -_.!~*'(),])|(?:%[a-fA-F\d]{2})|[:@&=+])*))*)?))|(?:(?:(?:(?:(?:[a-zA-
> Z\d\$\-_.!~*'(),])|(?:%[a-fA-F\d]{2})|[:@&=+])*)(?:/(?:(?:(?:[a-zA-Z\d
> \$\-_.!~*'(),])|(?:%[a-fA-F\d]{2})|[:@&=+])*))*)?)))
A complete discussion is found at:
http://www.foad.org/~abigail/Perl/url2.html
dwg
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]