CFLib.org – Common Function Library Project

parseUrl(sURL)

Last updated January 10, 2007

author

Dan G. Switzer, II

Version: 1 | Requires: CF7 | Library: StrLib

Description:
Parses a Url and returns a struct with keys defining the information in the Uri. authority: the authority section of the Uri directory: the directory path in the Uri (w/out parameters) domain: the domain of the Uri file: the file name in the Uri fragment: the fragment of the uri (i.e. #namedAnchor) params.url: the query string params as a struct params.segment: the segment parameters as a struct (i.e. ;JSESSIONID=1234) password: the password supplied in the Uri path: the full Uri path--includes embedded parameters (i.e. ;JSESSIONID=1234) port: the port defined in the Uri query: the query string scheme: the scheme/protocol (i.e. http, https, ftp, etc.) username: the username in the Uri

Return Values:
Returns a struct.

Example:

<cfdump
  var="#parseUrl("http://usr:pwd@www.foo.com:80/bar/sub/file.gif;p=5?q1=item1&q1=item2&q2=item3##nameAnchor")#"
  />

Parameters:

Name Description Required
sURL String to parse. Yes

Full UDF Source:

/**
 * Parses a Url and returns a struct with keys defining the information in the Uri.
 * 
 * @param sURL 	 String to parse. (Required)
 * @return Returns a struct. 
 * @author Dan G. Switzer, II (dswitzer@pengoworks.com) 
 * @version 1, January 10, 2007 
 */
function parseUrl(sUrl){
	// var to hold the final structure
	var stUrlInfo = structNew();
	// vars for use in the loop, so we don't have to evaluate lists and arrays more than once
	var i = 1;
	var sKeyPair = "";
	var sKey = "";
	var sValue = "";
	var aQSPairs = "";
	var sPath = "";
	/*
		from: http://www.ietf.org/rfc/rfc2396.txt

		^((([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*)))?
		 123            4  5          6       7  8        9 A

			scheme    = $3
			authority = $5
			path      = $6
			query     = $8
			fragment  = $10 (A)
	*/
	var sUriRegEx = "^(([^:/?##]+):)?(//([^/?##]*))?([^?##]*)(\?([^##]*))?(##(.*))?";
	/*
		separates the authority into user info, domain and port

		^((([^@:]+)(:([^@]+))?@)?([^:]*)?(:(.*)))?
		 123       4 5           6       7 8

			username  = $3
			password  = $5
			domain    = $6
			port      = $8
	*/
	var sAuthRegEx = "^(([^@:]+)(:([^@]+))?@)?([^:]*)?(:(.*))?";
	/*
		separates the path into segments & parameters

		((/?[^;/]+)(;([^/]+))?)
		12         3 4

			segment     = $1
			path        = $2
			parameters  = $4
	*/
	var sSegRegEx = "(/?[^;/]+)(;([^/]+))?";

	// parse the url looking for info
	var stUriInfo = reFindNoCase(sUriRegEx, sUrl, 1, true);
	// this is for the authority section
	var stAuthInfo = "";
	// this is for the segments in the path
	var stSegInfo = "";

	// create empty keys
	stUrlInfo["scheme"] = "";
	stUrlInfo["authority"] = "";
	stUrlInfo["path"] = "";
	stUrlInfo["directory"] = "";
	stUrlInfo["file"] = "";
	stUrlInfo["query"] = "";
	stUrlInfo["fragment"] = "";
	stUrlInfo["domain"] = "";
	stUrlInfo["port"] = "";
	stUrlInfo["username"] = "";
	stUrlInfo["password"] = "";
	stUrlInfo["params"] = structNew();

	// get the scheme
	if( stUriInfo.len[3] gt 0 ) stUrlInfo["scheme"] = mid(sUrl, stUriInfo.pos[3], stUriInfo.len[3]);
	// get the authority
	if( stUriInfo.len[5] gt 0 ) stUrlInfo["authority"] = mid(sUrl, stUriInfo.pos[5], stUriInfo.len[5]);
	// get the path
	if( stUriInfo.len[6] gt 0 ) stUrlInfo["path"] = mid(sUrl, stUriInfo.pos[6], stUriInfo.len[6]);
	// get the path
	if( stUriInfo.len[8] gt 0 ) stUrlInfo["query"] = mid(sUrl, stUriInfo.pos[8], stUriInfo.len[8]);
	// get the fragment
	if( stUriInfo.len[10] gt 0 ) stUrlInfo["fragment"] = mid(sUrl, stUriInfo.pos[10], stUriInfo.len[10]);

	// break authority into user info, domain and ports
	if( len(stUrlInfo["authority"]) gt 0 ){
		// parse the authority looking for info
		stAuthInfo = reFindNoCase(sAuthRegEx, stUrlInfo["authority"], 1, true);

		// get the domain
		if( stAuthInfo.len[6] gt 0 ) stUrlInfo["domain"] = mid(stUrlInfo["authority"], stAuthInfo.pos[6], stAuthInfo.len[6]);
		// get the port
		if( stAuthInfo.len[8] gt 0 ) stUrlInfo["port"] = mid(stUrlInfo["authority"], stAuthInfo.pos[8], stAuthInfo.len[8]);
		// get the username
		if( stAuthInfo.len[3] gt 0 ) stUrlInfo["username"] = mid(stUrlInfo["authority"], stAuthInfo.pos[3], stAuthInfo.len[3]);
		// get the password
		if( stAuthInfo.len[5] gt 0 ) stUrlInfo["password"] = mid(stUrlInfo["authority"], stAuthInfo.pos[5], stAuthInfo.len[5]);
	}

	// the query string in struct form
	stUrlInfo["params"]["segment"] = structNew();

	// if the path contains any parameters, we need to parse them out
	if( find(";", stUrlInfo["path"]) gt 0 ){
		// this is for the segments in the path
		stSegInfo = reFindNoCase(sSegRegEx, stUrlInfo["path"], 1, true);

		// loop through all the segments and build the strings
		while( stSegInfo.pos[1] gt 0 ){
			// build the path, excluding parameters
			sPath = sPath & mid(stUrlInfo["path"], stSegInfo.pos[2], stSegInfo.len[2]);

			// if there are some parameters in this segment, add them to the struct
			if( stSegInfo.len[4] gt 0 ){

				// put the parameters into an array for easier looping
				aQSPairs = listToArray(mid(stUrlInfo["path"], stSegInfo.pos[4], stSegInfo.len[4]), ";");

				// now, loop over the array and build the struct
				for( i=1; i lte arrayLen(aQSPairs); i=i+1 ){
					sKeyPair = aQSPairs[i]; // current pair
					sKey = listFirst(sKeyPair, "="); // current key
					// make sure there are 2 keys
					if( listLen(sKeyPair, "=") gt 1){
						sValue = urlDecode(listLast(sKeyPair, "=")); // current value
					} else {
						sValue = ""; // set blank value
					}
					// check if key already added to struct
					if( structKeyExists(stUrlInfo["params"]["segment"], sKey) ) stUrlInfo["params"]["segment"][sKey] = listAppend(stUrlInfo["params"]["segment"][sKey], sValue); // add value to list
					else structInsert(stUrlInfo["params"]["segment"], sKey, sValue); // add new key/value pair
				}
			}

			// get the ending position
			i = stSegInfo.pos[1] + stSegInfo.len[1];

			// get the next segment
			stSegInfo = reFindNoCase(sSegRegEx, stUrlInfo["path"], i, true);
		}

	} else {
		// set the current path
		sPath = stUrlInfo["path"];
	}

	// get the file name
	stUrlInfo["file"] = getFileFromPath(sPath);
	// get the directory path by removing the file name
	if( len(stUrlInfo["file"]) gt 0 ){
		stUrlInfo["directory"] = replace(sPath, stUrlInfo["file"], "", "one");
	} else {
		stUrlInfo["directory"] = sPath;
	}

	// the query string in struct form
	stUrlInfo["params"]["url"] = structNew();

	// if query info was supplied, break it into a struct
	if( len(stUrlInfo["query"]) gt 0 ){
		// put the query string into an array for easier looping
		aQSPairs = listToArray(stUrlInfo["query"], "&");

		// now, loop over the array and build the struct
		for( i=1; i lte arrayLen(aQSPairs); i=i+1 ){
			sKeyPair = aQSPairs[i]; // current pair
			sKey = listFirst(sKeyPair, "="); // current key
			// make sure there are 2 keys
			if( listLen(sKeyPair, "=") gt 1){
				sValue = urlDecode(listLast(sKeyPair, "=")); // current value
			} else {
				sValue = ""; // set blank value
			}
			// check if key already added to struct
			if( structKeyExists(stUrlInfo["params"]["url"], sKey) ) stUrlInfo["params"]["url"][sKey] = listAppend(stUrlInfo["params"]["url"][sKey], sValue); // add value to list
			else structInsert(stUrlInfo["params"]["url"], sKey, sValue); // add new key/value pair
		}
	}

	// return the struct
	return stUrlInfo;
}
blog comments powered by Disqus

Search CFLib.org


Latest Additions

Chris Wigginton added
loremIpsum
January 18, 2016

Gary Stanton added
calculateArrival...
November 19, 2015

Sebastiaan Naafs - van Dijk added
getDaysInQuarter
November 13, 2015

Simon Bingham added
convertSecondsTo...
April 15, 2015

Umbrae added
fullUrlEncode
February 11, 2015

Created by Raymond Camden / Design by Justin Johnson