CFLib.org – Common Function Library Project

getLinks(BodyText)

Last updated February 21, 2011

author

James Moberg

Version: 1 | Requires: CF5 | Library: StrLib

Description:
Creates a structure with the following contents: link: An array of all the URLs found in anchor (and frame) tags in the text. desc: An array of all the link descriptions in the text (frame entries are described as "FRAME: " followed by the frame name). index: A structure with the link description as the key, and the corresponding array position as the value. You can use the "index" structure to look up the array position of a particular URL. For example, if you know the text contains a link titled "Next", you can look up "Next" in the index structure with StructFind(), and use the resulting number to get the corresponding URL from the link array. NOTE: If you use this with CFHTTP, you may want to use the RESOLVEURL option. ORIGINAL AUTHOR: Joel Mueller - Creative Internet Solutions (v2, 10/23/1998)

Return Values:
Returns a structure of matches.

Example:

<cfsavecontent variable="theHTML">
<a href="http://www.adobe.com/products/coldfusion/">Adobe ColdFusion</a>
<a href="http://www.microsoft.com/">Microsoft</a>
<a href="http://www.google.com/">Google</a>
</cfsavecontent>
<CFDUMP VAR="#GetLinks(theHTML)#">

Parameters:

Name Description Required
BodyText String to parse. Yes

Full UDF Source:

/**
 * Finds all anchor or frame tags and creates a structure that you can use to look up a URL by name.
 *
 * The returned structure has three keys:
 *   link  - array of the HREF/SRC values, in the order the tags appear in BodyText.
 *   desc  - array of descriptions, parallel to "link". For an anchor this is the text between
 *           the opening tag and the next closing anchor tag; for a frame it is "FRAME: "
 *           followed by the frame's NAME attribute (empty when the frame has no NAME).
 *   index - structure mapping each description to its position in the two arrays. When the
 *           same description occurs more than once, the last occurrence wins.
 *
 * An anchor that is never closed takes the remainder of BodyText as its description, and
 * scanning resumes right after its opening tag so that later links are still collected.
 * 
 * @param BodyText 	 String to parse. (Required)
 * @return Returns a structure of matches. 
 * @author James Moberg (james@ssmedia.com) 
 * @version 1, February 21, 2011 
 */
function getLinks(BodyText){
	var objLinks = StructNew();
	var objIndex = StructNew();
	var arrLink = ArrayNew(1);
	var arrDesc = ArrayNew(1);
	var nextMatch = 1;
	var Counter = 1;
	/* Working variables are declared with var so they stay local to this call
	   instead of leaking into (and possibly clobbering) the caller's variables. */
	var objMatch = "";
	var objFrame = "";
	var thisURL = "";
	var thisTag = "";
	var thisDesc = "";
	var descEnd = 0;
	var fullTag = "";
	var frameName = "";
	do { /* find opening anchor tag. */
		objMatch = REFindNoCase("<(A|FRAME)[[:space:]]+[^>]*(HREF|SRC) ?= ?[""']?([^[:space:]""'>]+)(>|(([""']|[[:space:]])[^>]*>))", BodyText, nextMatch, true);
		/* pos[1]/len[1] describe the whole match; both are 0 when nothing matched,
		   which leaves nextMatch at 0 and ends the loop. */
		nextMatch = objMatch.pos[1] + objMatch.len[1];
		if (ArrayLen(objMatch.pos) GTE 4) {
			/* pos[2] is the tag name (A or FRAME), pos[4] is the HREF/SRC value. */
			thisURL = Mid(BodyText, objMatch.pos[4], objMatch.len[4]);
			thisTag = Mid(BodyText, objMatch.pos[2], objMatch.len[2]);
			if (CompareNoCase(thisTag, "A") EQ 0) {
				descEnd = FindNoCase("</A>", BodyText, nextMatch);
				if (descEnd GT 0) {
					thisDesc = Mid(BodyText, nextMatch, descEnd - nextMatch);
					nextMatch = descEnd + 4; /* skip past the closing tag */
				} else if (nextMatch LTE Len(BodyText)) {
					/* Unclosed anchor: use the rest of the text as the description and
					   keep nextMatch just after the opening tag. Without this guard Mid()
					   would be called with a negative count. */
					thisDesc = Mid(BodyText, nextMatch, Len(BodyText) - nextMatch + 1);
				} else {
					/* Unclosed anchor at the very end of the text: nothing follows it. */
					thisDesc = "";
				}
			} else { /* get the frame name */
				fullTag = Mid(BodyText, objMatch.pos[1], objMatch.len[1]);
				frameName = "";
				objFrame = REFindNoCase("NAME ?= ?[""']?([^[:space:]""'>]+)(>|(([""']|[[:space:]])[^>]*>))", fullTag, 1, true);
				if (ArrayLen(objFrame.pos) GT 1) {
					frameName = Mid(fullTag, objFrame.pos[2], objFrame.len[2]);
				}
				thisDesc = "FRAME: " & frameName;
			}
			/* allowoverwrite = true: a repeated description points at its latest position. */
			StructInsert(objIndex, thisDesc, Counter, true);
			arrLink[Counter] = thisURL;
			arrDesc[Counter] = thisDesc;
			Counter = Counter + 1;
		}
	} while (nextMatch NEQ 0);
	StructInsert(objLinks, "index", objIndex);
	StructInsert(objLinks, "link", arrLink);
	StructInsert(objLinks, "desc", arrDesc);
	return objLinks;
}
blog comments powered by Disqus

Search CFLib.org


Latest Additions

Kevin Cotton added
date2ExcelDate
May 5, 2016

Raymond Camden added
CapFirst
April 25, 2016

Chris Wigginton added
loremIpsum
January 18, 2016

Gary Stanton added
calculateArrival...
November 19, 2015

Sebastiaan Naafs - van Dijk added
getDaysInQuarter
November 13, 2015

Created by Raymond Camden / Design by Justin Johnson