CFLib.org – Common Function Library Project

detectLanguage(text)

Last updated May 29, 2007

author

Guido Bellomo

Version: 1 | Requires: CF6 | Library: StrLib

Description:
This function tries to guess the language of a given text. It supports Italian, English, and German. It accepts a string as its parameter and returns the two-letter code of the detected language (it, en, de).

Return Values:
Returns a string.

Example:

<cfsavecontent variable="test">
This is a piece of text
</cfsavecontent>
<cfoutput>
The following text is in: #detectLanguage(test)#
</cfoutput>

Parameters:

Name Description Required
text Text to check. Yes

Full UDF Source:

/**
 * Guesses the language of a given text by checking which language's common words occur in it.
 * Supports Italian (it), English (en) and German (de). HTML tags are stripped before matching.
 * 
 * @param text 	 Text to check. (Required)
 * @return Returns a string: the two-letter code of the best-scoring language, or an empty string when no test word is found. 
 * @author Guido Bellomo (guidobellomo@gmail.com) 
 * @version 1, May 29, 2007 
 */
function detectLanguage(text) {
	// Every local is var-scoped (including lang, testWords and k) so that
	// nothing leaks into, or overwrites, the caller's variables scope.
	var strLanguage = StructNew();
	var strPoints = StructNew();
	var languages = "";
	var languageCount = 0;
	var lang = "";
	var testWords = "";
	var wordCount = 0;
	var i = 0;
	var k = 0;
	var result = "";
	var maxPoints = 0;
	var currentPoint = 0;

	// Marker words per language. Each word is listed only once so that no
	// single word is weighted double in the score.
	strLanguage["it"] = "il,lo,la,gli,le,delle,dalle,dallo,ciao,che,questo,quello,quella,del,dal,in";
	strLanguage["en"] = "the,where,to,one,that,those,in,out,is";
	strLanguage["de"] = "der,die,das,es,dass,diese,wir,ihr,sie,aus,ein,jenes,dieses,jene,und,nein,ja,auch,nicht,wieso,warum,weshalb,wie,was,wer,durch,um,wegen,mit,ohne";
	strPoints["it"] = 0;
	strPoints["en"] = 0;
	strPoints["de"] = 0;
	languages = StructKeyList(strLanguage);
	languageCount = ListLen(languages);

	// Strip HTML tags so that markup is not scored as text
	text = ReReplace(text,"<.*?>","","ALL");

	// Score each language: one point per marker word that appears in the text
	// as a whole word (case-insensitive), however many times it occurs.
	for (i = 1; i lte languageCount; i = i + 1) {
		lang = ListGetAt(languages,i);
		testWords = StructFind(strLanguage,lang);
		wordCount = ListLen(testWords);
		for (k = 1; k lte wordCount; k = k + 1) {
			if (ReFindNoCase("\b#ListGetAt(testWords,k)#\b",text)) {
				strPoints[lang] = strPoints[lang]+1;
			}
		}
	}

	// Pick the winning language. The comparison is strict, so on a tie the
	// language that comes first in the key list wins, and a text with no
	// matches at all yields the empty string.
	for (i = 1; i lte languageCount; i = i + 1) {
		lang = ListGetAt(languages,i);
		currentPoint = StructFind(strPoints,lang);
		if (currentPoint gt maxPoints) {
			result = lang;
			maxPoints = currentPoint;
		}
	}
	return result;
}
blog comments powered by Disqus

Search CFLib.org


Latest Additions

Kevin Cotton added
date2ExcelDate
May 5, 2016

Raymond Camden added
CapFirst
April 25, 2016

Chris Wigginton added
loremIpsum
January 18, 2016

Gary Stanton added
calculateArrival...
November 19, 2015

Sebastiaan Naafs - van Dijk added
getDaysInQuarter
November 13, 2015

Created by Raymond Camden / Design by Justin Johnson