detectLanguage(text)
Last updated May 29, 2007
Version: 1 | Requires: CF6 | Library: StrLib
Description:
This function try to guess the language of a given text. Supports Italian,English,German.
Accept a string as parameters and returns a two-letter code of the language (it,en,de).
Return Values:
Returns a string.
Example:
<cfsavecontent variable="test">
This is a piece of text
</cfsavecontent>
<cfoutput>
The following text is in: #detectLanguage(test)#
</cfoutput>
Parameters:
Name | Description | Required |
---|---|---|
text | Text to check. | Yes |
Full UDF Source:
/**
* This function try to guess the language of a given text.
*
* @param text Text to check. (Required)
* @return Returns a string.
* @author Guido Bellomo (guidobellomo@gmail.com)
* @version 1, May 29, 2007
*/
function detectLanguage(text) {
var strLanguage = StructNew();
var strPoints = StructNew();
var keys = "";
var languages = "";
var i = 0;
var result = "";
var maxPoints = 0;
var currentPoint = 0;
// Init structure
strLanguage["it"] = "il,lo,la,gli,le,delle,dalle,dallo,ciao,che,questo,quello,quella,del,dal,in";
strLanguage["en"] = "the,where,to,one,that,those,in,out,is";
strLanguage["de"] = "der,die,das,es,dass,diese,wir,ihr,sie,aus,ein,jenes,diese,dieses,jene,und,nein,ja,auch,nicht,wieso,warum,weshlab,wie,was,warum,wer,durch,um,wegen,mit,ohne";
strPoints["it"] = 0;
strPoints["en"] = 0;
strPoints["de"] = 0;
keys = StructKeyList(strLanguage);
languages = StructKeyList(strLanguage);
// Clean HTML
text = ReReplace(text,"<.*?>","","ALL");
// Start race
for (i = 1; i lte ListLen(languages); i = i + 1) {
// Get test words
lang = ListGetAt(languages,i);
testWords = StructFind(strLanguage,lang);
for (k = 1; k lte ListLen(testWords); k = k + 1) {
if (ReFindNoCase("\b#ListGetAt(testWords,k)#\b",text)) {
strPoints[lang] = strPoints[lang]+1;
}
}
}
// Check the winning langage
for (i = 1; i lte ListLen(languages); i = i + 1) {
currentPoint = StructFind(strPoints,ListGetAt(languages,i));
if (currentPoint gt maxPoints) {
result = ListGetAt(languages,i);
maxPoints = currentPoint;
}
}
return result;
}
Search CFLib.org
Latest Additions
Raymond Camden added
QueryDeleteRows
November 04, 2017
Leigh added
nullPad
May 11, 2016
Raymond Camden added
stripHTML
May 10, 2016
Kevin Cotton added
date2ExcelDate
May 05, 2016
Raymond Camden added
CapFirst
April 25, 2016