diff --git a/make/fixdosfiles.sh b/make/fixdosfiles.sh
new file mode 100755
index 000000000..72f4fcaf5
--- /dev/null
+++ b/make/fixdosfiles.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#------------------------------------------------------
+# Script to find files that are not Unix encoded
+#
+# Laurent Destailleur - eldy@users.sourceforge.net
+#------------------------------------------------------
+# Usage: fixdosfiles.sh [list|fix]
+#------------------------------------------------------
+
+# Syntax
+if [ "x$1" != "xlist" -a "x$1" != "xfix" ]
+then
+ echo "This script detect or clean files with CR+LF into files with LF only. All source files are included, also files into includes."
+ echo "Usage: fixdosfiles.sh [list|fix]"
+fi
+
+# To detec
+if [ "x$1" = "xlist" ]
+then
+ find . \( -iname "functions" -o -iname "*.md" -o -iname "*.html" -o -iname "*.htm" -o -iname "*.php" -o -iname "*.sh" -o -iname "*.cml" -o -iname "*.css" -o -iname "*.js" -o -iname "*.lang" -o -iname "*.pl" -o -iname "*.txt" -o -iname "*.xml" \) -exec file "{}" + | grep -v '\/test' | grep CRLF
+fi
+
+# To convert
+if [ "x$1" = "xfix" ]
+then
+ for fic in `find . \( -iname "functions" -o -iname "*.md" -o -iname "*.html" -o -iname "*.htm" -o -iname "*.php" -o -iname "*.sh" -o -iname "*.cml" -o -iname "*.css" -o -iname "*.js" -o -iname "*.lang" -o -iname "*.pl" -o -iname "*.txt" -o -iname "*.xml" \) -exec file "{}" + | grep -v '\/test' | grep CRLF | awk -F':' '{ print $1 }' `
+ do
+ echo "Fix file $fic"
+ dos2unix "$fic"
+ done;
+fi
diff --git a/make/fixutf8bomfiles.sh b/make/fixutf8bomfiles.sh
new file mode 100755
index 000000000..bda503d28
--- /dev/null
+++ b/make/fixutf8bomfiles.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+#
+# Checks or fixes files that contain a UTF-8 BOM in the dolibarr source tree,
+# excluding git repository, custom modules and included libraries.
+#
+# Raphaël Doursenaud - rdoursenaud@gpcsolutions.fr
+# Laurent Destailleur eldy@users.sourceforge.net
+#------------------------------------------------------
+# Usage: fixutf8bomfiles.sh (list|fix) [addincludes]
+#------------------------------------------------------
+
+# Syntax
+if [ "x$1" != "xlist" -a "x$1" != "xfix" ]
+then
+ echo "Detect and fix bad UTF8 encoded files (UTF8 must not use BOM char)"
+ echo "Usage: fixutf8bomfiles.sh (list|fix) [addincludes]"
+fi
+
+if [ "x$2" != "xaddincludes" ]
+then
+ export moreoptions="--exclude-dir='includes'"
+fi
+
+# To detec
+if [ "x$1" = "xlist" ]
+then
+ #find . \( -iname '*.php' -print0 -o -iname '*.sh' -print0 -o -iname '*.pl' -print0 -o -iname '*.lang' -print0 -o -iname '*.txt' \) -print0 | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'
+ echo "grep -rlIZ --include='*.php' --include='*.sh' --include='*.pl' --include='*.lang' --include='*.txt' --exclude-dir='.git' --exclude-dir='.tx' $moreoptions --exclude-dir='custom' . . | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'"
+ grep -rlIZ --include='*.php' --include='*.sh' --include='*.pl' --include='*.lang' --include='*.txt' --exclude-dir='.git' --exclude-dir='.tx' $moreoptions --exclude-dir='custom' . . | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'
+fi
+
+# To convert
+if [ "x$1" = "xfix" ]
+then
+ for fic in `grep -rlIZ --include='*.php' --include='*.sh' --include='*.pl' --include='*.lang' --include='*.txt' --exclude-dir='.git' --exclude-dir='.tx' $moreoptions --exclude-dir='custom' . . | xargs -0 awk '/^\xEF\xBB\xBF/ {print FILENAME} {nextfile}'`
+ do
+ echo "Fixing $fic"
+ sed -i '1s/^\xEF\xBB\xBF//' $fic
+ done;
+fi
diff --git a/wwwroot/cgi-bin/awdownloadcsv.pl b/wwwroot/cgi-bin/awdownloadcsv.pl
index c43acaa72..eaa61ad14 100755
--- a/wwwroot/cgi-bin/awdownloadcsv.pl
+++ b/wwwroot/cgi-bin/awdownloadcsv.pl
@@ -1,152 +1,152 @@
-#!/usr/bin/perl -w
-#------------------------------------------------------------------------------
-# Free addition to AWStats Web Log Analyzer. Used to export the contents of
-# sections of the Apache server log database to CSV for use in other tools.
-# Works from command line or as a CGI.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-#------------------------------------------------------------------------------
-use CGI qw(:standard);
-
-my $ALLOWDOWNLOAD=0;
-
-# Disabled by default for security reason
-if (! $ALLOWDOWNLOAD)
-{
- print("Error: You must first edit script to change ALLOWDOWNLOAD to 1 to allow usage of this script.\n");
- print("Reason is that enabling this script may be a security hole as it allows someone to download/view details of your awstats data files.\n");
- exit;
-}
-
-my $q = new CGI;
-my $outputFile = ""; # used to write the output to a file
-my $inputFile = ""; # the fully qualified path to the input log database file
-my $sectionToReport = ""; # contains the tag to search for in the database file
-my $startSearchStr = "BEGIN_";
-my $endSearchStr = "END_";
-my $startPrinting = 0; # flag to indicate that the start tag has been found
-my $attachFileName = "";
-
-# These parameters are used to build the input file name of the awstats log database
-my $baseName = "";
-my $month = "";
-my $year = "";
-my $day = "";
-my $siteConfig = "";
-
-if ($q->param("outputFile")) {
- if ($outputFile eq '') { $outputFile = $q->param("outputFile"); }
-}
-
-if ($q->param("inputFile")) {
- if ($inputFile eq '') { $inputFile = $q->param("inputFile"); }
-}
-
-if ($q->param("section")) {
- if ($sectionToReport eq '' ) { $sectionToReport = $q->param("section"); }
-}
-
-if ($q->param("baseName")) {
- if ($baseName eq '' ) { $baseName = $q->param("baseName"); }
-}
-
-if ($q->param("month")) {
- if ($month eq '' ) { $month = $q->param("month"); }
-}
-
-if ($q->param("year")) {
- if ($year eq '' ) { $year = $q->param("year"); }
-}
-
-if ($q->param("day")) { $day = $q->param("day"); }
-
-if ($q->param("siteConfig")) {
- if ($siteConfig eq '' ) { $siteConfig = $q->param("siteConfig"); }
-}
-
-# set the attachment file name to the report section
-if ($sectionToReport ne '' ) {
- $attachFileName = $sectionToReport . ".csv";
-} else {
- $attachFileName = "exportCSV.csv";
-}
-print $q->header(-type=> "application/force-download", -attachment=>$attachFileName);
-
-# Build the start/end search tags
-$startSearchStr = $startSearchStr . $sectionToReport;
-$endSearchStr = $endSearchStr . $sectionToReport;
-
-if ( !$inputFile ) { $inputFile ="$baseName$month$year$day.$siteConfig.txt" };
-
-open (IN, $inputFile) || die "cannot open $inputFile\n";
-
-# If there's a parameter for the output, open it here
-if ($outputFile ne '') {
- open (OUT,">$outputFile") || die "cannot create $outputFile\n";
- flock (OUT, 2);
-}
-# Loop through the input file searching for the start string. When
-# found, start displaying the input lines (with spaces changed
-# to commas) until the end tag is found.
-
-# Array to store comments for printing once we hit the desired section
-my $commentCount = -1;
-my %commentArray;
-
-while () {
- chomp;
-
- if (/^#\s(.*-)\s/){ # search for comment lines
- s/ - /,/g; # replace dashes with commas
- s/#//; # get rid of the comment sign
- $commentArray[++$commentCount] = $_;
- }
-
- # put the test to end printing here to eliminate printing
- # the line with the END tag
- if (/^$endSearchStr\b/) {
- $startPrinting = 0;
- }
-
- if ($startPrinting) {
- s/ /,/g;
- print "$_\n";
- if ($outputFile ne '') {
- print OUT "$_\n";
- }
- }
- # if we find an END tag and we haven't started printing, reset the
- # comment array to start re-capturing comments for next section
- if ((/^END_/) && ($startPrinting == 0)) {
- $commentCount = -1;
- }
-
- # put the start printing test after the first input line
- # to eliminate printing the line with the BEGIN tag...find it
- # here, then start printing on the next input line
- if (/^$startSearchStr\b/) {
- $startPrinting = 1;
- # print the comment array - it provides labels for the columns
- for ($i = 0; $i <= $commentCount; $i++ ) {
- print "$commentArray[$i]\n";
- }
- }
-}
-
-close(IN);
-
-# Close the output file if there was one used
-if ($outputFile ne '') {
- close(OUT);
-}
+#!/usr/bin/perl -w
+#------------------------------------------------------------------------------
+# Free addition to AWStats Web Log Analyzer. Used to export the contents of
+# sections of the Apache server log database to CSV for use in other tools.
+# Works from command line or as a CGI.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#------------------------------------------------------------------------------
+use CGI qw(:standard);
+
+my $ALLOWDOWNLOAD=0;
+
+# Safety switch: the script stays inert until an administrator sets the
+# flag above to 1, because it hands out the content of awstats data files.
+unless ($ALLOWDOWNLOAD) {
+    print "Error: You must first edit script to change ALLOWDOWNLOAD to 1 to allow usage of this script.\n";
+    print "Reason is that enabling this script may be a security hole as it allows someone to download/view details of your awstats data files.\n";
+    exit;
+}
+
+my $q = CGI->new;
+
+# Output side: optional file receiving a copy of the CSV, and the file
+# name sent in the attachment header of the response
+my ($outputFile, $attachFileName) = ('', '');
+
+# Input side: path of the log database file and the section to extract
+my ($inputFile, $sectionToReport) = ('', '');
+
+# Tag prefixes; the section name is appended to both later on
+my ($startSearchStr, $endSearchStr) = ('BEGIN_', 'END_');
+my $startPrinting = 0; # becomes 1 once the start tag has been found
+
+# These parameters are used to build the input file name of the awstats log database
+my ($baseName, $month, $year, $day, $siteConfig) = ('', '', '', '', '');
+
+# Fetch one request parameter. Returns the supplied value, or "" when the
+# parameter is missing or false ("" or "0"); "" is also the value every
+# variable below was initialised with, so unset parameters change nothing.
+my $readParam = sub {
+    my ($paramName) = @_;
+    my $paramValue = $q->param($paramName);
+    return $paramValue ? $paramValue : "";
+};
+
+# Optional file that receives a copy of the CSV output
+$outputFile = $readParam->("outputFile");
+
+# Explicit path of the database file to read
+$inputFile = $readParam->("inputFile");
+
+# Section of the database to export
+$sectionToReport = $readParam->("section");
+
+# Parts from which the database file name is built when no explicit
+# path is given
+$baseName = $readParam->("baseName");
+
+$month = $readParam->("month");
+
+$year = $readParam->("year");
+
+$day = $readParam->("day");
+
+$siteConfig = $readParam->("siteConfig");
+
+# Name offered to the browser for the download: the report section when
+# one was requested, a generic name otherwise
+$attachFileName = ($sectionToReport ne '') ? $sectionToReport . ".csv" : "exportCSV.csv";
+print $q->header(-type=> "application/force-download", -attachment=>$attachFileName);
+
+# Build the start/end search tags
+$startSearchStr .= $sectionToReport;
+$endSearchStr .= $sectionToReport;
+
+# Without an explicit inputFile, derive the database name from its parts
+if ( !$inputFile ) { $inputFile ="$baseName$month$year$day.$siteConfig.txt" };
+
+# Three-argument open: the file name comes from the request, so it must
+# never be parsed for a mode or a pipe ("cmd|") as two-argument open does
+open(IN, '<', $inputFile) or die "cannot open $inputFile\n";
+
+# If there's a parameter for the output, open it here (same precaution)
+if ($outputFile ne '') {
+    open(OUT, '>', $outputFile) or die "cannot create $outputFile\n";
+    flock (OUT, 2); # 2 = LOCK_EX; NOTE(review): the result is not checked
+}
+# Loop through the input file searching for the start string. When
+# found, start displaying the input lines (with spaces changed
+# to commas) until the end tag is found.
+
+# Comment lines seen since the last END_ tag; they provide the column
+# labels printed when the requested section starts
+my @commentArray;
+
+while (<IN>) {
+    chomp;
+
+    if (/^#\s(.*-)\s/){ # search for comment lines
+        s/ - /,/g; # replace dashes with commas
+        s/#//; # get rid of the comment sign
+        push @commentArray, $_;
+    }
+
+    # put the test to end printing here to eliminate printing
+    # the line with the END tag
+    if (/^\Q$endSearchStr\E\b/) { # \Q..\E: request-supplied name, matched literally
+        $startPrinting = 0;
+    }
+
+    if ($startPrinting) {
+        s/ /,/g;
+        print "$_\n";
+        if ($outputFile ne '') {
+            print OUT "$_\n";
+        }
+    }
+    # if we find an END tag and we haven't started printing, reset the
+    # comment array to start re-capturing comments for next section
+    if ((/^END_/) && ($startPrinting == 0)) {
+        @commentArray = ();
+    }
+
+    # put the start printing test after the first input line
+    # to eliminate printing the line with the BEGIN tag...find it
+    # here, then start printing on the next input line
+    if (/^\Q$startSearchStr\E\b/) {
+        $startPrinting = 1;
+        # print the comment array - it provides labels for the columns
+        foreach my $label (@commentArray) {
+            print "$label\n";
+        }
+    }
+}
+
+# Done reading the database file.
+close IN;
+
+# OUT was opened only when an output file was requested, so close it
+# under that same condition.
+close OUT unless $outputFile eq '';
diff --git a/wwwroot/cgi-bin/lang/awstats-cn.txt b/wwwroot/cgi-bin/lang/awstats-cn.txt
index cb6e7bc08..0e797f7e1 100644
--- a/wwwroot/cgi-bin/lang/awstats-cn.txt
+++ b/wwwroot/cgi-bin/lang/awstats-cn.txt
@@ -1,182 +1,182 @@
-# Chinese (simplified) message file (by Che Dong chedongATgmail.com)
-# $Revision$ - $Date$
-PageCode=GBK
-message0=无法得知
-message1=无法得知(不能反向解析的网域名称)
-message2=其他
-message3=查看详细资料
-message4=日期
-message5=月
-message6=年
-message7=统计网站
-message8=首次参观日期
-message9=最近参观日期
-message10=参观人次
-message11=参观者
-message12=参观
-message13=个关键字词
-message14=搜索
-message15=百分比
-message16=流量统计
-message17=网域或国家
-message18=参观者
-message19=URL 网址
-message20=每小时浏览次数
-message21=浏览器
-message22=HTTP 错误
-message23=反相链接
-message24=从未更新(请参考 awstats_setup.html上的 'Build/Update')
-message25=参观者的网域或国家
-message26=主机数
-message27=网页数
-message28=个不同的网页
-message29=存取次数
-message30=不同的字词
-message31=找不到的网页
-message32=HTTP 错误码
-message33=Netscape 版本
-message34=IE 版本
-message35=最近更新
-message36=链接网站的方法
-message37=来源网址
-message38=网址由参观者自行输入或从书签取出
-message39=无法得知连结的方法
-message40=来自搜索引擎
-message41=来自此网站外的其他网页 (非搜索引擎)
-message42=从网站内部连结
-message43=网站搜索的关键字句
-message44=网站搜索的关键字词
-message45=无法反解译的IP地址
-message46=无法得知的操作系统
-message47=找不到的网址链接 (HTTP 错误码 404)
-message48=IP 地址
-message49=错误次数
-message50=无法得知的浏览器
-message51=个机器人
-message52=参观人次/参观者
-message53=搜索引擎网站的机器人
-message54=网页纪录分析系统
-message55=个於
-message56=网页数
-message57=文件数
-message58=版本
-message59=操作系统
-message60=01月
-message61=02月
-message62=03月
-message63=04月
-message64=05月
-message65=06月
-message66=07月
-message67=08月
-message68=09月
-message69=10月
-message70=11月
-message71=12月
-message72=浏览器统计
-message73=文件类别
-message74=立即更新
-message75=字节
-message76=回到主页
-message77=前
-message78=yyyy年mm月dd日 HH:MM
-message79=过滤包含
-message80=全部列出
-message81=主机
-message82=个解译成功
-message83=搜索引擎网站
-message84=日
-message85=一
-message86=二
-message87=三
-message88=四
-message89=五
-message90=六
-message91=按星期
-message92=按参观者
-message93=按参观时间
-message94=鉴别出的用户
-message95=最小
-message96=平均数
-message97=最大
-message98=网页压缩
-message99=节省了的带宽
-message100=压缩前
-message101=压缩后
-message102=总数
-message103=个不同的关键字句
-message104=入站处
-message105=编码
-message106=平均大小
-message107=从新闻群组链接
-message108=KB
-message109=MB
-message110=GB
-message111=离线浏览器(网页抓取)
-message112=是
-message113=否
-message114=Whois 信息
-message115=OK
-message116=出站处
-message117=每次参观所花时间
-message118=关闭此窗口
-message119=Bytes
-message120=用以搜索的短语
-message121=用以搜索的关键词
-message122=个不同的搜索引擎转介参观者到这站
-message123=个不同的其他网站转介参观者到这站
-message124=其他短语
-message125=其他登录 (包括匿名登录)
-message126=由那些搜索引擎转介
-message127=由那些其他网站转介
-message128=摘要
-message129=作全年统计时,无法准确得知参观者的数目
-message130=数据值数组
-message131=发信人邮址
-message132=收信人邮件地址
-message133=报表日期
-message134=特别/市场
-message135=屏幕分辨率
-message136=蠕虫/病毒 攻击
-message137=加入到收藏夹(估计)
-message138=按日期统计
-message139=其他
-message140=浏览器支持 Java
-message141=浏览器支持 Macromedia Director
-message142=浏览器支持 Flash
-message143=浏览器支持 Real audio 播放
-message144=浏览器支持 Quicktime audio 播放
-message145=浏览器支持 Windows Media audio 播放
-message146=浏览器支持 PDF
-message147=SMTP错误代码
-message148=国家或地区
-message149=邮件
-message150=大小
-message151=第一个
-message152=最末一个
-message153=过滤不包含
-message154=非浏览器产生的流量(来自搜索引擎机器人,病毒蠕虫等)
-message155=集群
-message156=以上列出的搜索引擎机器人产生的“非浏览器”流量并未包含在其他图表中
-message157=“+”后的数字为成功的“robots.txt”访问次数
-message158=以上列出的蠕虫产生的“非浏览器”流量并未包含在其他图表中
-message159=非浏览的流量包括搜索引擎机器人,蠕虫病毒产生的流量和非正常的HTTP相应
-message160=浏览器流量
-message161=非浏览器流量
-message162=按月历史统计
-message163=蠕虫
-message164=不同的蠕虫
-message165=成功发送邮件
-message166=邮件失败或拒收
-message167=敏感目标
-message168=Javascript禁用
-message169=创建者
-message170=插件
-message171=地区
-message172=城市
-message173=Opera 版本
-message174=Safari 版本
-message175=Chrome 版本
-message176=Konqueror 版本
-message177=,
-message178=下载
+# Chinese (simplified) message file (by Che Dong chedongATgmail.com)
+# $Revision$ - $Date$
+PageCode=GBK
+message0=无法得知
+message1=无法得知(不能反向解析的网域名称)
+message2=其他
+message3=查看详细资料
+message4=日期
+message5=月
+message6=年
+message7=统计网站
+message8=首次参观日期
+message9=最近参观日期
+message10=参观人次
+message11=参观者
+message12=参观
+message13=个关键字词
+message14=搜索
+message15=百分比
+message16=流量统计
+message17=网域或国家
+message18=参观者
+message19=URL 网址
+message20=每小时浏览次数
+message21=浏览器
+message22=HTTP 错误
+message23=反相链接
+message24=从未更新(请参考 awstats_setup.html上的 'Build/Update')
+message25=参观者的网域或国家
+message26=主机数
+message27=网页数
+message28=个不同的网页
+message29=存取次数
+message30=不同的字词
+message31=找不到的网页
+message32=HTTP 错误码
+message33=Netscape 版本
+message34=IE 版本
+message35=最近更新
+message36=链接网站的方法
+message37=来源网址
+message38=网址由参观者自行输入或从书签取出
+message39=无法得知连结的方法
+message40=来自搜索引擎
+message41=来自此网站外的其他网页 (非搜索引擎)
+message42=从网站内部连结
+message43=网站搜索的关键字句
+message44=网站搜索的关键字词
+message45=无法反解译的IP地址
+message46=无法得知的操作系统
+message47=找不到的网址链接 (HTTP 错误码 404)
+message48=IP 地址
+message49=错误次数
+message50=无法得知的浏览器
+message51=个机器人
+message52=参观人次/参观者
+message53=搜索引擎网站的机器人
+message54=网页纪录分析系统
+message55=个於
+message56=网页数
+message57=文件数
+message58=版本
+message59=操作系统
+message60=01月
+message61=02月
+message62=03月
+message63=04月
+message64=05月
+message65=06月
+message66=07月
+message67=08月
+message68=09月
+message69=10月
+message70=11月
+message71=12月
+message72=浏览器统计
+message73=文件类别
+message74=立即更新
+message75=字节
+message76=回到主页
+message77=前
+message78=yyyy年mm月dd日 HH:MM
+message79=过滤包含
+message80=全部列出
+message81=主机
+message82=个解译成功
+message83=搜索引擎网站
+message84=日
+message85=一
+message86=二
+message87=三
+message88=四
+message89=五
+message90=六
+message91=按星期
+message92=按参观者
+message93=按参观时间
+message94=鉴别出的用户
+message95=最小
+message96=平均数
+message97=最大
+message98=网页压缩
+message99=节省了的带宽
+message100=压缩前
+message101=压缩后
+message102=总数
+message103=个不同的关键字句
+message104=入站处
+message105=编码
+message106=平均大小
+message107=从新闻群组链接
+message108=KB
+message109=MB
+message110=GB
+message111=离线浏览器(网页抓取)
+message112=是
+message113=否
+message114=Whois 信息
+message115=OK
+message116=出站处
+message117=每次参观所花时间
+message118=关闭此窗口
+message119=Bytes
+message120=用以搜索的短语
+message121=用以搜索的关键词
+message122=个不同的搜索引擎转介参观者到这站
+message123=个不同的其他网站转介参观者到这站
+message124=其他短语
+message125=其他登录 (包括匿名登录)
+message126=由那些搜索引擎转介
+message127=由那些其他网站转介
+message128=摘要
+message129=作全年统计时,无法准确得知参观者的数目
+message130=数据值数组
+message131=发信人邮址
+message132=收信人邮件地址
+message133=报表日期
+message134=特别/市场
+message135=屏幕分辨率
+message136=蠕虫/病毒 攻击
+message137=加入到收藏夹(估计)
+message138=按日期统计
+message139=其他
+message140=浏览器支持 Java
+message141=浏览器支持 Macromedia Director
+message142=浏览器支持 Flash
+message143=浏览器支持 Real audio 播放
+message144=浏览器支持 Quicktime audio 播放
+message145=浏览器支持 Windows Media audio 播放
+message146=浏览器支持 PDF
+message147=SMTP错误代码
+message148=国家或地区
+message149=邮件
+message150=大小
+message151=第一个
+message152=最末一个
+message153=过滤不包含
+message154=非浏览器产生的流量(来自搜索引擎机器人,病毒蠕虫等)
+message155=集群
+message156=以上列出的搜索引擎机器人产生的“非浏览器”流量并未包含在其他图表中
+message157=“+”后的数字为成功的“robots.txt”访问次数
+message158=以上列出的蠕虫产生的“非浏览器”流量并未包含在其他图表中
+message159=非浏览的流量包括搜索引擎机器人,蠕虫病毒产生的流量和非正常的HTTP相应
+message160=浏览器流量
+message161=非浏览器流量
+message162=按月历史统计
+message163=蠕虫
+message164=不同的蠕虫
+message165=成功发送邮件
+message166=邮件失败或拒收
+message167=敏感目标
+message168=Javascript禁用
+message169=创建者
+message170=插件
+message171=地区
+message172=城市
+message173=Opera 版本
+message174=Safari 版本
+message175=Chrome 版本
+message176=Konqueror 版本
+message177=,
+message178=下载
diff --git a/wwwroot/cgi-bin/lang/awstats-lv.txt b/wwwroot/cgi-bin/lang/awstats-lv.txt
index 9f3985ae5..fce13900b 100644
--- a/wwwroot/cgi-bin/lang/awstats-lv.txt
+++ b/wwwroot/cgi-bin/lang/awstats-lv.txt
@@ -1,178 +1,178 @@
-锘# Latvie拧u valodas zi艈ojumu fails (madmaster@gobbo.caves.lv)
-# Updated by edvinsma@inbox.lv 2004/01/24 00:40:00
-# $Revision$ - $Date$
-PageCode=utf-8
-message0=Nezin膩ms
-message1=Nezin膩ms (neatpaz墨ts ip)
-message2=Citi
-message3=Apskat墨t izv膿rsti
-message4=Diena
-message5=M膿nesis
-message6=Gads
-message7=Statistika
-message8=Pirmais apmekl膿jums
-message9=P膿d膿jais apmekl膿jums
-message10=Viz墨拧u skaits
-message11=Unik膩lie apmekl膿t膩ji
-message12=Apmekl膿jums
-message13=at拧姆ir墨gi(s) atsl膿gv膩rdi(s)
-message14=Mekl膿t
-message15=Procenti
-message16=Trafiks
-message17=Domaini/Valstis
-message18=Apmekl膿t膩ji
-message19=Lapas-URL
-message20=Stundas
-message21=P膩rl奴kprogrammas
-message22=HTTP K募奴das
-message23=Nor膩d墨t膩ji
-message24=Mekl膿t Atsl膿gv膩rdus
-message25=Apmekl膿t膩ju domaini/valstis
-message26=hosti
-message27=lapas
-message28=at拧姆ir墨gas lapas
-message29=Skat墨tas lapas
-message30=Citi v膩rdi
-message31=Neatrastas lapas
-message32=HTTP K募奴du kodi
-message33=Netscape versijas
-message34=IE versijas
-message35=P膿d膿jais jaunin膩jums
-message36=Pievienoties saitei no
-message37=Ori模in膩li
-message38=Tie拧膩 adrese / Gr膩matz墨mes
-message39=Or模in膩ls nezin膩ms
-message40=Nor膩des no Interneta Mekl膿拧anas Sait膿m
-message41=Nor膩des no 膩r膿j膩m lap膩m (citas web lapas iz艈emot mekl膿拧anas saites)
-message42=Links from an internal page (cita lapa 拧aj膩 pa拧膩 sait膿)
-message43=Atsl膿gv膩rdi kas lietoti mekl膿拧anas sait膿s
-message44=Kb
-message45=Neatpaz墨tas IP Addreses
-message46=Nezin膩ms OS (Nor膩des Lauks)
-message47=Piepras墨ts bet neatrasts URLs (HTTP kods 404)
-message48=IP Addrese
-message49=K募uda Tr膩p墨jumi
-message50=Nezin膩mi p膩rl奴ki (Nor膩des lauks)
-message51=Apmekl膿ju拧ie roboti
-message52=apmekl膿jumi/apmekl膿t膩ji
-message53=Roboti/Zirnek募i apmekl膿t膩ji
-message54=Br墨vs re膩l膩 laika logfailu analizators advanc膿tai web statistikai
-message55=no
-message56=Lapas
-message57=Tr膩p墨jumi
-message58=Versijas
-message59=Oper膿t膩jsist膿mas
-message60=Jan
-message61=Feb
-message62=Mar
-message63=Apr
-message64=Mai
-message65=J奴n
-message66=J奴l
-message67=Aug
-message68=Sep
-message69=Okt
-message70=Nov
-message71=Dec
-message72=Navig膩cija
-message73=Failu tips
-message74=Atjaunot
-message75=Baiti
-message76=Atpaka募 uz galveno lapu
-message77=Aug拧a
-message78=dd mmm yyyy - HH:MM
-message79=Filtrs
-message80=Pilns saraksts
-message81=Hosti
-message82=Zin膩ms
-message83=Roboti
-message84=Sv
-message85=Pir
-message86=Ot
-message87=Tr
-message88=Ce
-message89=Pkt
-message90=Se
-message91=Ned膿募as dienas
-message92=Kas
-message93=Kad
-message94=Autentific膿tie lietot膩ji
-message95=Min
-message96=Vid
-message97=Maks
-message98=Web sal墨dzin膩jums
-message99=saglab膩tais joslas platums
-message100=Pirms kompresijas
-message101=P膿c kompresijas
-message102=Kop膩
-message103=At拧姆ir墨gi atsl膿gv膩rdi
-message104=Iejas lapas
-message105=Kods
-message106=Vid膿jais izm膿rs
-message107=Saites no Zi艈u grup膩m
-message108=KB
-message109=MB
-message110=GB
-message111=Sav膩c膿js
-message112=J膩
-message113=N膿
-message114=WhoIs inform膩cija
-message115=OK
-message116=Izejas pages
-message117=Apmekl膿juma ilgums
-message118=Aizv膿rt logu
-message119=Baiti
-message120=Mekl膿拧anas atsl膿gfr膩zes
-message121=Mekl膿拧anas atsl膿gv膩rdi
-message122=Citas mekl膿t膩ju lapas ar atsauc膿m
-message123=Citas lapas ar atsauc膿m
-message124=Citas fr膩zes
-message125=Anon墨mie lietot膩ji
-message126=Mekl膿t膩ju lapas ar atsauc膿m
-message127=Lapas ar atsauc膿m
-message128=Kopsavilkums
-message129=Prec墨za v膿rt墨ba sada募膩 "Gads" nav pieejama
-message130=Datu v膿r墨bu kopnes
-message131=S奴t墨t膩ja adrese
-message132=Sa艈膿m膿ja adrese
-message133=Atskaites periods
-message134=Papildus/M膩rketings
-message135=Ekr膩na iz拧姆ir拧anas sp膿ja
-message136=V墨rusu uzbrukumi
-message137=Pievienots izlasei
-message138=M膿ne拧a dienas
-message139=Da啪膩di
-message140=P膩rl奴kprogrammas ar Java atbalstu
-message141=P膩rl奴kprogrammas ar Macromedia Director atbalstu
-message142=P膩rl奴kprogrammas ar Flash atbalstu
-message143=P膩rl奴kprogrammas ar RealAudio atbalstu
-message144=P膩rl奴kprogrammas ar QuickTime atbalstu
-message145=P膩rl奴kprogrammas ar Windows Media atbalstu
-message146=P膩rl奴kprogrammas ar PDF atbalstu
-message147=SMTP k募奴du kodi
-message148=Valstis
-message149=E-pasti
-message150=Izm膿rs
-message151=S膩kums
-message152=Beigas
-message153=Izsl膿g拧anas filtrs
-message154=艩eit kodi par膩da 拧膩vienus vai trafiku, ko nav apskat墨ju拧i lietot膩ji, t膩p膿c vi艈i nav iek募auti cit膩s diagramm膩s.
-message155=Puduris
-message156=艩eit uzr膩d墨tie roboti ir rad墨ju拧i tr膩pijumus vai "nepskat墨to" trafiku, t膩p膿c tie nav iek募auti cit膩s diagramm膩s.
-message157=Skaitlis p膿c "+" ir veiksm墨go 拧膩vienu skaits robots.txt failam.
-message158=艩ie ir uzr膩d墨ti tr膩pijumi vai trafiks ko rad墨ja t墨kla t膩rpi vai ar墨 "neapskat墨t膩s" lapas, t膩p膿c tie nav iek募auti cit膩s
-diagramm膩s.
-message159="Neapskat墨to" trafiku 模ener膿 roboti, t墨kla t膩rpi, vai ar墨 atbildes ar specialo HTTP statusa kodu.
-message160=Apskat墨ts trafiks
-message161=Nav apskat墨ts trafiks
-message162=M膿ne拧a atskaite
-message163=T墨kla t膩rpi
-message164=Da啪膩di t墨kla t膩rpi
-message165=Veiksm墨gi nos奴t墨ti e-pasti
-message166=Neveiksm墨gas e-pasta s奴t墨拧anas
-message167=Ievainojam墨ba
-message168=Atsl膿gtsw Javascript
-message169=Izveidojis
-message170=spraud艈i
-message171=Re模ioni
+# Latvie拧u valodas zi艈ojumu fails (madmaster@gobbo.caves.lv)
+# Updated by edvinsma@inbox.lv 2004/01/24 00:40:00
+# $Revision$ - $Date$
+PageCode=utf-8
+message0=Nezin膩ms
+message1=Nezin膩ms (neatpaz墨ts ip)
+message2=Citi
+message3=Apskat墨t izv膿rsti
+message4=Diena
+message5=M膿nesis
+message6=Gads
+message7=Statistika
+message8=Pirmais apmekl膿jums
+message9=P膿d膿jais apmekl膿jums
+message10=Viz墨拧u skaits
+message11=Unik膩lie apmekl膿t膩ji
+message12=Apmekl膿jums
+message13=at拧姆ir墨gi(s) atsl膿gv膩rdi(s)
+message14=Mekl膿t
+message15=Procenti
+message16=Trafiks
+message17=Domaini/Valstis
+message18=Apmekl膿t膩ji
+message19=Lapas-URL
+message20=Stundas
+message21=P膩rl奴kprogrammas
+message22=HTTP K募奴das
+message23=Nor膩d墨t膩ji
+message24=Mekl膿t Atsl膿gv膩rdus
+message25=Apmekl膿t膩ju domaini/valstis
+message26=hosti
+message27=lapas
+message28=at拧姆ir墨gas lapas
+message29=Skat墨tas lapas
+message30=Citi v膩rdi
+message31=Neatrastas lapas
+message32=HTTP K募奴du kodi
+message33=Netscape versijas
+message34=IE versijas
+message35=P膿d膿jais jaunin膩jums
+message36=Pievienoties saitei no
+message37=Ori模in膩li
+message38=Tie拧膩 adrese / Gr膩matz墨mes
+message39=Or模in膩ls nezin膩ms
+message40=Nor膩des no Interneta Mekl膿拧anas Sait膿m
+message41=Nor膩des no 膩r膿j膩m lap膩m (citas web lapas iz艈emot mekl膿拧anas saites)
+message42=Links from an internal page (cita lapa 拧aj膩 pa拧膩 sait膿)
+message43=Atsl膿gv膩rdi kas lietoti mekl膿拧anas sait膿s
+message44=Kb
+message45=Neatpaz墨tas IP Addreses
+message46=Nezin膩ms OS (Nor膩des Lauks)
+message47=Piepras墨ts bet neatrasts URLs (HTTP kods 404)
+message48=IP Addrese
+message49=K募uda Tr膩p墨jumi
+message50=Nezin膩mi p膩rl奴ki (Nor膩des lauks)
+message51=Apmekl膿ju拧ie roboti
+message52=apmekl膿jumi/apmekl膿t膩ji
+message53=Roboti/Zirnek募i apmekl膿t膩ji
+message54=Br墨vs re膩l膩 laika logfailu analizators advanc膿tai web statistikai
+message55=no
+message56=Lapas
+message57=Tr膩p墨jumi
+message58=Versijas
+message59=Oper膿t膩jsist膿mas
+message60=Jan
+message61=Feb
+message62=Mar
+message63=Apr
+message64=Mai
+message65=J奴n
+message66=J奴l
+message67=Aug
+message68=Sep
+message69=Okt
+message70=Nov
+message71=Dec
+message72=Navig膩cija
+message73=Failu tips
+message74=Atjaunot
+message75=Baiti
+message76=Atpaka募 uz galveno lapu
+message77=Aug拧a
+message78=dd mmm yyyy - HH:MM
+message79=Filtrs
+message80=Pilns saraksts
+message81=Hosti
+message82=Zin膩ms
+message83=Roboti
+message84=Sv
+message85=Pir
+message86=Ot
+message87=Tr
+message88=Ce
+message89=Pkt
+message90=Se
+message91=Ned膿募as dienas
+message92=Kas
+message93=Kad
+message94=Autentific膿tie lietot膩ji
+message95=Min
+message96=Vid
+message97=Maks
+message98=Web sal墨dzin膩jums
+message99=saglab膩tais joslas platums
+message100=Pirms kompresijas
+message101=P膿c kompresijas
+message102=Kop膩
+message103=At拧姆ir墨gi atsl膿gv膩rdi
+message104=Iejas lapas
+message105=Kods
+message106=Vid膿jais izm膿rs
+message107=Saites no Zi艈u grup膩m
+message108=KB
+message109=MB
+message110=GB
+message111=Sav膩c膿js
+message112=J膩
+message113=N膿
+message114=WhoIs inform膩cija
+message115=OK
+message116=Izejas pages
+message117=Apmekl膿juma ilgums
+message118=Aizv膿rt logu
+message119=Baiti
+message120=Mekl膿拧anas atsl膿gfr膩zes
+message121=Mekl膿拧anas atsl膿gv膩rdi
+message122=Citas mekl膿t膩ju lapas ar atsauc膿m
+message123=Citas lapas ar atsauc膿m
+message124=Citas fr膩zes
+message125=Anon墨mie lietot膩ji
+message126=Mekl膿t膩ju lapas ar atsauc膿m
+message127=Lapas ar atsauc膿m
+message128=Kopsavilkums
+message129=Prec墨za v膿rt墨ba sada募膩 "Gads" nav pieejama
+message130=Datu v膿r墨bu kopnes
+message131=S奴t墨t膩ja adrese
+message132=Sa艈膿m膿ja adrese
+message133=Atskaites periods
+message134=Papildus/M膩rketings
+message135=Ekr膩na iz拧姆ir拧anas sp膿ja
+message136=V墨rusu uzbrukumi
+message137=Pievienots izlasei
+message138=M膿ne拧a dienas
+message139=Da啪膩di
+message140=P膩rl奴kprogrammas ar Java atbalstu
+message141=P膩rl奴kprogrammas ar Macromedia Director atbalstu
+message142=P膩rl奴kprogrammas ar Flash atbalstu
+message143=P膩rl奴kprogrammas ar RealAudio atbalstu
+message144=P膩rl奴kprogrammas ar QuickTime atbalstu
+message145=P膩rl奴kprogrammas ar Windows Media atbalstu
+message146=P膩rl奴kprogrammas ar PDF atbalstu
+message147=SMTP k募奴du kodi
+message148=Valstis
+message149=E-pasti
+message150=Izm膿rs
+message151=S膩kums
+message152=Beigas
+message153=Izsl膿g拧anas filtrs
+message154=艩eit kodi par膩da 拧膩vienus vai trafiku, ko nav apskat墨ju拧i lietot膩ji, t膩p膿c vi艈i nav iek募auti cit膩s diagramm膩s.
+message155=Puduris
+message156=艩eit uzr膩d墨tie roboti ir rad墨ju拧i tr膩pijumus vai "nepskat墨to" trafiku, t膩p膿c tie nav iek募auti cit膩s diagramm膩s.
+message157=Skaitlis p膿c "+" ir veiksm墨go 拧膩vienu skaits robots.txt failam.
+message158=艩ie ir uzr膩d墨ti tr膩pijumi vai trafiks ko rad墨ja t墨kla t膩rpi vai ar墨 "neapskat墨t膩s" lapas, t膩p膿c tie nav iek募auti cit膩s
+diagramm膩s.
+message159="Neapskat墨to" trafiku 模ener膿 roboti, t墨kla t膩rpi, vai ar墨 atbildes ar specialo HTTP statusa kodu.
+message160=Apskat墨ts trafiks
+message161=Nav apskat墨ts trafiks
+message162=M膿ne拧a atskaite
+message163=T墨kla t膩rpi
+message164=Da啪膩di t墨kla t膩rpi
+message165=Veiksm墨gi nos奴t墨ti e-pasti
+message166=Neveiksm墨gas e-pasta s奴t墨拧anas
+message167=Ievainojam墨ba
+message168=Atsl膿gtsw Javascript
+message169=Izveidojis
+message170=spraud艈i
+message171=Re模ioni
message172=Pils膿tas
\ No newline at end of file
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm
index c443f66bd..f6124146f 100644
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -1,2219 +1,2219 @@
-# AWSTATS ROBOTS DATABASE
-#-------------------------------------------------------
-# If you want to add robots to extend AWStats database detection capabilities,
-# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
-
-# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
-# (see http://perldoc.perl.org/perlreref.html). AWSTats applies these
-# expressions to the user agent string in the order given by the lists. The
-# first match specifies the robot.
-#
-# Note: This regular expression must not contain any whitespace.
-# Otherwise AWStats will produce lines in the database that
-# will be misinterpreted and as a consequence the corresponding data in the
-# generated HTML reports will be wrong. If you want to match whitespace in
-# the user agent string, use other constructs like '\s', '[:blank:]',
-# '\p{IsSpace}', '\x20' etc.
-#
-# The corresponding entry in RobotsHashIDLib contains the regular expression
-# as key, followed by a string containing HTML-text. AWStats inserts this
-# text into reports to describe the bot. If possible the text should contain
-# a link to the bot home page. This makes it easier for sysadmins to find
-# the information necessary e.g. to adapt the robots.txt file.
-#
-# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
-# contains as first part the regular expression specifying the bot. The
-# second part is a string that gives the Company or product managing the bot.
-# This information is not used yet.
-#
-# There are several sorts of bots that AWStats is not able to detect and
-# therefore a considerable amount of bot generated traffic counts
-# as user traffic:
-#
-# a) A crawler that identifies itself in the referrer string, but not in
-# the user agent string. An example is the crawler from semalt.semalt.com.
-#
-# b) Crawlers that correctly access robots.txt but identify themselves in
-# in the user agent string only once or just a few times. Most of the
-# time a user agent string ist used that does not contain hints that
-# a bot is involved. An example is the iCjobs spider.
-# msnbot-UDiscovery/2.0b seems to show this behaviour too.
-#
-#
-#
-#-------------------------------------------------------
-
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
-# added dipsie (not tested with real data).
-# added DomainsDB.net http://domainsdb.net/
-# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
-# added Nutch (used by looksmart (furl?))
-# added rssImagesBot
-# added Sqworm
-# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
-# added w3c css-validator
-# added documentation link to bot home pages for above and selected major bots.
-# In the case of international bots, choose .com page.
-# Included tool tip (html "title").
-# To do: parameterize to match both AWStats language and tooltips settings.
-# To do: add html links for all bots based on current documentation in source
-# files referenced below.
-# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
-# made minor grammar corrections to notes below
-# 2005-08-24 added YahooSeeker-Testing
-# added w3c-checklink
-# updated url for ask.com
-# 2005-08-24 added Girafabot http://www.girafa.com/
-# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/
-# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
-# dded geniebot (wgao@genieknows.com)
-# added BecomeBot link http://www.become.com/site_owners.html
-# added topicblogs http://www.topicblogs.com/
-# added Powermarks; seen used by referrer spam
-# added YahooSeeker
-# added NG/2. http://www.exabot.com/
-# 2005-09-15 added link for Walhello appie
-# added bender focused_crawler
-# updated YahooSeeker description (blog crawler)
-# 2005-09-16 added link for http://linkchecker.sourceforge.net
-# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
-# added Blogslive info@blogslive.com intelliseek.com
-# added BlogPulse (ISSpider-3.0) intelliseek.com
-# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
-# added EverbeeCrawler
-# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
-# added link for Bloglines http://www.bloglines.com
-# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
-# added Blogshares Spiders (Synchronized V1.5.1)
-# added yacy
-# 2005-11-21 added Argus www.simpy.com
-# added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
-# added MJ12bot http://majestic12.co.uk/bot.php
-# added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
-# added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
-# added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
-# added Seekbot (http://www.seekbot.net/bot.html)
-# added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
-# added link for BaiDuSpider
-# added link for Blogshares Spider
-# added link for StackRambler http://www.rambler.ru/doc/faq.shtml
-# added link for WISENutbot
-# added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com. Moved location to above wisenut to avoid classification as wisenut
-# 2005-12-15
-# added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
-# added findlinks http://wortschatz.uni-leipzig.de/findlinks/
-# added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]
-# added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
-# added lmspider (lmspider@scansoft.com) http://www.nuance.com/
-# added noxtrumbot http://www.noxtrum.com/
-# added SandCrawler (Microsoft)
-# added SBIder http://www.sitesell.com/sbider.html
-# added SeznamBot http://fulltext.seznam.cz/
-# added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
-# added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
-# added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
-# added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
-# added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
-# added link for GigaBot
-# added link for MagpieRSS
-# added link for MSIECrawler
-# 2005-12-21
-# added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
-# added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
-# added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70 users.sourceforge.net]
-# added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
-# added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt). May be used as robot or browser - a site may want to remove this entry.
-# added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
-# added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
-# 2005-12-22
-# added EARTHCOM.info www.earthcom.info
-# added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
-# added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
-# 2006-01-01
-# added Dulance http://www.dulance.com/bot.jsp
-# added MojeekBot http://www.mojeek.com/bot.html
-# added nicebot http://www.egghelp.org/setup.htm ?
-# added Snappy http://www.urltrends.com/faq.php
-# added sohu agent
-# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
-# added zspider http://feedback.redkolibri.com/
-# 2006-01-13
-# added boitho.com-dc http://www.boitho.com/dcbot.html
-# added IRLbot http://irl.cs.tamu.edu/crawler
-# added virus_detector virus_harvester@securecomputing.com
-# added Wavefire http://www.wavefire.com; info@wavefire.com
-# added WebFilter Robot
-# 2006-01-24
-# added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
-# added Exabot exabot.com
-# added LetsCrawl.com http://letscrawl.com
-# added ichiro http://help.goo.ne.jp/door/crawlerE.html
-# 2006-01-27 additional 22 robots from a list provided by Moizes Gabor
-# added ALeadSoftbot http://www.aleadsoft.com/bot.htm
-# added CipinetBot http://www.cipinet.com/bot.html
-# added Cuasarbot http://www.cuasar.com/
-# added Dumbot http://www.dumbfind.com/
-# added Extreme_Picture_Finder http://www.exisoftware.com/
-# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots
-# added IlTrovatore-Setaccio http://www.iltrovatore.it/aiuto/motore_di_ricerca.html bot@iltrovatore.it
-# added InsurancoBot http://www.fastspywareremoval.com/
-# added InternetArchive http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
-# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca
-# added Kurzor http://www.easymail.hu/ cursor@easymail.hu
-# added NutchCVS http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
-# added NutchOSU-VLIB http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
-# added Orbiter http://www.dailyorbit.com/bot.htm
-# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php
-# added SuperBot http://www.sparkleware.com/superbot/
-# added SynooBot http://www.synoo.de/bot.html webmaster@synoo.com
-# added TestBot http://www.agbrain.com/
-# added TutorGigBot http://www.tutorgig.info/
-# added WebIndexer mailto://webindexerv1@yahoo.com
-# added WebMiner http://64.124.122.252/feedback.html
-# 2006-02-01
-# added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
-# added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
-# additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
-# added Candlelight_Favorites_Inspector
-# added DomainChecker
-# added EasyDL
-# added FavOrg
-# added Favorites_Sweeper
-# added Html_Link_Validator
-# added Internet_Ninja
-# added JRTwine_Software_Check_Favorites_Utility
-# fixed Microsoft_URL_Control
-# added miniRank
-# added Missigua_Locator
-# added NPBot
-# added Ocelli
-# added Onet.pl_SA
-# added proodleBot
-# added SearchGuild_DMOZ_Experiment
-# added Susie
-# added Website_Monitoring_Bot
-# added Xenu_Link_Sleuth
-# 2006-05-15
-# added ASPseek http://www.aspseek.org/
-# added AdamM Bot http://home.blic.net/adamm/
-# added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
-# added arianna.libero.it (Italian Portal/search engine)
-# added Biz360 spider http://www.biz360.com
-# added BlogBridge Service http://www.blogbridge.com/
-# added BlogSearch http://www.icerocket.com/
-# added libcrawl
-# added edgeio-relanshanbottriever http://www.edgeio.com
-# added FeedFlow http://feedflow.com/about
-# added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
-# added Java catchall - used by many spam bots
-# added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
-# added msnbot-media http://search.msn.com/msnbot.htm
-# added MT::Telegraph::Agent
-# added Netluchs http://www.netluchs.de/ (German SE bot)
-# added oBot http://www.webmasterworld.com/forum11/1616.htm
-# added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds.
-# added ping.blo.gs http://blo.gs/ping.php blog bot
-# added Sphere Scout http://www.sphere.com/
-# added sproose crawler http://www.sproose.com/bot.html
-# added SyndicAPI http://syndicapi.com/bot.html
-# added Yahoo! Mindset http://mindset.research.yahoo.com/
-# added msrabot
-# added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk
-# fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
-# changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
-# This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
-# 2006-05-17
-# added Alpha Search Agent # 62.152.125.60 Eurologon Srl
-# added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
-# added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
-# added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
-# added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
-# You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
-# 2006-05-20
-# added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
-# added Accoona-AI-Agent http://www.accoona.com/
-# added ActiveBookmark http://www.libmaster.com/active_bookmark.php
-# added BIGLOTRON http://www.biglotron.com/robot.html
-# added Bookmark-Manager http://bkm.sourceforge.net/
-# added cbn00glebot
-# added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
-# added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
-# added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
-# added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
-# added ConveraCrawler http://www.authoritativeweb.com/crawl/
-# added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
-# added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
-# added Cursor http://adcenter.hu/docs/en/bot.html
-# added Custo http://www.netwu.com/custo/
-# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
-# added Deepindex http://www.deepindex.net/faq.php
-# added DNSGroup http://www.dnsgroup.com/
-# added DoCoMo http://www.nttdocomo.co.jp/
-# added dumm.de-Bot http://www.dumm.de/
-# added ETS v http://www.freetranslation.com/help/
-# added eventax http://www.eventax.de/
-# added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
-# added FAST Enterprise Crawler http://www.fast.no/
-# added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
-# added FeedValidator http://feedvalidator.org/
-# added FilmkameraBot http://www.filmkamera.at/bot.html
-# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
-# added Global Fetch http://www.wesonet.com/
-# added GOFORITBOT http://www.goforit.com/about/
-# added GoForIt.com http://www.goforit.com/about/
-# added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
-# added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
-# added HPPrint
-# added HTMLParser http://htmlparser.sourceforge.net/
-# added Hundesuche.com-Bot http://www.hundesuche.com/
-# added InfoBot http://www.infobot.org/
-# added InfociousBot http://corp.infocious.com/tech_crawler.php
-# added InternetSupervision http://internetsupervision.com/
-# added isearch2006 http://www.yahoo.com.cn/
-# added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
-# added KalamBot http://64.124.122.251/feedback.html
-# added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
-# added Kevin http://dznet.com/kevin/
-# added KnowItAll http://www.cs.washington.edu/research/knowitall/
-# added Knowledge.com http://www.knowledge.com/
-# added Kouaa Krawler http://www.kouaa.com/
-# added ksibot http://ego.ms.mff.cuni.cz/
-# added Link Valet Online http://www.htmlhelp.com/tools/valet/
-# added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
-# added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
-# added MapoftheInternet.com http://MapoftheInternet.com/
-# added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
-# added Megite http://www.megite.com/
-# added Metaspinner http://index.meta-spinner.de/
-# added Mini-reptile
-# added Misterbot http://www.misterbot.fr/
-# added Miva http://www.miva.com/
-# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
-# added MSRBOT http://research.microsoft.com/research/sv/msrbot/
-# added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
-# added Mydoyouhike http://www.doyouhike.net/my
-# added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
-# added NetSprint http://www.netsprint.pl/serwis/
-# added NimbleCrawler http://www.healthline.com/
-# added OpenWebSpider http://www.openwebspider.org/
-# added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
-# added OSSProxy http://www.marketscore.com/FAQ.Aspx
-# added passwordmaker.org http://passwordmaker.org/
-# added PEAR HTTP Request class http://pear.php.net/
-# added PEERbot http://www.peerbot.com/
-# added PHP version tracker http://www.nexen.net/phpversion/bot.php
-# added PictureOfInternet http://malfunction.org/poi/
-# added plinki http://www.plinki.com/
-# added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
-# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
-# added ProjectWF-java-test-crawler
-# added PyQuery http://sourceforge.net/projects/pyquery/
-# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
-# added Scumbot
-# added Sensis Web Crawler http://www.sensis.com.au/
-# added snap.com beta crawler http://www.snap.com/
-# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
-# added STEROID Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
-# added Suchfin-Bot http://www.suchfin.de/
-# added Sunrise http://www.sunrisexp.com/
-# added Tagyu Agent http://www.tagyu.com/
-# added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
-# added TeragramCrawlerSURF http://www.teragram.com/
-# added Test Crawler http://netp.ath.cx/
-# added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
-# added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
-# added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
-# added updated http://www.updated.com/
-# added Vermut http://vermut.aol.com
-# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
-# added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
-# added VSE http://www.vivisimo.com/
-# added webcrawl.net http://www.webcrawl.net/
-# added Web Downloader http://www.krasu.ru/soft/chuchelo/
-# added Webdup http://www.webdup.com/en/index.html
-# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
-# added WordPress http://wordpress.org/
-# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
-# added Xenu's Link Sleuth (with ')
-# added xirq http://www.xirq.com/
-# added yoogliFetchAgent http://www.yoogli.com/
-# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
-# -- fix - some robots were reported with _ where _ should have been a space.
-# changed Xenu Link Sleuth
-# changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
-# changed favorites_sweeper -> favorites_sweeper
-# -- updates
-# updated AskJeeves to Ask
-# 2012-06-05 Albrecht Mueller
-# added Grabber from SDSC (San Diego Supercomputer Center).
-# 2013-09-30 Albrecht Mueller
-# AWStats probably cannot detect this bot as it identifies itself in
-# the referrer field and not in the user agent string.
-#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
-
-# to do MS Search 4.0 Robot
-
-#package AWSROB;
-
-
-# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
-# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
-# Rem: To avoid bad detection, some robot's ids were removed from this list:
-# - Robots with ID of 3 letters only
-# - Robots called 'webs' and 'tcl'
-# Rem: directhit changed into direct_hit (its real id)
-# Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser
-# Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser
-# Rem: roadrunner changed into road_runner
-# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
-# Rem: voyager changed into ^voyager\/ to avoid to exclude voyager and amigavoyager browser
-
-# RobotsSearchIDOrder
-# It contains all matching criteria to search for in log fields. This list is
-# used to know in which order to search Robot IDs.
-# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
-# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
-# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
-#-------------------------------------------------------
-@RobotsSearchIDOrder_list1 = (
-# Common robots (In robot file)
-'appie',
-'architext',
-'bingpreview',
-'bjaaland',
-'contentmatch',
-'ferret',
-'googlebot\-image',
-'googlebot',
-'google\-sitemaps',
-'google[_+\s]web[_+\s]preview',
-'grabber',
-'gulliver',
-'virus[_+\s]detector', # Must be before harvest
-'harvest',
-'htdig',
-'jeeves',
-'linkwalker',
-'lilina',
-'lycos[_+\s]',
-'moget',
-'muscatferret',
-'myweb',
-'nomad',
-'scooter',
-'slurp',
-'^voyager\/',
-'weblayers',
-# Common robots (Not in robot file)
-'antibot',
-'bruinbot',
-'digout4u',
-'echo!',
-'fast\-webcrawler',
-'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
-'ia_archiver',
-'jennybot',
-'mercator',
-'netcraft',
-'msnbot\-media',
-'msnbot-udiscovery',
-'msnbot',
-'petersnews',
-'relevantnoise\.com',
-'unlost_web_crawler',
-'voila',
-'webbase',
-'webcollage',
-'cfetch',
-'zyborg', # Must be before wisenut
-'wisenutbot'
-);
-@RobotsSearchIDOrder_list2 = (
-# Less common robots (In robot file)
-'007ac9',
-'[^a]fish',
-'abcdatos',
-'abonti\.com',
-'acme\.spider',
-'ahoythehomepagefinder',
-'ahrefsbot',
-'alkaline',
-'anthill',
-'arachnophilia',
-'arale',
-'araneo',
-'aretha',
-'ariadne',
-'powermarks',
-'arks',
-'aspider',
-'atn\.txt',
-'atomz',
-'auresys',
-'backrub',
-'bbot',
-'bigbrother',
-'blackwidow',
-'blindekuh',
-'bloodhound',
-'borg\-bot',
-'brightnet',
-'bspider',
-'cactvschemistryspider',
-'calif[^r]',
-'cassandra',
-'cgireader',
-'checkbot',
-'christcrawler',
-'churl',
-'cienciaficcion',
-'cms\scrawler',
-'collective',
-'combine',
-'conceptbot',
-'coolbot',
-'core',
-'cosmos',
-'crazywebcrawler',
-'cruiser',
-'cusco',
-'cyberspyder',
-'desertrealm',
-'deweb',
-'dienstspider',
-'digger',
-'diibot',
-'direct_hit',
-'dnabot',
-'domainappender',
-'download_express',
-'dragonbot',
-'dwcp',
-'e\-collector',
-'ebiness',
-'elfinbot',
-'emacs',
-'emcspider',
-'esther',
-'evliyacelebi',
-'fastcrawler',
-'feedcrawl',
-'fdse',
-'felix',
-'fetchrover',
-'fido',
-'finnish',
-'fireball',
-'fouineur',
-'francoroute',
-'freecrawl',
-'funnelweb',
-'gama',
-'gazz',
-'gcreep',
-'getbot',
-'geturl',
-'golem',
-'gougou',
-'grapnel',
-'griffon',
-'gromit',
-'gulperbot',
-'hambot',
-'havindex',
-'hometown',
-'htmlgobble',
-'hyperdecontextualizer',
-'iajabot',
-'iaskspider',
-'hl_ftien_spider',
-'sogou',
-'icjobs\.de',
-'iconoclast',
-'ilse',
-'imagelock',
-'incywincy',
-'informant',
-'infoseek',
-'infoseeksidewinder',
-'infospider',
-'inspectorwww',
-'intelliagent',
-'irobot',
-'iron33',
-'israelisearch',
-'javabee',
-'jbot',
-'jcrawler',
-'jobo',
-'jobot',
-'joebot',
-'jubii',
-'jumpstation',
-'kapsi',
-'katipo',
-'kilroy',
-'ko[_+\s]yappo[_+\s]robot',
-'kummhttp',
-'labelgrabber\.txt',
-'larbin',
-'legs',
-'linkidator',
-'linkscan',
-'lockon',
-'logo_gif',
-'macworm',
-'magpie',
-'marvin',
-'mattie',
-'mediafox',
-'merzscope',
-'meshexplorer',
-'mindcrawler',
-'mnogosearch',
-'momspider',
-'monster',
-'motor',
-'muncher',
-'mwdsearch',
-'ndspider',
-'nederland\.zoek',
-'netcarta',
-'netmechanic',
-'netscoop',
-'newscan\-online',
-'nhse',
-'northstar',
-'nzexplorer',
-'objectssearch',
-'occam',
-'octopus',
-'openfind',
-'orb_search',
-'packrat',
-'pageboy',
-'parasite',
-'patric',
-'pegasus',
-'perignator',
-'perlcrawler',
-'phantom',
-'phpdig',
-'piltdownman',
-'pimptrain',
-'pioneer',
-'pitkow',
-'pjspider',
-'plumtreewebaccessor',
-'poppi',
-'portalb',
-'psbot',
-'python',
-'raven',
-'rbse',
-'resumerobot',
-'rhcs',
-'road_runner',
-'robbie',
-'robi',
-'robocrawl',
-'robofox',
-'robozilla',
-'roverbot',
-'rules',
-'safetynetrobot',
-'semalt', #Note: This entry will not work as this crawler identifies itself
-# in the referrer string and not in the user agent string
-'search\-info',
-'search_au',
-'searchprocess',
-'senrigan',
-'sgscout',
-'shaggy',
-'shaihulud',
-'sift',
-'simbot',
-'sistrix', #Virus/trojan-infection? fr-crawler, ca-crawler? See https://www.projecthoneypot.org/ip_37.59.55.128, https://www.projecthoneypot.org/ip_198.27.80.144
-'site\-valet',
-'sitetech',
-'skymob',
-'slcrawler',
-'smartspider',
-'snooper',
-'solbot',
-'speedy',
-'spider[_+\s]monkey',
-'spiderbot',
-'spiderline',
-'spiderman',
-'spiderview',
-'spry',
-'sqworm',
-'ssearcher',
-'suke',
-'sunrise',
-'suntek',
-'sven',
-'tach_bw',
-'tagyu_agent',
-'tailrank',
-'tarantula',
-'tarspider',
-'techbot',
-'templeton',
-'titan',
-'titin',
-'tkwww',
-'tlspider',
-'ucsd',
-'udmsearch',
-'universalfeedparser',
-'urlck',
-'valkyrie',
-'verticrawl',
-'victoria',
-'visionsearch',
-'voidbot',
-'vwbot',
-'w3index',
-'w3m2',
-'wallpaper',
-'wanderer',
-'wapspIRLider',
-'webbandit',
-'webcatcher',
-'webcopy',
-'webfetcher',
-'webfoot',
-'webinator',
-'weblinker',
-'webmirror',
-'webmoose',
-'webquest',
-'webreader',
-'webreaper',
-'websnarf',
-'webspider',
-'webvac',
-'webwalk',
-'webwalker',
-'webwatch',
-'whatuseek',
-'whowhere',
-'wired\-digital',
-'wmir',
-'wolp',
-'wombat',
-'wordpress',
-'worm',
-'woozweb',
-'wwwc',
-'wz101',
-'xenu\slink\ssleuth',
-'xget',
-# Other robots reported by users
-'^finbot', #UA string starts with "finbot", should not match "elfinbot"
-'^webindex$', #UA should not match "webindexer"
-'1\-more_scanner',
-'360spider',
-'a6-indexer',
-'accoona\-ai\-agent',
-'activebookmark',
-'adamm_bot',
-'adsbot-google',
-'advbot',
-'affectv\.co\.uk',
-'almaden',
-'aipbot',
-'aleadsoftbot',
-'alpha_search_agent',
-'allrati',
-'aport',
-'applebot',
-'archive\-de\.com',
-'archive\.org_bot',
-'argus', # Must be before nutch
-'arianna\.libero\.it',
-'aspseek',
-'asterias',
-'awbot',
-'backlinktest\.com',
-'baiduspider',
-'becomebot',
-'bender',
-'betabot',
-'biglotron',
-'bittorrent_bot',
-'biz360[_+\s]spider',
-'blexbot',
-'blogbridge[_+\s]service',
-'bloglines',
-'blogpulse',
-'blogsearch',
-'blogshares',
-'blogslive',
-'blogssay',
-'bncf\.firenze\.sbn\.it\/raccolta\.txt',
-'bobby',
-'boitho\.com\-dc',
-'bookmark\-manager',
-'boris',
-'bubing',
-'bumblebee',
-'candlelight[_+\s]favorites[_+\s]inspector',
-'careerbot',
-'cbn00glebot',
-'ccbot',
-'cerberian_drtrs',
-'cfnetwork',
-'cipinetbot',
-'checkweb_link_validator',
-'cliqzbot',
-'commons\-httpclient',
-'computer_and_automation_research_institute_crawler',
-'converamultimediacrawler',
-'converacrawler',
-'copubbot',
-'cscrawler',
-'cse_html_validator_lite_online',
-'cuasarbot',
-'cursor',
-'custo',
-'datafountains\/dmoz_downloader',
-'dataprovider\.com',
-'daumoa',
-'daviesbot',
-'daypopbot',
-'deepindex',
-'deusu',
-'dipsie\.bot',
-'dnsgroup',
-'doccheckbot',
-'domainchecker',
-'domainsdb\.net',
-'dotbot',
-'duckduckgo-favicons-bot',
-'dulance',
-'dumbot',
-'dumm\.de\-bot',
-'earthcom\.info',
-'easydl',
-'eccp',
-'edgeio\-retriever',
-'ernst[:blank:]2\.0',
-'ets_v',
-'exactseek',
-'extreme[_+\s]picture[_+\s]finder',
-'eventax',
-'everbeecrawler',
-'everest\-vulcan',
-'ezresult',
-'enteprise',
-'facebook',
-'facebot',
-'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
-'finderlein[_+\s]research[_+\s]crawler',
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler
-'fast_enterprise_crawler',
-'fast\-search\-engine',
-'fastbot',
-'favicon',
-'favorg',
-'favorites_sweeper',
-'feedburner',
-'feedfetcher\-google',
-'feedflow',
-'feedster',
-'feedsky',
-'feedvalidator',
-'fetchbot',
-'filmkamerabot',
-'filterdb\.iss\.net',
-'findlinks',
-'findexa_crawler',
-'firmilybot',
-'foaf-search\.net',
-'fooky\.com\/ScorpionBot',
-'g2crawler',
-'gaisbot',
-'geniebot',
-'genieo',
-'gigablastopensource',
-'gigabot',
-'girafabot',
-'global_fetch',
-'gnodspider',
-'goforit\.com',
-'goforitbot',
-'gonzo',
-'grapeshot',
-'grub',
-'gpu_p2p_crawler',
-'henrythemiragorobot',
-'heritrix',
-'holmes',
-'hoowwwer',
-'hpprint',
-'htmlparser',
-'html[_+\s]link[_+\s]validator',
-'httrack',
-'hundesuche\.com\-bot',
-'i-bot',
-'icarus6j',
-'ichiro',
-'idmarch',
-'iltrovatore\-setaccio',
-'implisensebot',
-'infobot',
-'infociousbot',
-'infohelfer',
-'infomine',
-'insurancobot',
-'integromedb\.org',
-'internet[_+\s]ninja',
-'internetarchive',
-'internetseer',
-'internetsupervision',
-'ips\-agent',
-'irlbot',
-'isearch2006',
-'istellabot',
-'iupui_research_bot',
-'izsearch',
-'james\sbot',
-'jobboerse', #AWStats seems not to find this one despite the fact that "JobboerseBot" and "jobboerse.com" appear in the UA-string, maybe some previous entry matches
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
-'justview',
-'kalambot',
-'kamano\.de_newsfeedverzeichnis',
-'kazoombot',
-'kevin',
-'keyoshid', # Must come before Y!J
-'kinjabot',
-'kinja\-imagebot',
-'knowitall',
-'knowledge\.com',
-'kouaa_krawler',
-'krugle',
-'ksibot',
-'kurzor',
-'lanshanbot',
-'letscrawl\.com',
-'libcrawl',
-'linkbot',
-'linkdex\.com',
-'link_valet_online',
-'metager\-linkchecker', # Must be before linkchecker
-'linkchecker',
-'linkstats\sbot',
-'lipperhey',
-'livejournal\.com',
-'lmspider',
-'loadtimebot',
-'lssrocketcrawler',
-'ltbot',
-'ltx71',
-'lwp\-request',
-'lwp\-trivial',
-'madaali\.de',
-'magpierss',
-'mail\.ru',
-'mapoftheinternet\.com',
-'meanpathbot',
-'mediabot',
-'mediapartners\-google',
-'megaindex',
-'megite',
-'memorybot',
-'metager2-verification-bot',
-'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponing log file has about 40 entries containing "MetaJobBot" in the UA string - strange.
-'metaspinner',
-'miadev',
-'microsoft\sbits',
-'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
-'microsoft[_+\s]url[_+\s]control',
-'mindupbot',
-'mini\-reptile',
-'minirank',
-'missigua_locator',
-'misterbot',
-'miva',
-'mizzu_labs',
-'mj12bot',
-'mojeekbot',
-'msiecrawler',
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
-'ms_search_4\.0_robot',
-'msrabot',
-'msrbot',
-'mt::telegraph::agent',
-'mydoyouhike',
-'nagios',
-'nasa_search',
-'netestate\sne\scrawler',
-'netluchs',
-'netsprint',
-'newsgatoronline',
-'nicebot',
-'nimblecrawler',
-'noxtrumbot',
-'npbot',
-'loocalcrawler/nutch',
-'nutchcvs',
-'nutchosu\-vlib',
-'nutch', # Must come after other nutch versions
-'ocelli',
-'octora_beta_bot',
-'omniexplorer[_+\s]bot',
-'onet\.pl[_+\s]sa',
-'onfolio',
-'opentaggerbot',
-'openwebspider',
-'optimizer',
-'oracle_ultra_search',
-'orangebot',
-'orbiter',
-'yodaobot',
-'qihoobot',
-'qwantify',
-'passwordmaker\.org',
-'pear_http_request_class',
-'peerbot',
-'perman',
-'php[_+\s]version[_+\s]tracker',
-'phpcrawl',
-'picmole',
-'pictureofinternet',
-'ping\.blo\.gs',
-'plinki',
-'pluckfeedcrawler',
-'plukkie',
-'pogodak',
-'pompos',
-'popdexter',
-'port_huron_labs',
-'postfavorites',
-'projectwf\-java\-test\-crawler',
-'proodlebot',
-'publiclibraryarchive',
-'pyquery',
-'rambler',
-'redalert',
-'riddler',
-'rogerbot',
-'rojo',
-'rssimagesbot',
-'ruffle',
-'rufusbot',
-'safeads\.xyz',
-'safesearch',
-'sandcrawler',
-'savetheworldheritage',
-'sbider',
-'schizozilla',
-'scumbot',
-'searchguild[_+\s]dmoz[_+\s]experiment',
-'searchmetricsbot',
-'seekbot',
-'semrushbot',
-'sensis_web_crawler',
-'seodiver',
-'seokicks\.de',
-'seoscanners',
-'seznambot',
-'shim\-crawler',
-'shoutcast',
-'sitedomain-bot',
-'siteexplorer\.info',
-'skimbot',
-'slysearch',
-'smtbot',
-'snap\.com_beta_crawler',
-'sohu\-search',
-'sohu', # "sohu agent"
-'snappy',
-'spbot',
-'sphere_scout',
-'spiderlytics',
-'spip',
-'sproose_crawler',
-'ssearch_bot',
-'steeler',
-'steroid__download',
-'stq_bot',
-'suchfin\-bot',
-'superbot',
-'surveybot',
-'susie',
-'syndic8',
-'syndicapi',
-'synoobot',
-'tcl_http_client_package',
-'technoratibot',
-'teragramcrawlersurf',
-'test_crawler',
-'testbot',
-'thumbsniper',
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
-'topicblogs',
-'turnitinbot',
-'turtlescanner', # Must be before turtle
-'turtle',
-'tutorgigbot',
-'twiceler',
-'ubicrawler',
-'ultraseek',
-'unchaos_bot_hybrid_web_search_engine',
-'unido\-bot',
-'unisterbot',
-'updated',
-'ustc\-semantic\-group',
-'vagabondo\-wap',
-'vagabondo',
-'vebidoobot',
-'vermut',
-'versus_crawler_from_eda\.baykan@epfl\.ch',
-'vespa_crawler',
-'voltron',
-'vortex',
-'vse\/',
-'w3c\-checklink',
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
-'w3c_validator',
-'watchmouse',
-'wavefire',
-'waybackarchive\.org',
-'wbsearchbot',
-'webclipping\.com',
-'webcompass',
-'webcrawl\.net',
-'web_downloader',
-'webdup',
-'webfilter',
-'webindexer',
-'webminer',
-'website[_+\s]monitoring[_+\s]bot',
-'webvulncrawl',
-'wells_search',
-'wer-liefert-was',
-'wesee:search',
-'wevikabot',
-'wonderer',
-'wotbox',
-'wume_crawler',
-'wwweasel',
-'xenu\'s_link_sleuth',
-'xenu_link_sleuth',
-'xirq',
-'xovibot',
-'y!j', # Must come after keyoshid Y!J
-'yacy',
-'yahoo\-blogs',
-'yahoo\-verticalcrawler',
-'yahoofeedseeker',
-'yahooseeker\-testing',
-'yahooseeker',
-'yahoo\-mmcrawler',
-'yahoo!_mindset',
-'yandex',
-'flexum',
-'yanga',
-'yet-another-spider',
-'yisouspider',
-'yooglifetchagent',
-'z\-add_link_checker',
-'zealbot',
-'zhuaxia',
-'zspider',
-'zeus',
-'ng\/1\.', # put at end to avoid false positive
-'ng\/2\.', # put at end to avoid false positive
-'exabot', # put at end to avoid false positive
-# Additional bots found by Sussex.
-'^[1-3]$', # Hiding bots. Doesn't appear to be a valid user agent.
-'alltop',
-'applesyndication',
-'asynchttpclient',
-'bingbot',
-'blogged_crawl',
-'bloglovin',
-'butterfly',
-'buzztracker',
-'carpathia',
-'catbot',
-'chattertrap',
-'check_http', #(nagios) a monitoring tool
-'coldfusion',
-'covario',
-'daylifefeedfetcher',
-'discobot',
-'dlvr\.it',
-'dreamwidth',
-'drupal',
-'ezoom',
-'feedmyinbox',
-'feedroll\.com',
-'feedzira',
-'fever\/',
-'freenews',
-'geohasher',
-'hanrss',
-'inagist',
-'jacobin\sclub',
-'jakarta',
-'js\-kit',
-'largesmall\scrawler',
-'linkedinbot',
-'longurl',
-'metauri',
-'microsoft\-webdav\-miniredir',
-'^motorola$',
-'movabletype',
-# These appear to be bots trying to hide. All of the usual architecture data is missing.
-'^mozilla\/3\.0\s\(compatible$',
-'^mozilla\/4\.0$',
-'^mozilla\/4\.0\s\(compatible;\)$',
-'^mozilla\/5\.0$',
-'^mozilla\/5\.0\s\(compatible;$',
-'^mozilla\/5\.0\s\(en\-us\)$',
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
-'^msie',
-# End of hiding bots.
-'netnewswire',
-'\snetseer\s',
-'netvibes',
-'newrelicpinger',
-'newsfox',
-'nextgensearchbot',
-'ning',
-'pingdom',
-'pita',
-'postpost',
-'postrank',
-'printfulbot',
-'protopage',
-'proximic',
-'quipply',
-'r6\_',
-'ratingburner',
-'regator',
-'rome\sclient',
-'rpt\-httpclient',
-'rssgraffiti',
-'sage\+\+',
-'scoutjet',
-'simplepie',
-'sitebot',
-'summify\.com',
-'superfeedr',
-'synthesio',
-'teoma',
-'topblogsinfo',
-'topix\.net',
-'trapit',
-'trileet',
-'tweetedtimes',
-'twisted\spagegetter',
-'twitterbot',
-'twitterfeed',
-'unwindfetchor',
-'wazzup',
-'windows\-rss\-platform',
-'wiumi',
-'xydo',
-'yahoo!\sslurp',
-'yahoo\spipes',
-'yahoo\-newscrawler',
-'yahoocachesystem',
-'yahooexternalcache',
-'yahoo!\ssearchmonkey',
-'yahooysmcm',
-'yammer',
-# 'yandexbot', #already covered by 'yandex'
-'yeti',
-'yie8',
-'youdao',
-'yourls',
-'zemanta',
-'zend_http_client',
-'zumbot',
-# Other id that are 99% of robots
-'wget',
-'libwww',
-'^java\/[0-9]' # put at end to avoid false positive
-);
-@RobotsSearchIDOrder_listgen = (
-# Generic robot
-'robot',
-'checker',
-'crawl',
-'discovery',
-'hunter',
-'scanner',
-'spider',
-'sucker',
-'bot[\s_+:,\.\;\/\\\-]',
-# Identifies
-#"Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT P9 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
-#as a but. There is a Android mobile phone called "CUBOT P9", so this is probably not a bot.
-'[\s_+:,\.\;\/\\\-]bot',
-'curl',
-'php',
-'ruby\/',
-'no_user_agent'
-);
-
-
-
-# RobotsHashIDLib
-# List of robots names ('robot id','robot clear text')
-#-------------------------------------------------------
-%RobotsHashIDLib = (
-# Common robots (In robot file)
-'appie','Walhello appie',
-'architext','ArchitextSpider',
-'bingpreview','Bing Preview bot',
-'bjaaland','Bjaaland',
-'ferret','Wild Ferret Web Hopper #1, #2, #3',
-'contentmatch','Yahoo!China ContentMatch Crawler',
-'googlebot\-image','Googlebot-Image',
-'googlebot','Googlebot',
-'google\-sitemaps', 'Google Sitemaps',
-'grabber', 'Grabber (SDSC)',
-'google[_+\s]web[_+\s]preview', 'Google Web Preview',
-'gulliver','Northern Light Gulliver',
-'virus[_+\s]detector','virus_detector',
-'harvest','Harvest',
-'htdig','ht://Dig',
-'jeeves','Ask',
-'linkwalker','LinkWalker',
-'lilina','Lilina',
-'lycos[_+\s]','Lycos',
-'moget','moget',
-'muscatferret','Muscat Ferret',
-'myweb','Internet Shinchakubin',
-'nomad','Nomad',
-'scooter','Scooter',
-'slurp','Yahoo Slurp',
-'^voyager\/','Voyager',
-'weblayers','Weblayers',
-# Common robots (Not in robot file)
-'antibot','Antibot',
-'bruinbot','The web archive',
-'digout4u','Digout4u',
-'echo!','EchO!',
-'fast\-webcrawler','Fast-Webcrawler',
-'ia_archiver\-web\.archive\.org','The web archive (IA Archiver)',
-'ia_archiver','Alexa (IA Archiver)',
-'jennybot','JennyBot',
-'mercator','Mercator',
-'msnbot\-media','MSNBot-media',
-'msnbot-udiscovery', 'msnbot-UDiscovery Note: AWStats counts most of its traffic as user traffic',
-'msnbot','MSNBot',
-'netcraft','Netcraft',
-'petersnews','Petersnews',
-'unlost_web_crawler','Unlost Web Crawler',
-'voila','Voila',
-'webbase', 'WebBase',
-'zyborg','ZyBorg',
-'wisenutbot','WISENutbot',
-'webcollage','WebCollage',
-'cfetch','Cfetch',
-# Less common robots (In robot file)
-'007ac9', '007ac9 Crawler, seems to belong to SISTRIX',
-'[^a]fish','Fish search',
-'abcdatos','ABCdatos BotLink',
-'abonti\.com','Abonti WebSearch',
-'acme\.spider','Acme.Spider',
-'ahoythehomepagefinder','Ahoy! The Homepage Finder',
-'ahrefsbot', 'AhrefsBot',
-'alkaline','Alkaline',
-'anthill','Anthill',
-'arachnophilia','Arachnophilia',
-'arale','Arale',
-'araneo','Araneo',
-'aretha','Aretha',
-'ariadne','ARIADNE',
-'powermarks','Powermarks', # must come before Arks; seen used by referrer spam
-'arks','arks',
-'aspider','ASpider (Associative Spider)',
-'atn\.txt','ATN Worldwide',
-'atomz','Atomz.com Search Robot',
-'auresys','AURESYS',
-'backrub','BackRub',
-'bbot','BBot',
-'bigbrother','Big Brother',
-'blackwidow','BlackWidow',
-'blindekuh','Die Blinde Kuh',
-'bloodhound','Bloodhound',
-'borg\-bot','Borg-Bot',
-'brightnet','bright.net caching robot',
-'bspider','BSpider',
-'cactvschemistryspider','CACTVS Chemistry Spider',
-'calif[^r]','Calif',
-'cassandra','Cassandra',
-'cgireader','Digimarc Marcspider/CGI',
-'checkbot','Checkbot',
-'christcrawler','ChristCrawler.com',
-'churl','churl',
-'cienciaficcion','cIeNcIaFiCcIoN.nEt',
-'cms\scrawler', 'CMS Crawler',
-'collective','Collective',
-'combine','Combine System',
-'conceptbot','Conceptbot',
-'coolbot','CoolBot',
-'core','Web Core / Roots',
-'cosmos','XYLEME Robot',
-'crazywebcrawler', 'CrazyWeb Crawler',
-'cruiser','Internet Cruiser Robot',
-'cusco','Cusco',
-'cyberspyder','CyberSpyder Link Test',
-'desertrealm','Desert Realm Spider',
-'deweb','DeWeb(c) Katalog/Index',
-'dienstspider','DienstSpider',
-'digger','Digger',
-'diibot','Digital Integrity Robot',
-'direct_hit','Direct Hit Grabber',
-'dnabot','DNAbot',
-'domainappender', 'DomainAppender',
-'download_express','DownLoad Express',
-'dragonbot','DragonBot',
-'dwcp','DWCP (Dridus\' Web Cataloging Project)',
-'e\-collector','e-collector',
-'ebiness','EbiNess',
-'elfinbot','ELFINBOT',
-'emacs','Emacs-w3 Search Engine',
-'emcspider','ananzi',
-'esther','Esther',
-'evliyacelebi','Evliya Celebi',
-'fastcrawler','FastCrawler',
-'feedcrawl','FeedCrawl by feed@aobo.com',
-'fdse','Fluid Dynamics Search Engine robot',
-'felix','Felix IDE',
-'fetchrover','FetchRover',
-'fido','fido',
-'finnish','Finnish',
-'fireball','KIT-Fireball',
-'fouineur','Fouineur',
-'francoroute','Robot Francoroute',
-'freecrawl','Freecrawl',
-'funnelweb','FunnelWeb',
-'gama','gammaSpider, FocusedCrawler',
-'gazz','gazz',
-'gcreep','GCreep',
-'getbot','GetBot',
-'geturl','GetURL',
-'golem','Golem',
-'gougou','GouGou',
-'grapnel','Grapnel/0.01 Experiment',
-'griffon','Griffon',
-'gromit','Gromit',
-'gulperbot','Gulper Bot',
-'hambot','HamBot',
-'havindex','havIndex',
-'hometown','Hometown Spider Pro',
-'htmlgobble','HTMLgobble',
-'hyperdecontextualizer','Hyper-Decontextualizer',
-'iajabot','iajaBot',
-'iaskspider','Sina Iask Spider',
-'hl_ftien_spider','Hylanda',
-'sogou','Sogou Spider',
-'icjobs\.de', 'iCjobs Spider Note: Most traffic counts as user traffic',
-#20130805 The user agent string of the icjobs-spider contained the
-#identifying string only when it accessed the robots.txt file.
-#When it accessed the actual content it did not identify itself as
-#a spider. Thus traffic of this spider was counted as user traffic.
-#The behavious seems to have changed now - the spider identifies itself
-#when it accesses content pages.
-#20141401 Behavior as before: Does identify itself when it accesses
-# robots.txt and the root page. The following traffic does not contain
-# the identification string and is therefore counted as user traffic.
-'iconoclast','Popular Iconoclast',
-'ilse','Ingrid',
-'imagelock','Imagelock',
-'incywincy','IncyWincy',
-'informant','Informant',
-'infoseek','InfoSeek Robot 1.0',
-'infoseeksidewinder','Infoseek Sidewinder',
-'infospider','InfoSpiders',
-'inspectorwww','Inspector Web',
-'intelliagent','IntelliAgent',
-'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.',
-'irobot','I, Robot',
-'iron33','Iron33',
-'israelisearch','Israeli-search',
-'javabee','JavaBee',
-'jbot','JBot Java Web Robot',
-'jcrawler','JCrawler',
-'jobo','JoBo Java Web Robot',
-'jobot','Jobot',
-'joebot','JoeBot',
-'jubii','The Jubii Indexing Robot',
-'jumpstation','JumpStation',
-'kapsi','image.kapsi.net',
-'katipo','Katipo',
-'kilroy','Kilroy',
-'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',
-'kummhttp','KummHttp',
-'labelgrabber\.txt','LabelGrabber',
-'larbin','larbin',
-'legs','legs',
-'linkidator','Link Validator',
-'linkscan','LinkScan',
-'lockon','Lockon',
-'logo_gif','logo.gif Crawler',
-'macworm','Mac WWWWorm',
-'lmspider','lmspider',
-'lwp\-request','lwp-request',
-'lwp\-trivial','lwp-trivial',
-'magpie','MagpieRSS',
-'marvin','marvin/infoseek',
-'mattie','Mattie',
-'mediafox','MediaFox',
-'merzscope','MerzScope',
-'meshexplorer','NEC-MeshExplorer',
-'mindcrawler','MindCrawler',
-'mnogosearch','mnoGoSearch search engine software',
-'momspider','MOMspider',
-'monster','Monster',
-'motor','Motor',
-'muncher','Muncher',
-'mwdsearch','Mwd.Search',
-'ndspider','NDSpider',
-'nederland\.zoek','Nederland.zoek',
-'netcarta','NetCarta WebMap Engine',
-'netmechanic','NetMechanic',
-'netscoop','NetScoop',
-'newscan\-online','newscan-online',
-'nhse','NHSE Web Forager',
-'northstar','The NorthStar Robot',
-'nzexplorer','nzexplorer',
-'objectssearch','ObjectsSearch',
-'occam','Occam',
-'octopus','HKU WWW Octopus',
-'openfind','Openfind data gatherer',
-'orb_search','Orb Search',
-'packrat','Pack Rat',
-'pageboy','PageBoy',
-'parasite','ParaSite',
-'patric','Patric',
-'pegasus','pegasus',
-'perignator','The Peregrinator',
-'perlcrawler','PerlCrawler 1.0',
-'phantom','Phantom',
-'phpdig','PhpDig',
-'piltdownman','PiltdownMan',
-'pimptrain','Pimptrain.com\'s robot',
-'pioneer','Pioneer',
-'pitkow','html_analyzer',
-'pjspider','Portal Juice Spider',
-'plumtreewebaccessor','PlumtreeWebAccessor',
-'poppi','Poppi',
-'portalb','PortalB Spider',
-'psbot','psbot',
-'python','Python-urllib',
-'raven','Raven Search',
-'rbse','RBSE Spider',
-'resumerobot','Resume Robot',
-'rhcs','RoadHouse Crawling System',
-'road_runner','Road Runner: The ImageScape Robot',
-'robbie','Robbie the Robot',
-'robi','ComputingSite Robi/1.0',
-'robocrawl','RoboCrawl Spider',
-'robofox','RoboFox',
-'robozilla','Robozilla',
-'roverbot','Roverbot',
-'rules','RuLeS',
-'safetynetrobot','SafetyNet Robot',
-'semalt', 'seamalt.com',
-'search\-info','Sleek',
-'search_au','Search.Aus-AU.COM',
-'searchprocess','SearchProcess',
-'senrigan','Senrigan',
-'sgscout','SG-Scout',
-'shaggy','ShagSeeker',
-'shaihulud','Shai\'Hulud',
-'sift','Sift',
-'simbot','Simmany Robot Ver1.0',
-'sistrix', 'SISTRIX Crawler',
-'site\-valet','Site Valet',
-'sitetech','SiteTech-Rover',
-'skymob','Skymob.com',
-'slcrawler','SLCrawler',
-'smartspider','Smart Spider',
-'snooper','Snooper',
-'solbot','Solbot',
-'speedy','Speedy Spider',
-'spider[_+\s]monkey','Spider monkey',
-'spiderbot','SpiderBot',
-'spiderline','Spiderline Crawler',
-'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
-'spiderman','Spiderman',
-'spiderview','SpiderView(tm)',
-'spry','Spry Wizard Robot',
-'ssearcher','Site Searcher',
-'sqworm','Sqworm',
-'suke','Suke',
-'sunrise','Sunrise',
-'suntek','suntek search engine',
-'sven','Sven',
-'tach_bw','TACH Black Widow',
-'tagyu_agent','Tagyu Agent',
-'tarantula','Tarantula',
-'tarspider','tarspider',
-'tailrank','TailRank',
-'techbot','TechBOT',
-'templeton','Templeton',
-'titan','TITAN',
-'titin','TitIn',
-'tkwww','The TkWWW Robot',
-'tlspider','TLSpider',
-'ucsd','UCSD Crawl',
-'udmsearch','UdmSearch',
-'universalfeedparser','UniversalFeedParser',
-'urlck','URL Check',
-'valkyrie','Valkyrie',
-'verticrawl','Verticrawl',
-'victoria','Victoria',
-'visionsearch','vision-search',
-'voidbot','void-bot',
-'vwbot','VWbot',
-'w3index','The NWI Robot',
-'w3m2','W3M2',
-'wallpaper','WallPaper (alias crawlpaper)',
-'wanderer','the World Wide Web Wanderer',
-'wapspider','w@pSpider by wap4.com',
-'webbandit','WebBandit Web Spider',
-'webcatcher','WebCatcher',
-'webcopy','WebCopy',
-'webfetcher','webfetcher',
-'webfoot','The Webfoot Robot',
-'webinator','Webinator',
-'weblinker','WebLinker',
-'webmirror','WebMirror',
-'webmoose','The Web Moose',
-'webquest','WebQuest',
-'webreader','Digimarc MarcSpider',
-'webreaper','WebReaper',
-'websnarf','Websnarf',
-'webspider','WebSpider',
-'webvac','WebVac',
-'webwalk','webwalk',
-'webwalker','WebWalker',
-'webwatch','WebWatch',
-'whatuseek','whatUseek Winona',
-'whowhere','WhoWhere Robot',
-'wired\-digital','Wired Digital',
-'wmir','w3mir',
-'wolp','WebStolperer',
-'wombat','The Web Wombat',
-'wordpress','WordPress',
-'worm','The World Wide Web Worm',
-'woozweb','Woozweb Monitoring',
-'wwwc','WWWC Ver 0.2.5',
-'wz101','WebZinger',
-'xenu\slink\ssleuth', 'Xenu'. "'" . 's Link Sleuth (TM), see Wikipedia',
-'xget','XGET',
-# Other robots reported by users
-'^finbot', 'finbot',
-'^webindex$', 'WebIndex',
-'1\-more_scanner','1-More Scanner',
-'360spider','360spider',
-'a6-indexer', 'A6-Indexer',
-'accoona\-ai\-agent','Accoona-AI-Agent',
-'activebookmark','ActiveBookmark',
-'adamm_bot','AdamM Bot',
-'adsbot-google', 'AdsBot-Google',
-'advbot', 'AdvBot',
-'affectv\.co\.uk', 'affectv.co.uk',
-'almaden','IBM Almaden Research Center WebFountain™',
-'aipbot','aipbot',
-'aleadsoftbot','ALeadSoftbot',
-'alpha_search_agent','Alpha Search Agent',
-'allrati','Allrati',
-'aport', 'Aport',
-'applebot', 'Applebot',
-'archive\-de\.com', 'Archive-de.com',
-'archive\.org_bot','archive.org bot',
-'argus','Argus',
-'arianna\.libero\.it','arianna.libero.it',
-'aspseek','ASPseek',
-'asterias', 'Asterias',
-'awbot', 'AWBot',
-'backlinktest\.com', 'BacklinkCrawler',
-'baiduspider','BaiDuSpider',
-'becomebot', 'BecomeBot',
-'bender','bender focused_crawler',
-'betabot','BetaBot',
-'biglotron','Biglotron',
-'bittorrent_bot','BitTorrent Bot',
-'biz360[_+\s]spider','Biz360 spider',
-'blexbot', 'BLEXBot, seems to belong to the WebMeUp backlink tool',
-'blogbridge[_+\s]service','BlogBridge Service',
-'bloglines','Bloglines',
-'blogpulse','BlogPulse ISSpider intelliseek.com',
-'blogsearch','BlogSearch',
-'blogshares','Blogshares Spiders',
-'blogslive','Blogslive',
-'blogssay','BlogsSay :: RSS Search Crawler',
-'bncf\.firenze\.sbn\.it\/raccolta\.txt','Biblioteca Nazionale Centrale di Firenze',
-'bobby', 'Bobby',
-'boitho\.com\-dc','boitho.com-dc',
-'bookmark\-manager','Bookmark-Manager',
-'boris', 'Boris',
-'bubing', 'BUbiNG',
-'bumblebee', 'Bumblebee (relevare.com)',
-'candlelight[_+\s]favorites[_+\s]inspector','Candlelight_Favorites_Inspector',
-'careerbot', 'CareerBot',
-'cbn00glebot','cbn00glebot',
-'ccbot', 'Common Crawl',
-'cerberian_drtrs','Cerberian Drtrs',
-'cfnetwork','CFNetwork',
-'cipinetbot','CipinetBot',
-'checkweb_link_validator','CheckWeb link validator',
-'cliqzbot', 'Cliqzbot',
-'commons\-httpclient','Jakarta commons-httpclient',
-'computer_and_automation_research_institute_crawler','Computer and Automation Research Institute Crawler',
-'converamultimediacrawler','ConveraMultiMediaCrawler',
-'converacrawler','ConveraCrawler',
-'copubbot', 'CoPubbot',
-'cscrawler','CsCrawler',
-'cse_html_validator_lite_online','CSE HTML Validator Lite Online','cuasarbot','Cuasarbot',
-'cursor','Cursor',
-'custo','Custo',
-'datafountains\/dmoz_downloader','DataFountains/DMOZ Downloader',
-'dataprovider\.com', 'Dataprovider Site Explorer',
-'daumoa', 'Daum',
-'daviesbot', 'DaviesBot',
-'daypopbot', 'DayPop',
-'deepindex','Deepindex',
-'deusu', 'DeuSu',
-'dipsie\.bot','Dipsie',
-'dnsgroup','DNSGroup',
-'doccheckbot', 'doccheckbot/1.0, known to Project Honey Pot',
-'domainchecker','DomainChecker',
-'domainsdb\.net','DomainsDB.net',
-'dotbot', 'DotBot, Open Site Explorer',
-'duckduckgo-favicons-bot', 'DuckDuckGo-Favicons-Bot',
-'dulance','Dulance',
-'dumbot','Dumbot',
-'dumm\.de\-bot','dumm.de-Bot',
-'earthcom\.info','EARTHCOM.info',
-'easydl','EasyDL',
-'eccp', 'Eniro Sverige, email: search (at) eniro.com',
-'edgeio\-retriever','edgeio-retriever',
-'ernst[:blank:]2\.0', 'Ernst 2.0 (does not provide any further information)',
-'ets_v','ETS Enterprise Translation Server',
-'exactseek','ExactSeek Crawler',
-'extreme[_+\s]picture[_+\s]finder','Extreme_Picture_Finder',
-'eventax','eventax',
-'everbeecrawler','EverbeeCrawler',
-'everest\-vulcan','Everest-Vulcan',
-'ezresult', 'Ezresult',
-'enteprise','Fast Enteprise Crawler',
-'facebook','FaceBook bot',
-'facebot', 'Facebot (Facebook bot?)',
-'fast\-search\-engine','Fast-Search-Engine (not fastsearch.com)',
-'fast_enterprise_crawler','FAST Enterprise Crawler',
-'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * crawleradmin.t-info@telekom.de',
-'finderlein[_+\s]research[_+\s]crawler', 'Finderlein Research Crawler 1.0 (no contact information given)',
-'matrix_s\.p\.a\._\-_fast_enterprise_crawler','Matrix S.p.A. - FAST Enterprise Crawler',
-'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de',
-'fastbot', 'fastbot',
-'favicon','FavIconizer',
-'favorg','FavOrg',
-'favorites_sweeper','Favorites Sweeper',
-'feedburner', 'Feedburner',
-'feedfetcher\-google','Feedfetcher-Google',
-'feedflow','FeedFlow',
-'feedster','Feedster',
-'feedsky','FeedSky',
-'feedvalidator','FeedValidator',
-'fetchbot', 'Fetchbot',
-'filmkamerabot','FilmkameraBot',
-'filterdb\.iss\.net', 'oBot',
-'findexa_crawler','Findexa Crawler',
-'firmilybot', 'Firmily Bot Home page (Website was hacked on Oct. 19, 2013)',
-'findlinks','Findlinks',
-'foaf-search\.net', 'Friend of a friend (FOAF) search engine',
-'fooky\.com\/ScorpionBot','Fooky.com/ScorpionBot/ScoutOut',
-'g2crawler','G2Crawler',
-'gaisbot','Gaisbot',
-'geniebot','Geniebot',
-'genieo', 'Genieo',
-'gigablastopensource', 'GigablastOpenSource, an Open Source Search Engine(Wiki)',
-'gigabot','GigaBot',
-'girafabot','Girafabot',
-'global_fetch','Global Fetch',
-'gnodspider','GNOD Spider',
-'goforit\.com','GoForIt.com',
-'goforitbot','GOFORITBOT',
-'gonzo','suchen.de',
-'gpu_p2p_crawler','GPU p2p crawler',
-'grapeshot', 'Grapeshot Crawler',
-'grub','Grub.org',
-'henrythemiragorobot', 'Mirago',
-'heritrix','Heritrix',
-'holmes', 'Holmes',
-'hoowwwer','HooWWWer',
-'hpprint','HPPrint',
-'htmlparser','HTMLParser',
-'html[_+\s]link[_+\s]validator','Html_Link_Validator',
-'httrack','HTTrack off-line browser',
-'hundesuche\.com\-bot','Hundesuche.com-Bot',
-'i-bot','i-bot',
-'icarus6j', 'Icarus6j, email address in UA string, no website',
-'ichiro','ichiro',
-'idmarch', 'IDMARCH',
-'iltrovatore\-setaccio','IlTrovatore-Setaccio',
-'implisensebot', 'ImplisenseBot',
-'infobot','InfoBot',
-'infociousbot','InfociousBot',
-'infohelfer','Infohelfer',
-'infomine','INFOMINE VLCrawler',
-'insurancobot','InsurancoBot',
-'integromedb\.org','IntegromeDB',
-'internet[_+\s]ninja','Internet_Ninja ',
-'internetarchive','InternetArchive',
-'internetseer', 'InternetSeer',
-'internetsupervision','InternetSupervision',
-'irlbot','IRLbot',
-'isearch2006','isearch2006',
-'istellabot', 'IstellaBot',
-'iupui_research_bot','IUPUI_Research_Bot',
-'izsearch', 'iZSearch',
-'james\sbot', 'James BOT',
-'jobboerse', 'Jobbörse',
-'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','JRTwine_Software_Check_Favorites_Utility',
-'justview', 'JustView',
-'kalambot','KalamBot',
-'kamano\.de_newsfeedverzeichnis','kamano.de NewsFeedVerzeichnis',
-'kazoombot','KazoomBot',
-'kevin','Kevin',
-'keyoshid','Yahoo! Japan keyoshid robot study',
-'kinjabot', 'Kinjabot',
-'kinja\-imagebot', 'Kinja Imagebot',
-'knowitall','KnowItAll',
-'knowledge\.com','Knowledge.com',
-'kouaa_krawler','Kouaa Krawler',
-'krugle','Krugle',
-'ksibot','ksibot',
-'kurzor','Kurzor',
-'lanshanbot','lanshanbot',
-'letscrawl\.com','LetsCrawl.com',
-'libcrawl','Crawl libcrawl',
-'link_valet_online','Link Valet Online',
-'linkbot','LinkBot',
-'linkdex\.com', 'Linkdex',
-'linkchecker','LinkChecker',
-'linkstats\sbot', 'LinkStats Bot',
-'lipperhey', 'Lipperhey SEO Service',
-'livejournal\.com', 'LiveJournal.com',
-'loadtimebot', 'LoadTimeBot',
-'lssrocketcrawler', 'LSSRocketCrawler (no contact information)',
-'ltbot', 'Language Tools Bot (ltbot)',
-'ltx71', 'ltx71',
-'madaali\.de', 'www.madaali.de',
-'magpierss', 'MagpieRSS',
-'mail\.ru', 'Mail.ru bot',
-'mapoftheinternet\.com','MapoftheInternet.com',
-'meanpathbot', 'Meanpathbot',
-'mediabot', 'MediaBot',
-'mediapartners\-google','Google AdSense',
-# 'Mediapartners-Google (Feb 12, 2015: no additial information in UA String, seems to use GigablastOpenSource',
-# Uses UA string "Mediapartners-Google" only, and there were accesses using an UA string "GigablastOpenSource/1.0" from the same IP-Address.
-# Therefore this is probably not related to Google 4.3.2015 Albrecht M眉ller
-'megaindex', 'MegaIndex Crawler, seems to belong to MegaIndex.ru',
-'megite','Megite',
-'memorybot', 'Archivethe.net',
-'metager2-verification-bot', 'metager2-verification-bot',
-'metager\-linkchecker','MetaGer LinkChecker',
-'metajobbot', 'MetaJobBot',
-'metaspinner','Metaspinner',
-'miadev', 'MiaDev spider',
-'microsoft\sbits', 'Microsoft Background Intelligent Transfer Service (BITS)?',
-'microsoft.*discovery', 'Microsoft Office Protocol Discovery/Microsoft Office Existence Discovery',
-'microsoft[_+\s]url[_+\s]control','Microsoft URL Control',
-'mindupbot', 'mindUpBot (datenbutler.de)',
-'minirank','miniRank',
-'mini\-reptile','Mini-reptile',
-'missigua_locator','Missigua_Locator',
-'misterbot','Misterbot',
-'miva','Miva',
-'mizzu_labs','Mizzu Labs',
-'mj12bot','MJ12bot',
-'mojeekbot','MojeekBot',
-'msiecrawler','MSIECrawler',
-'ms[_+\s]search[_+\s]6\.0[_+\s]robot','MS Search 6.0 Robot (MS SharePoint Portal Server?)',
-'ms_search_4\.0_robot','MS SharePoint Portal Server - MS Search 4.0 Robot',
-'msrabot','msrabot',
-'msrbot','MSRBOT',
-'mt::telegraph::agent','MT::Telegraph::Agent',
-'mydoyouhike','Mydoyouhike',
-'nagios','Nagios',
-'nasa_search','NASA Search',
-'netestate\sne\scrawler','Website-Datenbank',
-'netluchs','Netluchs',
-'netsprint','NetSprint',
-'newsgatoronline', 'NewsGator Online',
-'nicebot','nicebot',
-'nimblecrawler','NimbleCrawler',
-'noxtrumbot','noxtrumbot',
-'npbot','NPBot',
-'loocalcrawler/nutch', 'LoocalCrawler/Nutch',
-'nutchcvs','NutchCVS',
-'nutchosu\-vlib','NutchOSU-VLIB',
-'nutch','Nutch',
-'ocelli','Ocelli',
-'octora_beta_bot','Octora Beta Bot',
-'omniexplorer[_+\s]bot','OmniExplorer Bot',
-'onet\.pl[_+\s]sa','Onet.pl_SA',
-'onfolio','Onfolio',
-'opentaggerbot','OpenTaggerBot',
-'openwebspider','OpenWebSpider',
-'optimizer', 'Optimizer',
-'oracle_ultra_search','Oracle Ultra Search',
-'orangebot', 'OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com
-'orbiter','Orbiter',
-'yodaobot','OutfoxBot/YodaoBot',
-'qihoobot','QihooBot',
-'qwantify', 'Qwant',
-'passwordmaker\.org','passwordmaker.org',
-'pear_http_request_class','PEAR HTTP Request class',
-'peerbot','PEERbot',
-'perman', 'Perman surfer',
-'php[_+\s]version[_+\s]tracker','PHP version tracker',
-'phpcrawl', 'PHPCrawl',
-'picmole', 'Specified address www.picmole.com was not reachable on April 21, 2014',
-'pictureofinternet','PictureOfInternet',
-'ping\.blo\.gs','ping.blo.gs',
-'plinki','plinki',
-'pluckfeedcrawler','PluckFeedCrawler',
-'plukkie', 'Plukkie',
-'pogodak','Pogodak.com',
-'pompos','Pompos',
-'popdexter','Popdexter',
-'port_huron_labs','Port Huron Labs',
-'postfavorites','PostFavorites',
-'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
-'proodlebot','proodleBot',
-'publiclibraryarchive', 'publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)',
-#Observations 2014-06-23
-#Domain publiclibraryarchive.org is parked at GoDaddy.com
-#from https://www.projecthoneypot.org/
-#81.30.151.220's User Agent Strings (honeypot classified this ip as an mail server, active about 6 years ago)
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
-#176.9.138.27's User Agent Strings
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
-#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)
-#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)
-#146.0.32.165's User Agent Strings
-#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
-#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)
-#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
-'pyquery','PyQuery',
-'rambler','StackRambler',
-'redalert','Red Alert',
-'relevantnoise\.com', 'Relevant Noise',
-'riddler', 'Riddler',
-'rogerbot', 'Rogerbot',
-'rojo','RoJo aggregator',
-'rssimagesbot','rssImagesBot',
-'ruffle','ruffle SemanticWeb crawler',
-'rufusbot','RufusBot Rufus Web Miner',
-'safeads\.xyz', 'SafeAds.xyz',
-'safesearch', 'Avira SafeSearch',
-'sandcrawler','SandCrawler (Microsoft)',
-'savetheworldheritage', 'savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)',
-'sbider','SBIder',
-'schizozilla','Schizozilla',
-'scumbot','Scumbot',
-'searchguild[_+\s]dmoz[_+\s]experiment','SearchGuild_DMOZ_Experiment',
-'searchmetricsbot','SearchmetricsBot',
-'seekbot','Seekbot',
-'semrushbot', 'SemrushBot',
-'sensis_web_crawler','Sensis Web Crawler',
-'seodiver', 'SEO DIVER',
-'seokicks\.de', 'SEOkicks Webcrawler',
-'seoscanners', 'seoscanners.net (related to publiclibraryarchive.org and savetheworldheritage.org?)',
-'seznambot','SeznamBot',
-'shim\-crawler','Shim-Crawler',
-'shoutcast','Shoutcast Directory Service',
-'sitedomain-bot', 'Sitedomain.de',
-'siteexplorer\.info', 'Site Explorer',
-'skimbot', 'SkimBot',
-'slysearch','SlySearch',
-'smtbot', 'SMTBot',
-'snap\.com_beta_crawler','snap.com beta crawler',
-'sohu\-search','sohu-search',
-'sohu','sohu agent',
-'snappy','Snappy',
-'spbot', 'SEOprofiler Bot',
-'sphere_scout','Sphere Scout',
-'spip','SPIP',
-'sproose_crawler','sproose crawler',
-'ssearch_bot', 'sSearch Crawler',
-'steroid__download','STEROID Download',
-'steeler','Steeler',
-'stq_bot', 'SEARCHTEQ',
-'suchfin\-bot','Suchfin-Bot',
-'superbot','SuperBot',
-'surveybot','SurveyBot',
-'susie','Susie',
-'syndic8','Syndic8',
-'syndicapi','SyndicAPI',
-'synoobot','SynooBot',
-'tcl_http_client_package','Tcl http client package',
-'technoratibot', 'Technoratibot',
-'teragramcrawlersurf','TeragramCrawlerSURF',
-'test_crawler','Test Crawler',
-'testbot','TestBot',
-'thumbsniper', 'ThumbSniper',
-'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','T-H-U-N-D-E-R-S-T-O-N-E',
-'topicblogs', 'topicblogs',
-'turnitinbot', 'Turn It In',
-'turtle', 'Turtle',
-'turtlescanner', 'Turtle',
-'tutorgigbot','TutorGigBot',
-'twiceler','twiceler',
-'ubicrawler','UbiCrawler',
-'ultraseek', 'Ultraseek',
-'unchaos_bot_hybrid_web_search_engine','UnChaos Bot Hybrid Web Search Engine',
-'unido\-bot','unido-bot',
-'unisterbot', 'UnisterBot; E-Mail only: crawler (at) unister.de',
-'updated','updated',
-'ustc\-semantic\-group','USTC-Semantic-Group',
-'vagabondo\-wap','Vagabondo-WAP',
-'vagabondo','Vagabondo',
-'vebidoobot', 'vebidoobot',
-'vermut','Vermut',
-'versus_crawler_from_eda\.baykan@epfl\.ch','versus crawler from eda.baykan@epfl.ch',
-'vespa_crawler','Vespa Crawler',
-'voltron', 'voltron',
-'vortex','VORTEX',
-'vse\/','VSE',
-'w3c\-checklink','W3C Link Checker',
-'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', 'W3C jigsaw CSS Validator',
-'w3c_validator','W3C Validator',
-'watchmouse', 'WatchMouse Website Monitor',
-'wavefire','Wavefire',
-'waybackarchive\.org', 'No website, email: spider(at)waybackarchive.org',
-# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
-# Problably not related to the wayback machine of archive.org.
-'wbsearchbot', 'WBSearchBot',
-'webclipping\.com', 'WebClipping.com',
-'webcompass', 'webcompass',
-'webcrawl\.net','webcrawl.net',
-'web_downloader','Web Downloader',
-'webdup','Webdup',
-'webfilter','WebFilter',
-'webindexer','WebIndexer',
-'webminer','WebMiner',
-'website[_+\s]monitoring[_+\s]bot','Website_Monitoring_Bot',
-'webvulncrawl', 'WebVulnCrawl',
-'wells_search','Wells Search',
-'wer-liefert-was', 'Wer-liefert-was Crawler Note: AWStats counts most traffic as user traffic',
-'wesee:search', 'WeSEE Bot',
-'wevikabot', 'WeViKa',
-'wonderer', 'Web Wombat Redback Spider',
-'wotbox', 'Wotbox',
-'wume_crawler','wume crawler',
-'wwweasel',,'WWWeasel',
-'xenu\'s_link_sleuth','Xenu Link Sleuth',
-'xenu_link_sleuth','Xenu Link Sleuth',
-'xirq','xirq',
-'xovibot', 'XoviBot',
-'y!j', 'Y!J Yahoo Japan',
-'yacy', 'YaCy',
-'yahoo\-blogs','Yahoo-Blogs',
-'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',
-'yahoofeedseeker', 'Yahoo Feed Seeker',
-'yahooseeker\-testing', 'YahooSeeker-Testing',
-'yahooseeker', 'YahooSeeker Yahoo! Blog crawler',
-'yahoo\-mmcrawler', 'Yahoo-MMCrawler',
-'yahoo!_mindset','Yahoo! Mindset',
-'yandex', 'Yandex Bot',
-'flexum', 'Flexum Search Engine',
-'yanga', 'Yanga WorldSearch Bot',
-'yet-another-spider','Yet-Another-Spider',
-'yisouspider', 'YisouSpider (no additional information in UA string)',
-'yooglifetchagent','yoogliFetchAgent',
-'z\-add_link_checker','Z-Add Link Checker',
-'zealbot','ZealBot',
-'zhuaxia','ZhuaXia',
-'zspider','zspider',
-'zeus','Zeus Webster Pro',
-'zumbot','ZumBot',
-'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive
-'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive
-'exabot','Exabot', # put at end to avoid false positive
-# Other id that are 99% of robots
-'wget','WGet tools',
-'libwww','Perl tool',
-'^java\/[0-9]','Java (Often spam bot)', # put at end to avoid false positive
-# Generic robot
-'robot', 'Unknown robot (identified by \'robot\')',
-'checker', 'Unknown robot (identified by \'checker\')',
-'crawl', 'Unknown robot (identified by \'crawl\')',
-'discovery', 'Unknown robot (identified by \'discovery\')',
-'hunter', 'Unknown robot (identified by \'hunter\')',
-'scanner', 'Unknown robot (identified by \'scanner\')',
-'spider', 'Unknown robot (identified by \'spider\')',
-'sucker', 'Unknown robot (identified by \'sucker\')',
-'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',
-'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')',
-'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.',
-'php', 'A PHP script',
-'ruby\/', 'Ruby script',
-# Additional bots found by Sussex.
-'^[1-3]$', 'Generic bot identified as "1", "2" or "3"',
-'alltop', 'alltop',
-'applesyndication', 'applesyndication',
-'asynchttpclient', 'asynchttpclient',
-'bingbot', 'Bingbot',
-'blogged_crawl', 'blogged_crawl',
-'bloglovin', 'bloglovin',
-'butterfly', 'butterfly',
-'buzztracker', 'buzztracker',
-'carpathia', 'carpathia',
-'catbot', 'catbot',
-'chattertrap', 'chattertrap',
-'check_http', 'check_http (nagios)',
-'coldfusion', 'coldfusion',
-'covario', 'covario',
-'daylifefeedfetcher', 'daylifefeedfetcher',
-'discobot', 'discobot',
-'dlvr\.it', 'dlvr.it',
-'dreamwidth', 'dreamwidth',
-'drupal', 'Drupal Site',
-'ezoom', 'ezoom',
-'feedmyinbox', 'feedmyinbox',
-'feedroll\.com', 'feedroll.com',
-'feedzira', 'feedzira',
-'fever\/', 'Feed a Fever',
-'freenews', 'freenews',
-'geohasher', 'geohasher',
-'hanrss', 'hanrss',
-'inagist', 'inagist',
-'jacobin\sclub', 'jacobin club',
-'jakarta', 'jakarta',
-'js\-kit', 'js-kit',
-'largesmall\scrawler', 'largesmall crawler',
-'linkedinbot', 'linkedinbot',
-'longurl', 'longurl',
-'metauri', 'metauri',
-'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
-'^motorola$', 'Suspected Bot masquerading as "Motorola"',
-'movabletype', 'movabletype',
-'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
-'^msie', 'Suspected bot masquerading as M$ IE',
-'netnewswire', 'netnewswire',
-'\snetseer\s', 'Net Seer',
-'netvibes', 'netvibes',
-'newrelicpinger', 'newrelicpinger',
-'newsfox', 'Fox News',
-'nextgensearchbot', 'nextgensearchbot',
-'ning', 'ning',
-'pingdom', 'pingdom',
-'pita', 'pita (pain in the ass?)',
-'postpost', 'postpost',
-'postrank', 'postrank',
-'printfulbot', 'printfulbot',
-'protopage', 'protopage',
-'proximic', 'Proximic Spider',
-'quipply', 'quipply',
-'r6\_', 'Radian 6 Crawler',
-'ratingburner', 'ratingburner',
-'regator', 'regator',
-'rome\sclient', 'rome client',
-'rpt\-httpclient', 'rpt-httpclient',
-'rssgraffiti', 'rssgraffiti',
-'sage\+\+', 'sage++',
-'scoutjet', 'ScoutJet crawler for Blekko.',
-'simplepie', 'simplepie',
-'sitebot', 'sitebot',
-'summify\.com', 'summify.com',
-'superfeedr', 'superfeedr',
-'synthesio', 'synthesio',
-'teoma', 'teoma',
-'topblogsinfo', 'topblogsinfo',
-'topix\.net', 'topix.net',
-'trapit', 'trapit',
-'trileet', 'trileet',
-'tweetedtimes', 'The Tweeted Times',
-'twisted\spagegetter', 'twisted pagegetter',
-'twitterbot', 'Twitterbot',
-'twitterfeed', 'twitterfeed',
-'unwindfetchor', 'unwindfetchor',
-'wazzup', 'wazzup',
-'windows\-rss\-platform', 'windows-rss-platform',
-'wiumi', 'wiumi',
-'xydo', 'xydo',
-'yahoo!\sslurp', 'Additional Yahoo bots.',
-'yahoo\spipes', 'Additional Yahoo bots.',
-'yahoo\-newscrawler', 'Additional Yahoo bots.',
-'yahoocachesystem', 'Additional Yahoo bots.',
-'yahooexternalcache', 'Additional Yahoo bots.',
-'yahoo!\ssearchmonkey', 'Additional Yahoo bots.',
-'yahooysmcm', 'Additional Yahoo bots.',
-'yammer', 'yammer',
-#'yandexbot', 'yandexbot', #already covered by 'yandex'
-'yeti', 'yeti',
-'yie8', 'yie8',
-'youdao', 'youdao',
-'yourls', 'yourls',
-'zemanta', 'zemanta',
-'zend_http_client', 'Zend Http Client',
-'no_user_agent','Unknown robot (identified by empty user agent string)',
-# Unknown robots identified by hit on robots.txt
-'unknown', 'Unknown robot (identified by hit on \'robots.txt\')'
-);
-
-
-# RobotsAffiliateLib
-# This list try to tell by which Search Engine a robot is used
-#-------------------------------------------------------------
-%RobotsAffiliateLib = (
-'bingpreview'=>'Bing',
-'fast\-webcrawler'=>'AllTheWeb',
-'googlebot'=>'Google',
-'google\-sitemap'=>'Google',
-'google[_+\s]web[_+\s]preview'=>'Google',
-'msnbot'=>'MSN',
-'nutch'=>'Looksmart',
-'scooter'=>'AltaVista',
-'wisenutbot'=>'Looksmart',
-'yahoo\-blogs'=>'Yahoo',
-'yahoo\-verticalcrawler'=>'Yahoo',
-'yahoofeedseeker'=>'Yahoo',
-'yahooseeker\-testing'=>'Yahoo',
-'yahooseeker'=>'Yahoo',
-'yahoo\-mmcrawler'=>'Yahoo',
-'yahoo!_mindset'=>'Yahoo',
-'zyborg'=>'Looksmart',
-'cfetch'=>'Kosmix',
-'^voyager\/'=>'Kosmix',
-# Additional bots found by Sussex.
-'feedfetcher\-google'=>'Google',
-'bingbot'=>'MSN',
-'twitterbot'=>'Twitter',
-'twitterfeed'=>'Twitter',
-'yahoo!\sslurp'=>'Yahoo',
-'yahoo\spipes'=>'Yahoo',
-'yahoo-newscrawler'=>'Yahoo',
-'yahoocachesystem'=>'Yahoo',
-'yahooexternalcache'=>'Yahoo',
-'yahoo!\ssearchmonkey'=>'Yahoo',
-'yahooysmcm'=>'Yahoo'
-);
-
-1;
+# AWSTATS ROBOTS DATABASE
+#-------------------------------------------------------
+# If you want to add robots to extend AWStats database detection capabilities,
+# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.
+
+# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
+# (see http://perldoc.perl.org/perlreref.html). AWStats applies these
+# expressions to the user agent string in the order given by the lists. The
+# first match specifies the robot.
+#
+# Note: This regular expression must not contain any whitespace.
+# Otherwise AWStats will produce lines in the database that
+# will be misinterpreted and as a consequence the corresponding data in the
+# generated HTML reports will be wrong. If you want to match whitespace in
+# the user agent string, use other constructs like '\s', '[[:blank:]]',
+# '\p{IsSpace}', '\x20' etc.
+#
+# The corresponding entry in RobotsHashIDLib contains the regular expression
+# as key, followed by a string containing HTML-text. AWStats inserts this
+# text into reports to describe the bot. If possible the text should contain
+# a link to the bot home page. This makes it easier for sysadmins to find
+# the information necessary e.g. to adapt the robots.txt file.
+#
+# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
+# contains as first part the regular expression specifying the bot. The
+# second part is a string that gives the Company or product managing the bot.
+# This information is not used yet.
+#
+# There are several sorts of bots that AWStats is not able to detect and
+# therefore a considerable amount of bot generated traffic counts
+# as user traffic:
+#
+# a) A crawler that identifies itself in the referrer string, but not in
+# the user agent string. An example is the crawler from semalt.semalt.com.
+#
+# b) Crawlers that correctly access robots.txt but identify themselves
+# in the user agent string only once or just a few times. Most of the
+# time a user agent string is used that does not contain hints that
+# a bot is involved. An example is the iCjobs spider.
+# msnbot-UDiscovery/2.0b seems to show this behaviour too.
+#
+#
+#
+#-------------------------------------------------------
+
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
+# added dipsie (not tested with real data).
+# added DomainsDB.net http://domainsdb.net/
+# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
+# added Nutch (used by looksmart (furl?))
+# added rssImagesBot
+# added Sqworm
+# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
+# added w3c css-validator
+# added documentation link to bot home pages for above and selected major bots.
+# In the case of international bots, choose .com page.
+# Included tool tip (html "title").
+# To do: parameterize to match both AWStats language and tooltips settings.
+# To do: add html links for all bots based on current documentation in source
+# files referenced below.
+# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
+# made minor grammar corrections to notes below
+# 2005-08-24 added YahooSeeker-Testing
+# added w3c-checklink
+# updated url for ask.com
+# 2005-08-24 added Girafabot http://www.girafa.com/
+# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/
+# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
+# added geniebot (wgao@genieknows.com)
+# added BecomeBot link http://www.become.com/site_owners.html
+# added topicblogs http://www.topicblogs.com/
+# added Powermarks; seen used by referrer spam
+# added YahooSeeker
+# added NG/2. http://www.exabot.com/
+# 2005-09-15 added link for Walhello appie
+# added bender focused_crawler
+# updated YahooSeeker description (blog crawler)
+# 2005-09-16 added link for http://linkchecker.sourceforge.net
+# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
+# added Blogslive info@blogslive.com intelliseek.com
+# added BlogPulse (ISSpider-3.0) intelliseek.com
+# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+# added EverbeeCrawler
+# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
+# added link for Bloglines http://www.bloglines.com
+# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+# added Blogshares Spiders (Synchronized V1.5.1)
+# added yacy
+# 2005-11-21 added Argus www.simpy.com
+# added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
+# added MJ12bot http://majestic12.co.uk/bot.php
+# added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
+# added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
+# added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
+# added Seekbot (http://www.seekbot.net/bot.html)
+# added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
+# added link for BaiDuSpider
+# added link for Blogshares Spider
+# added link for StackRambler http://www.rambler.ru/doc/faq.shtml
+# added link for WISENutbot
+# added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com. Moved location to above wisenut to avoid classification as wisenut
+# 2005-12-15
+# added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
+# added findlinks http://wortschatz.uni-leipzig.de/findlinks/
+# added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]
+# added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
+# added lmspider (lmspider@scansoft.com) http://www.nuance.com/
+# added noxtrumbot http://www.noxtrum.com/
+# added SandCrawler (Microsoft)
+# added SBIder http://www.sitesell.com/sbider.html
+# added SeznamBot http://fulltext.seznam.cz/
+# added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
+# added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
+# added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
+# added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
+# added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
+# added link for GigaBot
+# added link for MagpieRSS
+# added link for MSIECrawler
+# 2005-12-21
+# added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
+# added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
+# added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70 users.sourceforge.net]
+# added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
+# added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt). May be used as robot or browser - a site may want to remove this entry.
+# added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
+# added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
+# 2005-12-22
+# added EARTHCOM.info www.earthcom.info
+# added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
+# added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
+# 2006-01-01
+# added Dulance http://www.dulance.com/bot.jsp
+# added MojeekBot http://www.mojeek.com/bot.html
+# added nicebot http://www.egghelp.org/setup.htm ?
+# added Snappy http://www.urltrends.com/faq.php
+# added sohu agent
+# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
+# added zspider http://feedback.redkolibri.com/
+# 2006-01-13
+# added boitho.com-dc http://www.boitho.com/dcbot.html
+# added IRLbot http://irl.cs.tamu.edu/crawler
+# added virus_detector virus_harvester@securecomputing.com
+# added Wavefire http://www.wavefire.com; info@wavefire.com
+# added WebFilter Robot
+# 2006-01-24
+# added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
+# added Exabot exabot.com
+# added LetsCrawl.com http://letscrawl.com
+# added ichiro http://help.goo.ne.jp/door/crawlerE.html
+# 2006-01-27 additional 22 robots from a list provided by Moizes Gabor
+# added ALeadSoftbot http://www.aleadsoft.com/bot.htm
+# added CipinetBot http://www.cipinet.com/bot.html
+# added Cuasarbot http://www.cuasar.com/
+# added Dumbot http://www.dumbfind.com/
+# added Extreme_Picture_Finder http://www.exisoftware.com/
+# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots
+# added IlTrovatore-Setaccio http://www.iltrovatore.it/aiuto/motore_di_ricerca.html bot@iltrovatore.it
+# added InsurancoBot http://www.fastspywareremoval.com/
+# added InternetArchive http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
+# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca
+# added Kurzor http://www.easymail.hu/ cursor@easymail.hu
+# added NutchCVS http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
+# added NutchOSU-VLIB http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
+# added Orbiter http://www.dailyorbit.com/bot.htm
+# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php
+# added SuperBot http://www.sparkleware.com/superbot/
+# added SynooBot http://www.synoo.de/bot.html webmaster@synoo.com
+# added TestBot http://www.agbrain.com/
+# added TutorGigBot http://www.tutorgig.info/
+# added WebIndexer mailto://webindexerv1@yahoo.com
+# added WebMiner http://64.124.122.252/feedback.html
+# 2006-02-01
+# added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
+# added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
+# additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
+# added Candlelight_Favorites_Inspector
+# added DomainChecker
+# added EasyDL
+# added FavOrg
+# added Favorites_Sweeper
+# added Html_Link_Validator
+# added Internet_Ninja
+# added JRTwine_Software_Check_Favorites_Utility
+# fixed Microsoft_URL_Control
+# added miniRank
+# added Missigua_Locator
+# added NPBot
+# added Ocelli
+# added Onet.pl_SA
+# added proodleBot
+# added SearchGuild_DMOZ_Experiment
+# added Susie
+# added Website_Monitoring_Bot
+# added Xenu_Link_Sleuth
+# 2006-05-15
+# added ASPseek http://www.aspseek.org/
+# added AdamM Bot http://home.blic.net/adamm/
+# added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
+# added arianna.libero.it (Italian Portal/search engine)
+# added Biz360 spider http://www.biz360.com
+# added BlogBridge Service http://www.blogbridge.com/
+# added BlogSearch http://www.icerocket.com/
+# added libcrawl
+# added edgeio-relanshanbottriever http://www.edgeio.com
+# added FeedFlow http://feedflow.com/about
+# added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
+# added Java catchall - used by many spam bots
+# added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
+# added msnbot-media http://search.msn.com/msnbot.htm
+# added MT::Telegraph::Agent
+# added Netluchs http://www.netluchs.de/ (German SE bot)
+# added oBot http://www.webmasterworld.com/forum11/1616.htm
+# added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds.
+# added ping.blo.gs http://blo.gs/ping.php blog bot
+# added Sphere Scout http://www.sphere.com/
+# added sproose crawler http://www.sproose.com/bot.html
+# added SyndicAPI http://syndicapi.com/bot.html
+# added Yahoo! Mindset http://mindset.research.yahoo.com/
+# added msrabot
+# added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents&lang=uk
+# fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
+# changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
+# This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
+# 2006-05-17
+# added Alpha Search Agent # 62.152.125.60 Eurologon Srl
+# added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
+# added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
+# added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
+# added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
+# You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
+# 2006-05-20
+# added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
+# added Accoona-AI-Agent http://www.accoona.com/
+# added ActiveBookmark http://www.libmaster.com/active_bookmark.php
+# added BIGLOTRON http://www.biglotron.com/robot.html
+# added Bookmark-Manager http://bkm.sourceforge.net/
+# added cbn00glebot
+# added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
+# added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
+# added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
+# added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
+# added ConveraCrawler http://www.authoritativeweb.com/crawl/
+# added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
+# added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
+# added Cursor http://adcenter.hu/docs/en/bot.html
+# added Custo http://www.netwu.com/custo/
+# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
+# added Deepindex http://www.deepindex.net/faq.php
+# added DNSGroup http://www.dnsgroup.com/
+# added DoCoMo http://www.nttdocomo.co.jp/
+# added dumm.de-Bot http://www.dumm.de/
+# added ETS v http://www.freetranslation.com/help/
+# added eventax http://www.eventax.de/
+# added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
+# added FAST Enterprise Crawler http://www.fast.no/
+# added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
+# added FeedValidator http://feedvalidator.org/
+# added FilmkameraBot http://www.filmkamera.at/bot.html
+# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
+# added Global Fetch http://www.wesonet.com/
+# added GOFORITBOT http://www.goforit.com/about/
+# added GoForIt.com http://www.goforit.com/about/
+# added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
+# added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
+# added HPPrint
+# added HTMLParser http://htmlparser.sourceforge.net/
+# added Hundesuche.com-Bot http://www.hundesuche.com/
+# added InfoBot http://www.infobot.org/
+# added InfociousBot http://corp.infocious.com/tech_crawler.php
+# added InternetSupervision http://internetsupervision.com/
+# added isearch2006 http://www.yahoo.com.cn/
+# added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
+# added KalamBot http://64.124.122.251/feedback.html
+# added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
+# added Kevin http://dznet.com/kevin/
+# added KnowItAll http://www.cs.washington.edu/research/knowitall/
+# added Knowledge.com http://www.knowledge.com/
+# added Kouaa Krawler http://www.kouaa.com/
+# added ksibot http://ego.ms.mff.cuni.cz/
+# added Link Valet Online http://www.htmlhelp.com/tools/valet/
+# added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
+# added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
+# added MapoftheInternet.com http://MapoftheInternet.com/
+# added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
+# added Megite http://www.megite.com/
+# added Metaspinner http://index.meta-spinner.de/
+# added Mini-reptile
+# added Misterbot http://www.misterbot.fr/
+# added Miva http://www.miva.com/
+# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
+# added MSRBOT http://research.microsoft.com/research/sv/msrbot/
+# added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
+# added Mydoyouhike http://www.doyouhike.net/my
+# added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
+# added NetSprint http://www.netsprint.pl/serwis/
+# added NimbleCrawler http://www.healthline.com/
+# added OpenWebSpider http://www.openwebspider.org/
+# added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
+# added OSSProxy http://www.marketscore.com/FAQ.Aspx
+# added passwordmaker.org http://passwordmaker.org/
+# added PEAR HTTP Request class http://pear.php.net/
+# added PEERbot http://www.peerbot.com/
+# added PHP version tracker http://www.nexen.net/phpversion/bot.php
+# added PictureOfInternet http://malfunction.org/poi/
+# added plinki http://www.plinki.com/
+# added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
+# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
+# added ProjectWF-java-test-crawler
+# added PyQuery http://sourceforge.net/projects/pyquery/
+# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
+# added Scumbot
+# added Sensis Web Crawler http://www.sensis.com.au/
+# added snap.com beta crawler http://www.snap.com/
+# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
+# added STEROID Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
+# added Suchfin-Bot http://www.suchfin.de/
+# added Sunrise http://www.sunrisexp.com/
+# added Tagyu Agent http://www.tagyu.com/
+# added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
+# added TeragramCrawlerSURF http://www.teragram.com/
+# added Test Crawler http://netp.ath.cx/
+# added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
+# added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
+# added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
+# added updated http://www.updated.com/
+# added Vermut http://vermut.aol.com
+# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
+# added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
+# added VSE http://www.vivisimo.com/
+# added webcrawl.net http://www.webcrawl.net/
+# added Web Downloader http://www.krasu.ru/soft/chuchelo/
+# added Webdup http://www.webdup.com/en/index.html
+# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
+# added WordPress http://wordpress.org/
+# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
+# added Xenu's Link Sleuth (with ')
+# added xirq http://www.xirq.com/
+# added yoogliFetchAgent http://www.yoogli.com/
+# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
+# -- fix - some robots were reported with _ where _ should have been a space.
+# changed Xenu Link Sleuth
+# changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
+# changed favorites_sweeper -> favorites_sweeper
+# -- updates
+# updated AskJeeves to Ask
+# 2012-06-05 Albrecht Mueller
+# added Grabber from SDSC (San Diego Supercomputer Center).
+# 2013-09-30 Albrecht Mueller
+# AWStats probably cannot detect this bot as it identifies itself in
+# the referrer field and not in the user agent string.
+#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+
+# to do MS Search 4.0 Robot
+
+#package AWSROB;
+
+
+# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
+# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
+# Rem: To avoid bad detection, some robot's ids were removed from this list:
+# - Robots with ID of 3 letters only
+# - Robots called 'webs' and 'tcl'
+# Rem: directhit changed into direct_hit (its real id)
+# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
+# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
+# Rem: roadrunner changed into road_runner
+# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
+# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers
+
+# RobotsSearchIDOrder
+# It contains all matching criteria to search for in log fields. This list is
+# used to know in which order to search Robot IDs.
+# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
+# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
+# Note: Robot IDs are in lower case; '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
+#-------------------------------------------------------
+@RobotsSearchIDOrder_list1 = ( # Most frequent robots; each entry is a regex, tried in list order, first match wins
+# Common robots (In robot file)
+'appie',
+'architext',
+'bingpreview',
+'bjaaland',
+'contentmatch',
+'ferret',
+'googlebot\-image', # Must be before googlebot, which would also match this user agent
+'googlebot',
+'google\-sitemaps',
+'google[_+\s]web[_+\s]preview',
+'grabber',
+'gulliver',
+'virus[_+\s]detector', # Must be before harvest
+'harvest',
+'htdig',
+'jeeves',
+'linkwalker',
+'lilina',
+'lycos[_+\s]', # Trailing separator is required, so a bare "lycos" is not matched
+'moget',
+'muscatferret', # NOTE(review): looks shadowed by 'ferret' above (first match wins) - confirm intended order
+'myweb',
+'nomad',
+'scooter',
+'slurp',
+'^voyager\/', # Anchored: only matches when the user agent starts with "voyager/"
+'weblayers',
+# Common robots (Not in robot file)
+'antibot',
+'bruinbot',
+'digout4u',
+'echo!',
+'fast\-webcrawler',
+'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
+'ia_archiver',
+'jennybot',
+'mercator',
+'netcraft',
+'msnbot\-media', # Must be before msnbot, which would also match this user agent
+'msnbot-udiscovery', # Must be before msnbot, which would also match this user agent
+'msnbot',
+'petersnews',
+'relevantnoise\.com',
+'unlost_web_crawler',
+'voila',
+'webbase',
+'webcollage',
+'cfetch',
+'zyborg', # Must be before wisenut
+'wisenutbot'
+);
+@RobotsSearchIDOrder_list2 = (
+# Less common robots (In robot file)
+'007ac9',
+'[^a]fish',
+'abcdatos',
+'abonti\.com',
+'acme\.spider',
+'ahoythehomepagefinder',
+'ahrefsbot',
+'alkaline',
+'anthill',
+'arachnophilia',
+'arale',
+'araneo',
+'aretha',
+'ariadne',
+'powermarks',
+'arks',
+'aspider',
+'atn\.txt',
+'atomz',
+'auresys',
+'backrub',
+'bbot',
+'bigbrother',
+'blackwidow',
+'blindekuh',
+'bloodhound',
+'borg\-bot',
+'brightnet',
+'bspider',
+'cactvschemistryspider',
+'calif[^r]',
+'cassandra',
+'cgireader',
+'checkbot',
+'christcrawler',
+'churl',
+'cienciaficcion',
+'cms\scrawler',
+'collective',
+'combine',
+'conceptbot',
+'coolbot',
+'core',
+'cosmos',
+'crazywebcrawler',
+'cruiser',
+'cusco',
+'cyberspyder',
+'desertrealm',
+'deweb',
+'dienstspider',
+'digger',
+'diibot',
+'direct_hit',
+'dnabot',
+'domainappender',
+'download_express',
+'dragonbot',
+'dwcp',
+'e\-collector',
+'ebiness',
+'elfinbot',
+'emacs',
+'emcspider',
+'esther',
+'evliyacelebi',
+'fastcrawler',
+'feedcrawl',
+'fdse',
+'felix',
+'fetchrover',
+'fido',
+'finnish',
+'fireball',
+'fouineur',
+'francoroute',
+'freecrawl',
+'funnelweb',
+'gama',
+'gazz',
+'gcreep',
+'getbot',
+'geturl',
+'golem',
+'gougou',
+'grapnel',
+'griffon',
+'gromit',
+'gulperbot',
+'hambot',
+'havindex',
+'hometown',
+'htmlgobble',
+'hyperdecontextualizer',
+'iajabot',
+'iaskspider',
+'hl_ftien_spider',
+'sogou',
+'icjobs\.de',
+'iconoclast',
+'ilse',
+'imagelock',
+'incywincy',
+'informant',
+'infoseek',
+'infoseeksidewinder',
+'infospider',
+'inspectorwww',
+'intelliagent',
+'irobot',
+'iron33',
+'israelisearch',
+'javabee',
+'jbot',
+'jcrawler',
+'jobo',
+'jobot',
+'joebot',
+'jubii',
+'jumpstation',
+'kapsi',
+'katipo',
+'kilroy',
+'ko[_+\s]yappo[_+\s]robot',
+'kummhttp',
+'labelgrabber\.txt',
+'larbin',
+'legs',
+'linkidator',
+'linkscan',
+'lockon',
+'logo_gif',
+'macworm',
+'magpie',
+'marvin',
+'mattie',
+'mediafox',
+'merzscope',
+'meshexplorer',
+'mindcrawler',
+'mnogosearch',
+'momspider',
+'monster',
+'motor',
+'muncher',
+'mwdsearch',
+'ndspider',
+'nederland\.zoek',
+'netcarta',
+'netmechanic',
+'netscoop',
+'newscan\-online',
+'nhse',
+'northstar',
+'nzexplorer',
+'objectssearch',
+'occam',
+'octopus',
+'openfind',
+'orb_search',
+'packrat',
+'pageboy',
+'parasite',
+'patric',
+'pegasus',
+'perignator',
+'perlcrawler',
+'phantom',
+'phpdig',
+'piltdownman',
+'pimptrain',
+'pioneer',
+'pitkow',
+'pjspider',
+'plumtreewebaccessor',
+'poppi',
+'portalb',
+'psbot',
+'python',
+'raven',
+'rbse',
+'resumerobot',
+'rhcs',
+'road_runner',
+'robbie',
+'robi',
+'robocrawl',
+'robofox',
+'robozilla',
+'roverbot',
+'rules',
+'safetynetrobot',
+'semalt', #Note: This entry will not work as this crawler identifies itself
+# in the referrer string and not in the user agent string
+'search\-info',
+'search_au',
+'searchprocess',
+'senrigan',
+'sgscout',
+'shaggy',
+'shaihulud',
+'sift',
+'simbot',
+'sistrix', #Virus/trojan-infection? fr-crawler, ca-crawler? See https://www.projecthoneypot.org/ip_37.59.55.128, https://www.projecthoneypot.org/ip_198.27.80.144
+'site\-valet',
+'sitetech',
+'skymob',
+'slcrawler',
+'smartspider',
+'snooper',
+'solbot',
+'speedy',
+'spider[_+\s]monkey',
+'spiderbot',
+'spiderline',
+'spiderman',
+'spiderview',
+'spry',
+'sqworm',
+'ssearcher',
+'suke',
+'sunrise',
+'suntek',
+'sven',
+'tach_bw',
+'tagyu_agent',
+'tailrank',
+'tarantula',
+'tarspider',
+'techbot',
+'templeton',
+'titan',
+'titin',
+'tkwww',
+'tlspider',
+'ucsd',
+'udmsearch',
+'universalfeedparser',
+'urlck',
+'valkyrie',
+'verticrawl',
+'victoria',
+'visionsearch',
+'voidbot',
+'vwbot',
+'w3index',
+'w3m2',
+'wallpaper',
+'wanderer',
+'wapspIRLider',
+'webbandit',
+'webcatcher',
+'webcopy',
+'webfetcher',
+'webfoot',
+'webinator',
+'weblinker',
+'webmirror',
+'webmoose',
+'webquest',
+'webreader',
+'webreaper',
+'websnarf',
+'webspider',
+'webvac',
+'webwalk',
+'webwalker',
+'webwatch',
+'whatuseek',
+'whowhere',
+'wired\-digital',
+'wmir',
+'wolp',
+'wombat',
+'wordpress',
+'worm',
+'woozweb',
+'wwwc',
+'wz101',
+'xenu\slink\ssleuth',
+'xget',
+# Other robots reported by users
+'^finbot', #UA string starts with "finbot", should not match "elfinbot"
+'^webindex$', #UA should not match "webindexer"
+'1\-more_scanner',
+'360spider',
+'a6-indexer',
+'accoona\-ai\-agent',
+'activebookmark',
+'adamm_bot',
+'adsbot-google',
+'advbot',
+'affectv\.co\.uk',
+'almaden',
+'aipbot',
+'aleadsoftbot',
+'alpha_search_agent',
+'allrati',
+'aport',
+'applebot',
+'archive\-de\.com',
+'archive\.org_bot',
+'argus', # Must be before nutch
+'arianna\.libero\.it',
+'aspseek',
+'asterias',
+'awbot',
+'backlinktest\.com',
+'baiduspider',
+'becomebot',
+'bender',
+'betabot',
+'biglotron',
+'bittorrent_bot',
+'biz360[_+\s]spider',
+'blexbot',
+'blogbridge[_+\s]service',
+'bloglines',
+'blogpulse',
+'blogsearch',
+'blogshares',
+'blogslive',
+'blogssay',
+'bncf\.firenze\.sbn\.it\/raccolta\.txt',
+'bobby',
+'boitho\.com\-dc',
+'bookmark\-manager',
+'boris',
+'bubing',
+'bumblebee',
+'candlelight[_+\s]favorites[_+\s]inspector',
+'careerbot',
+'cbn00glebot',
+'ccbot',
+'cerberian_drtrs',
+'cfnetwork',
+'cipinetbot',
+'checkweb_link_validator',
+'cliqzbot',
+'commons\-httpclient',
+'computer_and_automation_research_institute_crawler',
+'converamultimediacrawler',
+'converacrawler',
+'copubbot',
+'cscrawler',
+'cse_html_validator_lite_online',
+'cuasarbot',
+'cursor',
+'custo',
+'datafountains\/dmoz_downloader',
+'dataprovider\.com',
+'daumoa',
+'daviesbot',
+'daypopbot',
+'deepindex',
+'deusu',
+'dipsie\.bot',
+'dnsgroup',
+'doccheckbot',
+'domainchecker',
+'domainsdb\.net',
+'dotbot',
+'duckduckgo-favicons-bot',
+'dulance',
+'dumbot',
+'dumm\.de\-bot',
+'earthcom\.info',
+'easydl',
+'eccp',
+'edgeio\-retriever',
+'ernst[:blank:]2\.0',
+'ets_v',
+'exactseek',
+'extreme[_+\s]picture[_+\s]finder',
+'eventax',
+'everbeecrawler',
+'everest\-vulcan',
+'ezresult',
+'enteprise',
+'facebook',
+'facebot',
+'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
+'finderlein[_+\s]research[_+\s]crawler',
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler
+'fast_enterprise_crawler',
+'fast\-search\-engine',
+'fastbot',
+'favicon',
+'favorg',
+'favorites_sweeper',
+'feedburner',
+'feedfetcher\-google',
+'feedflow',
+'feedster',
+'feedsky',
+'feedvalidator',
+'fetchbot',
+'filmkamerabot',
+'filterdb\.iss\.net',
+'findlinks',
+'findexa_crawler',
+'firmilybot',
+'foaf-search\.net',
+'fooky\.com\/ScorpionBot',
+'g2crawler',
+'gaisbot',
+'geniebot',
+'genieo',
+'gigablastopensource',
+'gigabot',
+'girafabot',
+'global_fetch',
+'gnodspider',
+'goforit\.com',
+'goforitbot',
+'gonzo',
+'grapeshot',
+'grub',
+'gpu_p2p_crawler',
+'henrythemiragorobot',
+'heritrix',
+'holmes',
+'hoowwwer',
+'hpprint',
+'htmlparser',
+'html[_+\s]link[_+\s]validator',
+'httrack',
+'hundesuche\.com\-bot',
+'i-bot',
+'icarus6j',
+'ichiro',
+'idmarch',
+'iltrovatore\-setaccio',
+'implisensebot',
+'infobot',
+'infociousbot',
+'infohelfer',
+'infomine',
+'insurancobot',
+'integromedb\.org',
+'internet[_+\s]ninja',
+'internetarchive',
+'internetseer',
+'internetsupervision',
+'ips\-agent',
+'irlbot',
+'isearch2006',
+'istellabot',
+'iupui_research_bot',
+'izsearch',
+'james\sbot',
+'jobboerse', #AWStats seems not to find this one despite the fact that "JobboerseBot" and "jobboerse.com" appear in the UA-string, maybe some previous entry matches
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
+'justview',
+'kalambot',
+'kamano\.de_newsfeedverzeichnis',
+'kazoombot',
+'kevin',
+'keyoshid', # Must come before Y!J
+'kinjabot',
+'kinja\-imagebot',
+'knowitall',
+'knowledge\.com',
+'kouaa_krawler',
+'krugle',
+'ksibot',
+'kurzor',
+'lanshanbot',
+'letscrawl\.com',
+'libcrawl',
+'linkbot',
+'linkdex\.com',
+'link_valet_online',
+'metager\-linkchecker', # Must be before linkchecker
+'linkchecker',
+'linkstats\sbot',
+'lipperhey',
+'livejournal\.com',
+'lmspider',
+'loadtimebot',
+'lssrocketcrawler',
+'ltbot',
+'ltx71',
+'lwp\-request',
+'lwp\-trivial',
+'madaali\.de',
+'magpierss',
+'mail\.ru',
+'mapoftheinternet\.com',
+'meanpathbot',
+'mediabot',
+'mediapartners\-google',
+'megaindex',
+'megite',
+'memorybot',
+'metager2-verification-bot',
+'metajobbot', #Does not show up in the results of Sep. 2015 despite the fact that the corresponing log file has about 40 entries containing "MetaJobBot" in the UA string - strange.
+'metaspinner',
+'miadev',
+'microsoft\sbits',
+'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
+'microsoft[_+\s]url[_+\s]control',
+'mindupbot',
+'mini\-reptile',
+'minirank',
+'missigua_locator',
+'misterbot',
+'miva',
+'mizzu_labs',
+'mj12bot',
+'mojeekbot',
+'msiecrawler',
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
+'ms_search_4\.0_robot',
+'msrabot',
+'msrbot',
+'mt::telegraph::agent',
+'mydoyouhike',
+'nagios',
+'nasa_search',
+'netestate\sne\scrawler',
+'netluchs',
+'netsprint',
+'newsgatoronline',
+'nicebot',
+'nimblecrawler',
+'noxtrumbot',
+'npbot',
+'loocalcrawler/nutch',
+'nutchcvs',
+'nutchosu\-vlib',
+'nutch', # Must come after other nutch versions
+'ocelli',
+'octora_beta_bot',
+'omniexplorer[_+\s]bot',
+'onet\.pl[_+\s]sa',
+'onfolio',
+'opentaggerbot',
+'openwebspider',
+'optimizer',
+'oracle_ultra_search',
+'orangebot',
+'orbiter',
+'yodaobot',
+'qihoobot',
+'qwantify',
+'passwordmaker\.org',
+'pear_http_request_class',
+'peerbot',
+'perman',
+'php[_+\s]version[_+\s]tracker',
+'phpcrawl',
+'picmole',
+'pictureofinternet',
+'ping\.blo\.gs',
+'plinki',
+'pluckfeedcrawler',
+'plukkie',
+'pogodak',
+'pompos',
+'popdexter',
+'port_huron_labs',
+'postfavorites',
+'projectwf\-java\-test\-crawler',
+'proodlebot',
+'publiclibraryarchive',
+'pyquery',
+'rambler',
+'redalert',
+'riddler',
+'rogerbot',
+'rojo',
+'rssimagesbot',
+'ruffle',
+'rufusbot',
+'safeads\.xyz',
+'safesearch',
+'sandcrawler',
+'savetheworldheritage',
+'sbider',
+'schizozilla',
+'scumbot',
+'searchguild[_+\s]dmoz[_+\s]experiment',
+'searchmetricsbot',
+'seekbot',
+'semrushbot',
+'sensis_web_crawler',
+'seodiver',
+'seokicks\.de',
+'seoscanners',
+'seznambot',
+'shim\-crawler',
+'shoutcast',
+'sitedomain-bot',
+'siteexplorer\.info',
+'skimbot',
+'slysearch',
+'smtbot',
+'snap\.com_beta_crawler',
+'sohu\-search',
+'sohu', # "sohu agent"
+'snappy',
+'spbot',
+'sphere_scout',
+'spiderlytics',
+'spip',
+'sproose_crawler',
+'ssearch_bot',
+'steeler',
+'steroid__download',
+'stq_bot',
+'suchfin\-bot',
+'superbot',
+'surveybot',
+'susie',
+'syndic8',
+'syndicapi',
+'synoobot',
+'tcl_http_client_package',
+'technoratibot',
+'teragramcrawlersurf',
+'test_crawler',
+'testbot',
+'thumbsniper',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
+'topicblogs',
+'turnitinbot',
+'turtlescanner', # Must be before turtle
+'turtle',
+'tutorgigbot',
+'twiceler',
+'ubicrawler',
+'ultraseek',
+'unchaos_bot_hybrid_web_search_engine',
+'unido\-bot',
+'unisterbot',
+'updated',
+'ustc\-semantic\-group',
+'vagabondo\-wap',
+'vagabondo',
+'vebidoobot',
+'vermut',
+'versus_crawler_from_eda\.baykan@epfl\.ch',
+'vespa_crawler',
+'voltron',
+'vortex',
+'vse\/',
+'w3c\-checklink',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
+'w3c_validator',
+'watchmouse',
+'wavefire',
+'waybackarchive\.org',
+'wbsearchbot',
+'webclipping\.com',
+'webcompass',
+'webcrawl\.net',
+'web_downloader',
+'webdup',
+'webfilter',
+'webindexer',
+'webminer',
+'website[_+\s]monitoring[_+\s]bot',
+'webvulncrawl',
+'wells_search',
+'wer-liefert-was',
+'wesee:search',
+'wevikabot',
+'wonderer',
+'wotbox',
+'wume_crawler',
+'wwweasel',
+'xenu\'s_link_sleuth',
+'xenu_link_sleuth',
+'xirq',
+'xovibot',
+'y!j', # Must come after keyoshid Y!J
+'yacy',
+'yahoo\-blogs',
+'yahoo\-verticalcrawler',
+'yahoofeedseeker',
+'yahooseeker\-testing',
+'yahooseeker',
+'yahoo\-mmcrawler',
+'yahoo!_mindset',
+'yandex',
+'flexum',
+'yanga',
+'yet-another-spider',
+'yisouspider',
+'yooglifetchagent',
+'z\-add_link_checker',
+'zealbot',
+'zhuaxia',
+'zspider',
+'zeus',
+'ng\/1\.', # put at end to avoid false positive
+'ng\/2\.', # put at end to avoid false positive
+'exabot', # put at end to avoid false positive
+# Additional bots found by Sussex.
+'^[1-3]$', # Hiding bots. Doesn't appear to be a valid user agent.
+'alltop',
+'applesyndication',
+'asynchttpclient',
+'bingbot',
+'blogged_crawl',
+'bloglovin',
+'butterfly',
+'buzztracker',
+'carpathia',
+'catbot',
+'chattertrap',
+'check_http', #(nagios) a monitoring tool
+'coldfusion',
+'covario',
+'daylifefeedfetcher',
+'discobot',
+'dlvr\.it',
+'dreamwidth',
+'drupal',
+'ezoom',
+'feedmyinbox',
+'feedroll\.com',
+'feedzira',
+'fever\/',
+'freenews',
+'geohasher',
+'hanrss',
+'inagist',
+'jacobin\sclub',
+'jakarta',
+'js\-kit',
+'largesmall\scrawler',
+'linkedinbot',
+'longurl',
+'metauri',
+'microsoft\-webdav\-miniredir',
+'^motorola$',
+'movabletype',
+# These appear to be bots trying to hide. All of the usual architecture data is missing.
+'^mozilla\/3\.0\s\(compatible$',
+'^mozilla\/4\.0$',
+'^mozilla\/4\.0\s\(compatible;\)$',
+'^mozilla\/5\.0$',
+'^mozilla\/5\.0\s\(compatible;$',
+'^mozilla\/5\.0\s\(en\-us\)$',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
+'^msie',
+# End of hiding bots.
+'netnewswire',
+'\snetseer\s',
+'netvibes',
+'newrelicpinger',
+'newsfox',
+'nextgensearchbot',
+'ning',
+'pingdom',
+'pita',
+'postpost',
+'postrank',
+'printfulbot',
+'protopage',
+'proximic',
+'quipply',
+'r6\_',
+'ratingburner',
+'regator',
+'rome\sclient',
+'rpt\-httpclient',
+'rssgraffiti',
+'sage\+\+',
+'scoutjet',
+'simplepie',
+'sitebot',
+'summify\.com',
+'superfeedr',
+'synthesio',
+'teoma',
+'topblogsinfo',
+'topix\.net',
+'trapit',
+'trileet',
+'tweetedtimes',
+'twisted\spagegetter',
+'twitterbot',
+'twitterfeed',
+'unwindfetchor',
+'wazzup',
+'windows\-rss\-platform',
+'wiumi',
+'xydo',
+'yahoo!\sslurp',
+'yahoo\spipes',
+'yahoo\-newscrawler',
+'yahoocachesystem',
+'yahooexternalcache',
+'yahoo!\ssearchmonkey',
+'yahooysmcm',
+'yammer',
+# 'yandexbot', #already covered by 'yandex'
+'yeti',
+'yie8',
+'youdao',
+'yourls',
+'zemanta',
+'zend_http_client',
+'zumbot',
+# Other id that are 99% of robots
+'wget',
+'libwww',
+'^java\/[0-9]' # put at end to avoid false positive
+);
+@RobotsSearchIDOrder_listgen = (
+# Generic robot patterns (broad keywords and regex fragments, not names of specific robots)
+'robot',
+'checker',
+'crawl',
+'discovery',
+'hunter',
+'scanner',
+'spider',
+'sucker',
+'bot[\s_+:,\.\;\/\\\-]',
+# NOTE(review): the "bot"+separator pattern above (presumably matched case-insensitively - confirm) identifies
+#"Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT P9 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
+#as a bot. There is an Android mobile phone called "CUBOT P9", so this is probably not a bot (known false positive).
+'[\s_+:,\.\;\/\\\-]bot',
+'curl',
+'php',
+'ruby\/',
+'no_user_agent'
+);
+
+
+
+# RobotsHashIDLib
+# List of robots names ('robot id','robot clear text')
+#-------------------------------------------------------
+%RobotsHashIDLib = (
+# Common robots (In robot file)
+'appie','Walhello appie',
+'architext','ArchitextSpider',
+'bingpreview','Bing Preview bot',
+'bjaaland','Bjaaland',
+'ferret','Wild Ferret Web Hopper #1, #2, #3',
+'contentmatch','Yahoo!China ContentMatch Crawler',
+'googlebot\-image','Googlebot-Image',
+'googlebot','Googlebot',
+'google\-sitemaps', 'Google Sitemaps',
+'grabber', 'Grabber (SDSC)',
+'google[_+\s]web[_+\s]preview', 'Google Web Preview',
+'gulliver','Northern Light Gulliver',
+'virus[_+\s]detector','virus_detector',
+'harvest','Harvest',
+'htdig','ht://Dig',
+'jeeves','Ask',
+'linkwalker','LinkWalker',
+'lilina','Lilina',
+'lycos[_+\s]','Lycos',
+'moget','moget',
+'muscatferret','Muscat Ferret',
+'myweb','Internet Shinchakubin',
+'nomad','Nomad',
+'scooter','Scooter',
+'slurp','Yahoo Slurp',
+'^voyager\/','Voyager',
+'weblayers','Weblayers',
+# Common robots (Not in robot file)
+'antibot','Antibot',
+'bruinbot','The web archive',
+'digout4u','Digout4u',
+'echo!','EchO!',
+'fast\-webcrawler','Fast-Webcrawler',
+'ia_archiver\-web\.archive\.org','The web archive (IA Archiver)',
+'ia_archiver','Alexa (IA Archiver)',
+'jennybot','JennyBot',
+'mercator','Mercator',
+'msnbot\-media','MSNBot-media',
+'msnbot-udiscovery', 'msnbot-UDiscovery Note: AWStats counts most of its traffic as user traffic',
+'msnbot','MSNBot',
+'netcraft','Netcraft',
+'petersnews','Petersnews',
+'unlost_web_crawler','Unlost Web Crawler',
+'voila','Voila',
+'webbase', 'WebBase',
+'zyborg','ZyBorg',
+'wisenutbot','WISENutbot',
+'webcollage','WebCollage',
+'cfetch','Cfetch',
+# Less common robots (In robot file)
+'007ac9', '007ac9 Crawler, seems to belong to SISTRIX',
+'[^a]fish','Fish search',
+'abcdatos','ABCdatos BotLink',
+'abonti\.com','Abonti WebSearch',
+'acme\.spider','Acme.Spider',
+'ahoythehomepagefinder','Ahoy! The Homepage Finder',
+'ahrefsbot', 'AhrefsBot',
+'alkaline','Alkaline',
+'anthill','Anthill',
+'arachnophilia','Arachnophilia',
+'arale','Arale',
+'araneo','Araneo',
+'aretha','Aretha',
+'ariadne','ARIADNE',
+'powermarks','Powermarks', # must come before Arks; seen used by referrer spam
+'arks','arks',
+'aspider','ASpider (Associative Spider)',
+'atn\.txt','ATN Worldwide',
+'atomz','Atomz.com Search Robot',
+'auresys','AURESYS',
+'backrub','BackRub',
+'bbot','BBot',
+'bigbrother','Big Brother',
+'blackwidow','BlackWidow',
+'blindekuh','Die Blinde Kuh',
+'bloodhound','Bloodhound',
+'borg\-bot','Borg-Bot',
+'brightnet','bright.net caching robot',
+'bspider','BSpider',
+'cactvschemistryspider','CACTVS Chemistry Spider',
+'calif[^r]','Calif',
+'cassandra','Cassandra',
+'cgireader','Digimarc Marcspider/CGI',
+'checkbot','Checkbot',
+'christcrawler','ChristCrawler.com',
+'churl','churl',
+'cienciaficcion','cIeNcIaFiCcIoN.nEt',
+'cms\scrawler', 'CMS Crawler',
+'collective','Collective',
+'combine','Combine System',
+'conceptbot','Conceptbot',
+'coolbot','CoolBot',
+'core','Web Core / Roots',
+'cosmos','XYLEME Robot',
+'crazywebcrawler', 'CrazyWeb Crawler',
+'cruiser','Internet Cruiser Robot',
+'cusco','Cusco',
+'cyberspyder','CyberSpyder Link Test',
+'desertrealm','Desert Realm Spider',
+'deweb','DeWeb(c) Katalog/Index',
+'dienstspider','DienstSpider',
+'digger','Digger',
+'diibot','Digital Integrity Robot',
+'direct_hit','Direct Hit Grabber',
+'dnabot','DNAbot',
+'domainappender', 'DomainAppender',
+'download_express','DownLoad Express',
+'dragonbot','DragonBot',
+'dwcp','DWCP (Dridus\' Web Cataloging Project)',
+'e\-collector','e-collector',
+'ebiness','EbiNess',
+'elfinbot','ELFINBOT',
+'emacs','Emacs-w3 Search Engine',
+'emcspider','ananzi',
+'esther','Esther',
+'evliyacelebi','Evliya Celebi',
+'fastcrawler','FastCrawler',
+'feedcrawl','FeedCrawl by feed@aobo.com',
+'fdse','Fluid Dynamics Search Engine robot',
+'felix','Felix IDE',
+'fetchrover','FetchRover',
+'fido','fido',
+'finnish','Finnish',
+'fireball','KIT-Fireball',
+'fouineur','Fouineur',
+'francoroute','Robot Francoroute',
+'freecrawl','Freecrawl',
+'funnelweb','FunnelWeb',
+'gama','gammaSpider, FocusedCrawler',
+'gazz','gazz',
+'gcreep','GCreep',
+'getbot','GetBot',
+'geturl','GetURL',
+'golem','Golem',
+'gougou','GouGou',
+'grapnel','Grapnel/0.01 Experiment',
+'griffon','Griffon',
+'gromit','Gromit',
+'gulperbot','Gulper Bot',
+'hambot','HamBot',
+'havindex','havIndex',
+'hometown','Hometown Spider Pro',
+'htmlgobble','HTMLgobble',
+'hyperdecontextualizer','Hyper-Decontextualizer',
+'iajabot','iajaBot',
+'iaskspider','Sina Iask Spider',
+'hl_ftien_spider','Hylanda',
+'sogou','Sogou Spider',
+'icjobs\.de', 'iCjobs Spider Note: Most traffic counts as user traffic',
+#20130805 The user agent string of the icjobs-spider contained the
+#identifying string only when it accessed the robots.txt file.
+#When it accessed the actual content it did not identify itself as
+#a spider. Thus traffic of this spider was counted as user traffic.
+#The behaviour seems to have changed now - the spider identifies itself
+#when it accesses content pages.
+#20141401 Behavior as before: Does identify itself when it accesses
+# robots.txt and the root page. The following traffic does not contain
+# the identification string and is therefore counted as user traffic.
+'iconoclast','Popular Iconoclast',
+'ilse','Ingrid',
+'imagelock','Imagelock',
+'incywincy','IncyWincy',
+'informant','Informant',
+'infoseek','InfoSeek Robot 1.0',
+'infoseeksidewinder','Infoseek Sidewinder',
+'infospider','InfoSpiders',
+'inspectorwww','Inspector Web',
+'intelliagent','IntelliAgent',
+'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.',
+'irobot','I, Robot',
+'iron33','Iron33',
+'israelisearch','Israeli-search',
+'javabee','JavaBee',
+'jbot','JBot Java Web Robot',
+'jcrawler','JCrawler',
+'jobo','JoBo Java Web Robot',
+'jobot','Jobot',
+'joebot','JoeBot',
+'jubii','The Jubii Indexing Robot',
+'jumpstation','JumpStation',
+'kapsi','image.kapsi.net',
+'katipo','Katipo',
+'kilroy','Kilroy',
+'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',
+'kummhttp','KummHttp',
+'labelgrabber\.txt','LabelGrabber',
+'larbin','larbin',
+'legs','legs',
+'linkidator','Link Validator',
+'linkscan','LinkScan',
+'lockon','Lockon',
+'logo_gif','logo.gif Crawler',
+'macworm','Mac WWWWorm',
+'lmspider','lmspider',
+'lwp\-request','lwp-request',
+'lwp\-trivial','lwp-trivial',
+'magpie','MagpieRSS',
+'marvin','marvin/infoseek',
+'mattie','Mattie',
+'mediafox','MediaFox',
+'merzscope','MerzScope',
+'meshexplorer','NEC-MeshExplorer',
+'mindcrawler','MindCrawler',
+'mnogosearch','mnoGoSearch search engine software',
+'momspider','MOMspider',
+'monster','Monster',
+'motor','Motor',
+'muncher','Muncher',
+'mwdsearch','Mwd.Search',
+'ndspider','NDSpider',
+'nederland\.zoek','Nederland.zoek',
+'netcarta','NetCarta WebMap Engine',
+'netmechanic','NetMechanic',
+'netscoop','NetScoop',
+'newscan\-online','newscan-online',
+'nhse','NHSE Web Forager',
+'northstar','The NorthStar Robot',
+'nzexplorer','nzexplorer',
+'objectssearch','ObjectsSearch',
+'occam','Occam',
+'octopus','HKU WWW Octopus',
+'openfind','Openfind data gatherer',
+'orb_search','Orb Search',
+'packrat','Pack Rat',
+'pageboy','PageBoy',
+'parasite','ParaSite',
+'patric','Patric',
+'pegasus','pegasus',
+'perignator','The Peregrinator',
+'perlcrawler','PerlCrawler 1.0',
+'phantom','Phantom',
+'phpdig','PhpDig',
+'piltdownman','PiltdownMan',
+'pimptrain','Pimptrain.com\'s robot',
+'pioneer','Pioneer',
+'pitkow','html_analyzer',
+'pjspider','Portal Juice Spider',
+'plumtreewebaccessor','PlumtreeWebAccessor',
+'poppi','Poppi',
+'portalb','PortalB Spider',
+'psbot','psbot',
+'python','Python-urllib',
+'raven','Raven Search',
+'rbse','RBSE Spider',
+'resumerobot','Resume Robot',
+'rhcs','RoadHouse Crawling System',
+'road_runner','Road Runner: The ImageScape Robot',
+'robbie','Robbie the Robot',
+'robi','ComputingSite Robi/1.0',
+'robocrawl','RoboCrawl Spider',
+'robofox','RoboFox',
+'robozilla','Robozilla',
+'roverbot','Roverbot',
+'rules','RuLeS',
+'safetynetrobot','SafetyNet Robot',
+'semalt', 'seamalt.com',
+'search\-info','Sleek',
+'search_au','Search.Aus-AU.COM',
+'searchprocess','SearchProcess',
+'senrigan','Senrigan',
+'sgscout','SG-Scout',
+'shaggy','ShagSeeker',
+'shaihulud','Shai\'Hulud',
+'sift','Sift',
+'simbot','Simmany Robot Ver1.0',
+'sistrix', 'SISTRIX Crawler',
+'site\-valet','Site Valet',
+'sitetech','SiteTech-Rover',
+'skymob','Skymob.com',
+'slcrawler','SLCrawler',
+'smartspider','Smart Spider',
+'snooper','Snooper',
+'solbot','Solbot',
+'speedy','Speedy Spider',
+'spider[_+\s]monkey','Spider monkey',
+'spiderbot','SpiderBot',
+'spiderline','Spiderline Crawler',
+'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
+'spiderman','Spiderman',
+'spiderview','SpiderView(tm)',
+'spry','Spry Wizard Robot',
+'ssearcher','Site Searcher',
+'sqworm','Sqworm',
+'suke','Suke',
+'sunrise','Sunrise',
+'suntek','suntek search engine',
+'sven','Sven',
+'tach_bw','TACH Black Widow',
+'tagyu_agent','Tagyu Agent',
+'tarantula','Tarantula',
+'tarspider','tarspider',
+'tailrank','TailRank',
+'techbot','TechBOT',
+'templeton','Templeton',
+'titan','TITAN',
+'titin','TitIn',
+'tkwww','The TkWWW Robot',
+'tlspider','TLSpider',
+'ucsd','UCSD Crawl',
+'udmsearch','UdmSearch',
+'universalfeedparser','UniversalFeedParser',
+'urlck','URL Check',
+'valkyrie','Valkyrie',
+'verticrawl','Verticrawl',
+'victoria','Victoria',
+'visionsearch','vision-search',
+'voidbot','void-bot',
+'vwbot','VWbot',
+'w3index','The NWI Robot',
+'w3m2','W3M2',
+'wallpaper','WallPaper (alias crawlpaper)',
+'wanderer','the World Wide Web Wanderer',
+'wapspider','w@pSpider by wap4.com',
+'webbandit','WebBandit Web Spider',
+'webcatcher','WebCatcher',
+'webcopy','WebCopy',
+'webfetcher','webfetcher',
+'webfoot','The Webfoot Robot',
+'webinator','Webinator',
+'weblinker','WebLinker',
+'webmirror','WebMirror',
+'webmoose','The Web Moose',
+'webquest','WebQuest',
+'webreader','Digimarc MarcSpider',
+'webreaper','WebReaper',
+'websnarf','Websnarf',
+'webspider','WebSpider',
+'webvac','WebVac',
+'webwalk','webwalk',
+'webwalker','WebWalker',
+'webwatch','WebWatch',
+'whatuseek','whatUseek Winona',
+'whowhere','WhoWhere Robot',
+'wired\-digital','Wired Digital',
+'wmir','w3mir',
+'wolp','WebStolperer',
+'wombat','The Web Wombat',
+'wordpress','WordPress',
+'worm','The World Wide Web Worm',
+'woozweb','Woozweb Monitoring',
+'wwwc','WWWC Ver 0.2.5',
+'wz101','WebZinger',
+'xenu\slink\ssleuth', 'Xenu'. "'" . 's Link Sleuth (TM), see Wikipedia',
+'xget','XGET',
+# Other robots reported by users
+'^finbot', 'finbot',
+'^webindex$', 'WebIndex',
+'1\-more_scanner','1-More Scanner',
+'360spider','360spider',
+'a6-indexer', 'A6-Indexer',
+'accoona\-ai\-agent','Accoona-AI-Agent',
+'activebookmark','ActiveBookmark',
+'adamm_bot','AdamM Bot',
+'adsbot-google', 'AdsBot-Google',
+'advbot', 'AdvBot',
+'affectv\.co\.uk', 'affectv.co.uk',
+'almaden','IBM Almaden Research Center WebFountain™',
+'aipbot','aipbot',
+'aleadsoftbot','ALeadSoftbot',
+'alpha_search_agent','Alpha Search Agent',
+'allrati','Allrati',
+'aport', 'Aport',
+'applebot', 'Applebot',
+'archive\-de\.com', 'Archive-de.com',
+'archive\.org_bot','archive.org bot',
+'argus','Argus',
+'arianna\.libero\.it','arianna.libero.it',
+'aspseek','ASPseek',
+'asterias', 'Asterias',
+'awbot', 'AWBot',
+'backlinktest\.com', 'BacklinkCrawler',
+'baiduspider','BaiDuSpider',
+'becomebot', 'BecomeBot',
+'bender','bender focused_crawler',
+'betabot','BetaBot',
+'biglotron','Biglotron',
+'bittorrent_bot','BitTorrent Bot',
+'biz360[_+\s]spider','Biz360 spider',
+'blexbot', 'BLEXBot, seems to belong to the WebMeUp backlink tool',
+'blogbridge[_+\s]service','BlogBridge Service',
+'bloglines','Bloglines',
+'blogpulse','BlogPulse ISSpider intelliseek.com',
+'blogsearch','BlogSearch',
+'blogshares','Blogshares Spiders',
+'blogslive','Blogslive',
+'blogssay','BlogsSay :: RSS Search Crawler',
+'bncf\.firenze\.sbn\.it\/raccolta\.txt','Biblioteca Nazionale Centrale di Firenze',
+'bobby', 'Bobby',
+'boitho\.com\-dc','boitho.com-dc',
+'bookmark\-manager','Bookmark-Manager',
+'boris', 'Boris',
+'bubing', 'BUbiNG',
+'bumblebee', 'Bumblebee (relevare.com)',
+'candlelight[_+\s]favorites[_+\s]inspector','Candlelight_Favorites_Inspector',
+'careerbot', 'CareerBot',
+'cbn00glebot','cbn00glebot',
+'ccbot', 'Common Crawl',
+'cerberian_drtrs','Cerberian Drtrs',
+'cfnetwork','CFNetwork',
+'cipinetbot','CipinetBot',
+'checkweb_link_validator','CheckWeb link validator',
+'cliqzbot', 'Cliqzbot',
+'commons\-httpclient','Jakarta commons-httpclient',
+'computer_and_automation_research_institute_crawler','Computer and Automation Research Institute Crawler',
+'converamultimediacrawler','ConveraMultiMediaCrawler',
+'converacrawler','ConveraCrawler',
+'copubbot', 'CoPubbot',
+'cscrawler','CsCrawler',
+'cse_html_validator_lite_online','CSE HTML Validator Lite Online','cuasarbot','Cuasarbot',
+'cursor','Cursor',
+'custo','Custo',
+'datafountains\/dmoz_downloader','DataFountains/DMOZ Downloader',
+'dataprovider\.com', 'Dataprovider Site Explorer',
+'daumoa', 'Daum',
+'daviesbot', 'DaviesBot',
+'daypopbot', 'DayPop',
+'deepindex','Deepindex',
+'deusu', 'DeuSu',
+'dipsie\.bot','Dipsie',
+'dnsgroup','DNSGroup',
+'doccheckbot', 'doccheckbot/1.0, known to Project Honey Pot',
+'domainchecker','DomainChecker',
+'domainsdb\.net','DomainsDB.net',
+'dotbot', 'DotBot, Open Site Explorer',
+'duckduckgo-favicons-bot', 'DuckDuckGo-Favicons-Bot',
+'dulance','Dulance',
+'dumbot','Dumbot',
+'dumm\.de\-bot','dumm.de-Bot',
+'earthcom\.info','EARTHCOM.info',
+'easydl','EasyDL',
+'eccp', 'Eniro Sverige, email: search (at) eniro.com',
+'edgeio\-retriever','edgeio-retriever',
+'ernst[:blank:]2\.0', 'Ernst 2.0 (does not provide any further information)',
+'ets_v','ETS Enterprise Translation Server',
+'exactseek','ExactSeek Crawler',
+'extreme[_+\s]picture[_+\s]finder','Extreme_Picture_Finder',
+'eventax','eventax',
+'everbeecrawler','EverbeeCrawler',
+'everest\-vulcan','Everest-Vulcan',
+'ezresult', 'Ezresult',
+'enteprise','Fast Enteprise Crawler',
+'facebook','FaceBook bot',
+'facebot', 'Facebot (Facebook bot?)',
+'fast\-search\-engine','Fast-Search-Engine (not fastsearch.com)',
+'fast_enterprise_crawler','FAST Enterprise Crawler',
+'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * crawleradmin.t-info@telekom.de',
+'finderlein[_+\s]research[_+\s]crawler', 'Finderlein Research Crawler 1.0 (no contact information given)',
+'matrix_s\.p\.a\._\-_fast_enterprise_crawler','Matrix S.p.A. - FAST Enterprise Crawler',
+'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de',
+'fastbot', 'fastbot',
+'favicon','FavIconizer',
+'favorg','FavOrg',
+'favorites_sweeper','Favorites Sweeper',
+'feedburner', 'Feedburner',
+'feedfetcher\-google','Feedfetcher-Google',
+'feedflow','FeedFlow',
+'feedster','Feedster',
+'feedsky','FeedSky',
+'feedvalidator','FeedValidator',
+'fetchbot', 'Fetchbot',
+'filmkamerabot','FilmkameraBot',
+'filterdb\.iss\.net', 'oBot',
+'findexa_crawler','Findexa Crawler',
+'firmilybot', 'Firmily Bot Home page (Website was hacked on Oct. 19, 2013)',
+'findlinks','Findlinks',
+'foaf-search\.net', 'Friend of a friend (FOAF) search engine',
+'fooky\.com\/ScorpionBot','Fooky.com/ScorpionBot/ScoutOut',
+'g2crawler','G2Crawler',
+'gaisbot','Gaisbot',
+'geniebot','Geniebot',
+'genieo', 'Genieo',
+'gigablastopensource', 'GigablastOpenSource, an Open Source Search Engine(Wiki)',
+'gigabot','GigaBot',
+'girafabot','Girafabot',
+'global_fetch','Global Fetch',
+'gnodspider','GNOD Spider',
+'goforit\.com','GoForIt.com',
+'goforitbot','GOFORITBOT',
+'gonzo','suchen.de',
+'gpu_p2p_crawler','GPU p2p crawler',
+'grapeshot', 'Grapeshot Crawler',
+'grub','Grub.org',
+'henrythemiragorobot', 'Mirago',
+'heritrix','Heritrix',
+'holmes', 'Holmes',
+'hoowwwer','HooWWWer',
+'hpprint','HPPrint',
+'htmlparser','HTMLParser',
+'html[_+\s]link[_+\s]validator','Html_Link_Validator',
+'httrack','HTTrack off-line browser',
+'hundesuche\.com\-bot','Hundesuche.com-Bot',
+'i-bot','i-bot',
+'icarus6j', 'Icarus6j, email address in UA string, no website',
+'ichiro','ichiro',
+'idmarch', 'IDMARCH',
+'iltrovatore\-setaccio','IlTrovatore-Setaccio',
+'implisensebot', 'ImplisenseBot',
+'infobot','InfoBot',
+'infociousbot','InfociousBot',
+'infohelfer','Infohelfer',
+'infomine','INFOMINE VLCrawler',
+'insurancobot','InsurancoBot',
+'integromedb\.org','IntegromeDB',
+'internet[_+\s]ninja','Internet_Ninja ',
+'internetarchive','InternetArchive',
+'internetseer', 'InternetSeer',
+'internetsupervision','InternetSupervision',
+'irlbot','IRLbot',
+'isearch2006','isearch2006',
+'istellabot', 'IstellaBot',
+'iupui_research_bot','IUPUI_Research_Bot',
+'izsearch', 'iZSearch',
+'james\sbot', 'James BOT',
+'jobboerse', 'Jobbörse',
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','JRTwine_Software_Check_Favorites_Utility',
+'justview', 'JustView',
+'kalambot','KalamBot',
+'kamano\.de_newsfeedverzeichnis','kamano.de NewsFeedVerzeichnis',
+'kazoombot','KazoomBot',
+'kevin','Kevin',
+'keyoshid','Yahoo! Japan keyoshid robot study',
+'kinjabot', 'Kinjabot',
+'kinja\-imagebot', 'Kinja Imagebot',
+'knowitall','KnowItAll',
+'knowledge\.com','Knowledge.com',
+'kouaa_krawler','Kouaa Krawler',
+'krugle','Krugle',
+'ksibot','ksibot',
+'kurzor','Kurzor',
+'lanshanbot','lanshanbot',
+'letscrawl\.com','LetsCrawl.com',
+'libcrawl','Crawl libcrawl',
+'link_valet_online','Link Valet Online',
+'linkbot','LinkBot',
+'linkdex\.com', 'Linkdex',
+'linkchecker','LinkChecker',
+'linkstats\sbot', 'LinkStats Bot',
+'lipperhey', 'Lipperhey SEO Service',
+'livejournal\.com', 'LiveJournal.com',
+'loadtimebot', 'LoadTimeBot',
+'lssrocketcrawler', 'LSSRocketCrawler (no contact information)',
+'ltbot', 'Language Tools Bot (ltbot)',
+'ltx71', 'ltx71',
+'madaali\.de', 'www.madaali.de',
+'magpierss', 'MagpieRSS',
+'mail\.ru', 'Mail.ru bot',
+'mapoftheinternet\.com','MapoftheInternet.com',
+'meanpathbot', 'Meanpathbot',
+'mediabot', 'MediaBot',
+'mediapartners\-google','Google AdSense',
+# 'Mediapartners-Google (Feb 12, 2015: no additional information in UA String, seems to use GigablastOpenSource',
+# Uses UA string "Mediapartners-Google" only, and there were accesses using an UA string "GigablastOpenSource/1.0" from the same IP-Address.
+# Therefore this is probably not related to Google. 4.3.2015 Albrecht Müller
+'megaindex', 'MegaIndex Crawler, seems to belong to MegaIndex.ru',
+'megite','Megite',
+'memorybot', 'Archivethe.net',
+'metager2-verification-bot', 'metager2-verification-bot',
+'metager\-linkchecker','MetaGer LinkChecker',
+'metajobbot', 'MetaJobBot',
+'metaspinner','Metaspinner',
+'miadev', 'MiaDev spider',
+'microsoft\sbits', 'Microsoft Background Intelligent Transfer Service (BITS)?',
+'microsoft.*discovery', 'Microsoft Office Protocol Discovery/Microsoft Office Existence Discovery',
+'microsoft[_+\s]url[_+\s]control','Microsoft URL Control',
+'mindupbot', 'mindUpBot (datenbutler.de)',
+'minirank','miniRank',
+'mini\-reptile','Mini-reptile',
+'missigua_locator','Missigua_Locator',
+'misterbot','Misterbot',
+'miva','Miva',
+'mizzu_labs','Mizzu Labs',
+'mj12bot','MJ12bot',
+'mojeekbot','MojeekBot',
+'msiecrawler','MSIECrawler',
+'ms[_+\s]search[_+\s]6\.0[_+\s]robot','MS Search 6.0 Robot (MS SharePoint Portal Server?)',
+'ms_search_4\.0_robot','MS SharePoint Portal Server - MS Search 4.0 Robot',
+'msrabot','msrabot',
+'msrbot','MSRBOT',
+'mt::telegraph::agent','MT::Telegraph::Agent',
+'mydoyouhike','Mydoyouhike',
+'nagios','Nagios',
+'nasa_search','NASA Search',
+'netestate\sne\scrawler','Website-Datenbank',
+'netluchs','Netluchs',
+'netsprint','NetSprint',
+'newsgatoronline', 'NewsGator Online',
+'nicebot','nicebot',
+'nimblecrawler','NimbleCrawler',
+'noxtrumbot','noxtrumbot',
+'npbot','NPBot',
+'loocalcrawler/nutch', 'LoocalCrawler/Nutch',
+'nutchcvs','NutchCVS',
+'nutchosu\-vlib','NutchOSU-VLIB',
+'nutch','Nutch',
+'ocelli','Ocelli',
+'octora_beta_bot','Octora Beta Bot',
+'omniexplorer[_+\s]bot','OmniExplorer Bot',
+'onet\.pl[_+\s]sa','Onet.pl_SA',
+'onfolio','Onfolio',
+'opentaggerbot','OpenTaggerBot',
+'openwebspider','OpenWebSpider',
+'optimizer', 'Optimizer',
+'oracle_ultra_search','Oracle Ultra Search',
+'orangebot', 'OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com
+'orbiter','Orbiter',
+'yodaobot','OutfoxBot/YodaoBot',
+'qihoobot','QihooBot',
+'qwantify', 'Qwant',
+'passwordmaker\.org','passwordmaker.org',
+'pear_http_request_class','PEAR HTTP Request class',
+'peerbot','PEERbot',
+'perman', 'Perman surfer',
+'php[_+\s]version[_+\s]tracker','PHP version tracker',
+'phpcrawl', 'PHPCrawl',
+'picmole', 'Specified address www.picmole.com was not reachable on April 21, 2014',
+'pictureofinternet','PictureOfInternet',
+'ping\.blo\.gs','ping.blo.gs',
+'plinki','plinki',
+'pluckfeedcrawler','PluckFeedCrawler',
+'plukkie', 'Plukkie',
+'pogodak','Pogodak.com',
+'pompos','Pompos',
+'popdexter','Popdexter',
+'port_huron_labs','Port Huron Labs',
+'postfavorites','PostFavorites',
+'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
+'proodlebot','proodleBot',
+'publiclibraryarchive', 'publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)',
+#Observations 2014-06-23
+#Domain publiclibraryarchive.org is parked at GoDaddy.com
+#from https://www.projecthoneypot.org/
+#81.30.151.220's User Agent Strings (honeypot classified this IP as a mail server, active about 6 years ago)
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#176.9.138.27's User Agent Strings
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)
+#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)
+#146.0.32.165's User Agent Strings
+#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
+#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)
+#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
+'pyquery','PyQuery',
+'rambler','StackRambler',
+'redalert','Red Alert',
+'relevantnoise\.com', 'Relevant Noise',
+'riddler', 'Riddler',
+'rogerbot', 'Rogerbot',
+'rojo','RoJo aggregator',
+'rssimagesbot','rssImagesBot',
+'ruffle','ruffle SemanticWeb crawler',
+'rufusbot','RufusBot Rufus Web Miner',
+'safeads\.xyz', 'SafeAds.xyz',
+'safesearch', 'Avira SafeSearch',
+'sandcrawler','SandCrawler (Microsoft)',
+'savetheworldheritage', 'savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)',
+'sbider','SBIder',
+'schizozilla','Schizozilla',
+'scumbot','Scumbot',
+'searchguild[_+\s]dmoz[_+\s]experiment','SearchGuild_DMOZ_Experiment',
+'searchmetricsbot','SearchmetricsBot',
+'seekbot','Seekbot',
+'semrushbot', 'SemrushBot',
+'sensis_web_crawler','Sensis Web Crawler',
+'seodiver', 'SEO DIVER',
+'seokicks\.de', 'SEOkicks Webcrawler',
+'seoscanners', 'seoscanners.net (related to publiclibraryarchive.org and savetheworldheritage.org?)',
+'seznambot','SeznamBot',
+'shim\-crawler','Shim-Crawler',
+'shoutcast','Shoutcast Directory Service',
+'sitedomain-bot', 'Sitedomain.de',
+'siteexplorer\.info', 'Site Explorer',
+'skimbot', 'SkimBot',
+'slysearch','SlySearch',
+'smtbot', 'SMTBot',
+'snap\.com_beta_crawler','snap.com beta crawler',
+'sohu\-search','sohu-search',
+'sohu','sohu agent',
+'snappy','Snappy',
+'spbot', 'SEOprofiler Bot',
+'sphere_scout','Sphere Scout',
+'spip','SPIP',
+'sproose_crawler','sproose crawler',
+'ssearch_bot', 'sSearch Crawler',
+'steroid__download','STEROID Download',
+'steeler','Steeler',
+'stq_bot', 'SEARCHTEQ',
+'suchfin\-bot','Suchfin-Bot',
+'superbot','SuperBot',
+'surveybot','SurveyBot',
+'susie','Susie',
+'syndic8','Syndic8',
+'syndicapi','SyndicAPI',
+'synoobot','SynooBot',
+'tcl_http_client_package','Tcl http client package',
+'technoratibot', 'Technoratibot',
+'teragramcrawlersurf','TeragramCrawlerSURF',
+'test_crawler','Test Crawler',
+'testbot','TestBot',
+'thumbsniper', 'ThumbSniper',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','T-H-U-N-D-E-R-S-T-O-N-E',
+'topicblogs', 'topicblogs',
+'turnitinbot', 'Turn It In',
+'turtle', 'Turtle',
+'turtlescanner', 'Turtle',
+'tutorgigbot','TutorGigBot',
+'twiceler','twiceler',
+'ubicrawler','UbiCrawler',
+'ultraseek', 'Ultraseek',
+'unchaos_bot_hybrid_web_search_engine','UnChaos Bot Hybrid Web Search Engine',
+'unido\-bot','unido-bot',
+'unisterbot', 'UnisterBot; E-Mail only: crawler (at) unister.de',
+'updated','updated',
+'ustc\-semantic\-group','USTC-Semantic-Group',
+'vagabondo\-wap','Vagabondo-WAP',
+'vagabondo','Vagabondo',
+'vebidoobot', 'vebidoobot',
+'vermut','Vermut',
+'versus_crawler_from_eda\.baykan@epfl\.ch','versus crawler from eda.baykan@epfl.ch',
+'vespa_crawler','Vespa Crawler',
+'voltron', 'voltron',
+'vortex','VORTEX',
+'vse\/','VSE',
+'w3c\-checklink','W3C Link Checker',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', 'W3C jigsaw CSS Validator',
+'w3c_validator','W3C Validator',
+'watchmouse', 'WatchMouse Website Monitor',
+'wavefire','Wavefire',
+'waybackarchive\.org', 'No website, email: spider(at)waybackarchive.org',
+# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
+# Probably not related to the wayback machine of archive.org.
+'wbsearchbot', 'WBSearchBot',
+'webclipping\.com', 'WebClipping.com',
+'webcompass', 'webcompass',
+'webcrawl\.net','webcrawl.net',
+'web_downloader','Web Downloader',
+'webdup','Webdup',
+'webfilter','WebFilter',
+'webindexer','WebIndexer',
+'webminer','WebMiner',
+'website[_+\s]monitoring[_+\s]bot','Website_Monitoring_Bot',
+'webvulncrawl', 'WebVulnCrawl',
+'wells_search','Wells Search',
+'wer-liefert-was', 'Wer-liefert-was Crawler Note: AWStats counts most traffic as user traffic',
+'wesee:search', 'WeSEE Bot',
+'wevikabot', 'WeViKa',
+'wonderer', 'Web Wombat Redback Spider',
+'wotbox', 'Wotbox',
+'wume_crawler','wume crawler',
+'wwweasel',,'WWWeasel',
+'xenu\'s_link_sleuth','Xenu Link Sleuth',
+'xenu_link_sleuth','Xenu Link Sleuth',
+'xirq','xirq',
+'xovibot', 'XoviBot',
+'y!j', 'Y!J Yahoo Japan',
+'yacy', 'YaCy',
+'yahoo\-blogs','Yahoo-Blogs',
+'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',
+'yahoofeedseeker', 'Yahoo Feed Seeker',
+'yahooseeker\-testing', 'YahooSeeker-Testing',
+'yahooseeker', 'YahooSeeker Yahoo! Blog crawler',
+'yahoo\-mmcrawler', 'Yahoo-MMCrawler',
+'yahoo!_mindset','Yahoo! Mindset',
+'yandex', 'Yandex Bot',
+'flexum', 'Flexum Search Engine',
+'yanga', 'Yanga WorldSearch Bot',
+'yet-another-spider','Yet-Another-Spider',
+'yisouspider', 'YisouSpider (no additional information in UA string)',
+'yooglifetchagent','yoogliFetchAgent',
+'z\-add_link_checker','Z-Add Link Checker',
+'zealbot','ZealBot',
+'zhuaxia','ZhuaXia',
+'zspider','zspider',
+'zeus','Zeus Webster Pro',
+'zumbot','ZumBot',
+'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive
+'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive
+'exabot','Exabot', # put at end to avoid false positive
+# Other id that are 99% of robots
+'wget','WGet tools',
+'libwww','Perl tool',
+'^java\/[0-9]','Java (Often spam bot)', # put at end to avoid false positive
+# Generic robot
+'robot', 'Unknown robot (identified by \'robot\')',
+'checker', 'Unknown robot (identified by \'checker\')',
+'crawl', 'Unknown robot (identified by \'crawl\')',
+'discovery', 'Unknown robot (identified by \'discovery\')',
+'hunter', 'Unknown robot (identified by \'hunter\')',
+'scanner', 'Unknown robot (identified by \'scanner\')',
+'spider', 'Unknown robot (identified by \'spider\')',
+'sucker', 'Unknown robot (identified by \'sucker\')',
+'bot[\s_+:,\.\;\/\\\-]', 'Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',
+'[\s_+:,\.\;\/\\\-]bot', 'Unknown robot (identified by a space or one of the characters _+:,.;/\- followed by \'bot\')',
+'curl', 'Common *nix tool for automating web document retrieval. Most likely a bot.',
+'php', 'A PHP script',
+'ruby\/', 'Ruby script',
+# Additional bots found by Sussex.
+'^[1-3]$', 'Generic bot identified as "1", "2" or "3"',
+'alltop', 'alltop',
+'applesyndication', 'applesyndication',
+'asynchttpclient', 'asynchttpclient',
+'bingbot', 'Bingbot',
+'blogged_crawl', 'blogged_crawl',
+'bloglovin', 'bloglovin',
+'butterfly', 'butterfly',
+'buzztracker', 'buzztracker',
+'carpathia', 'carpathia',
+'catbot', 'catbot',
+'chattertrap', 'chattertrap',
+'check_http', 'check_http (nagios)',
+'coldfusion', 'coldfusion',
+'covario', 'covario',
+'daylifefeedfetcher', 'daylifefeedfetcher',
+'discobot', 'discobot',
+'dlvr\.it', 'dlvr.it',
+'dreamwidth', 'dreamwidth',
+'drupal', 'Drupal Site',
+'ezoom', 'ezoom',
+'feedmyinbox', 'feedmyinbox',
+'feedroll\.com', 'feedroll.com',
+'feedzira', 'feedzira',
+'fever\/', 'Feed a Fever',
+'freenews', 'freenews',
+'geohasher', 'geohasher',
+'hanrss', 'hanrss',
+'inagist', 'inagist',
+'jacobin\sclub', 'jacobin club',
+'jakarta', 'jakarta',
+'js\-kit', 'js-kit',
+'largesmall\scrawler', 'largesmall crawler',
+'linkedinbot', 'linkedinbot',
+'longurl', 'longurl',
+'metauri', 'metauri',
+'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
+'^motorola$', 'Suspected Bot masquerading as "Motorola"',
+'movabletype', 'movabletype',
+'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
+'^msie', 'Suspected bot masquerading as M$ IE',
+'netnewswire', 'netnewswire',
+'\snetseer\s', 'Net Seer',
+'netvibes', 'netvibes',
+'newrelicpinger', 'newrelicpinger',
+'newsfox', 'Fox News',
+'nextgensearchbot', 'nextgensearchbot',
+'ning', 'ning',
+'pingdom', 'pingdom',
+'pita', 'pita (pain in the ass?)',
+'postpost', 'postpost',
+'postrank', 'postrank',
+'printfulbot', 'printfulbot',
+'protopage', 'protopage',
+'proximic', 'Proximic Spider',
+'quipply', 'quipply',
+'r6\_', 'Radian 6 Crawler',
+'ratingburner', 'ratingburner',
+'regator', 'regator',
+'rome\sclient', 'rome client',
+'rpt\-httpclient', 'rpt-httpclient',
+'rssgraffiti', 'rssgraffiti',
+'sage\+\+', 'sage++',
+'scoutjet', 'ScoutJet crawler for Blekko.',
+'simplepie', 'simplepie',
+'sitebot', 'sitebot',
+'summify\.com', 'summify.com',
+'superfeedr', 'superfeedr',
+'synthesio', 'synthesio',
+'teoma', 'teoma',
+'topblogsinfo', 'topblogsinfo',
+'topix\.net', 'topix.net',
+'trapit', 'trapit',
+'trileet', 'trileet',
+'tweetedtimes', 'The Tweeted Times',
+'twisted\spagegetter', 'twisted pagegetter',
+'twitterbot', 'Twitterbot',
+'twitterfeed', 'twitterfeed',
+'unwindfetchor', 'unwindfetchor',
+'wazzup', 'wazzup',
+'windows\-rss\-platform', 'windows-rss-platform',
+'wiumi', 'wiumi',
+'xydo', 'xydo',
+'yahoo!\sslurp', 'Additional Yahoo bots.',
+'yahoo\spipes', 'Additional Yahoo bots.',
+'yahoo\-newscrawler', 'Additional Yahoo bots.',
+'yahoocachesystem', 'Additional Yahoo bots.',
+'yahooexternalcache', 'Additional Yahoo bots.',
+'yahoo!\ssearchmonkey', 'Additional Yahoo bots.',
+'yahooysmcm', 'Additional Yahoo bots.',
+'yammer', 'yammer',
+#'yandexbot', 'yandexbot', #already covered by 'yandex'
+'yeti', 'yeti',
+'yie8', 'yie8',
+'youdao', 'youdao',
+'yourls', 'yourls',
+'zemanta', 'zemanta',
+'zend_http_client', 'Zend Http Client',
+'no_user_agent','Unknown robot (identified by empty user agent string)',
+# Unknown robots identified by hit on robots.txt
+'unknown', 'Unknown robot (identified by hit on \'robots.txt\')'
+);
+
+
+# RobotsAffiliateLib
+# Maps a robot ID regex (key) to the name of the search engine or service that uses that robot (value).
+#-------------------------------------------------------------
+%RobotsAffiliateLib = (
+'bingpreview'=>'Bing',
+'fast\-webcrawler'=>'AllTheWeb',
+'googlebot'=>'Google',
+'google\-sitemap'=>'Google',
+'google[_+\s]web[_+\s]preview'=>'Google',
+'msnbot'=>'MSN',
+'nutch'=>'Looksmart',
+'scooter'=>'AltaVista',
+'wisenutbot'=>'Looksmart',
+'yahoo\-blogs'=>'Yahoo',
+'yahoo\-verticalcrawler'=>'Yahoo',
+'yahoofeedseeker'=>'Yahoo',
+'yahooseeker\-testing'=>'Yahoo',
+'yahooseeker'=>'Yahoo',
+'yahoo\-mmcrawler'=>'Yahoo',
+'yahoo!_mindset'=>'Yahoo',
+'zyborg'=>'Looksmart',
+'cfetch'=>'Kosmix',
+'^voyager\/'=>'Kosmix',
+# Additional bots found by Sussex.
+'feedfetcher\-google'=>'Google',
+'bingbot'=>'MSN',
+'twitterbot'=>'Twitter',
+'twitterfeed'=>'Twitter',
+'yahoo!\sslurp'=>'Yahoo',
+'yahoo\spipes'=>'Yahoo',
+'yahoo\-newscrawler'=>'Yahoo', # hyphen escaped like every other key, so it is spelled identically to the robot ID 'yahoo\-newscrawler' in the table above
+'yahoocachesystem'=>'Yahoo',
+'yahooexternalcache'=>'Yahoo',
+'yahoo!\ssearchmonkey'=>'Yahoo',
+'yahooysmcm'=>'Yahoo'
+);
+
+1;
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm
index a84dc4e76..e56b00801 100644
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -1,1578 +1,1578 @@
-# AWSTATS SEARCH ENGINES DATABASE
-#------------------------------------------------------------------------------
-# If you want to add a Search Engine to extend AWStats database detection capabilities,
-# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in
-# SearchEnginesHashLib.
-# An entry if known in SearchEnginesKnownUrl is also welcome.
-#
-# to eldy: Please check if the following description is correct:
-# You need the following information to specify a search engine:
-# (a) A regular expression that matches the referrer string of the
-# search engine. Unclear: What about slashes in the name of
-# a search engine, e.g. as in 'ecosia.com/search'. Seems that
-# AWStats will non find search strings containing a slash.
-# Maybe use a search string without a slash, and - if necessary -
-# an entry in %NotSearchEnginesKeys , if this search string
-# matches entries that are not search engines.
-# Example of a web address of a Amazon search engine:
-# http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
-# (b) A unique string to identify the search engine within AWStats
-# (c) A regular expression that finds the start of the query part in the
-# referrer string
-# (d) A HTML-fragment that goes into the reports generated by AWStats which
-# identifies the search engine to human reader of the report. In the
-# simplest case this is a string containing the name of the search
-# engine. You can also provide a hypertext clause that presents the
-# name together with a link to the search engine.
-#
-# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1
-# or ..._list2. List 1 contains common search engines, list 2 those
-# that are not so often used.
-#
-# SearchEnginesHashID contains to consecutive entries for each search
-# engine: The regular expression (a) followed bei the search engine
-# identifier (b)
-#
-# SearchEnginesKnownUrl specifies how to find the start of the query.
-# For each search engine you enter the search engine identifier (b)
-# followed by the regular expression (c). Unclear: It is possible to
-# omit this entry. If you do this, how will AWStats find the start of
-# the query?
-#
-# SearchEnginesHashLib contains also two entries for each search engine:
-# The search engine identifier (b) followed by the HTML-Fragment (d)
-#
-# There are search engines that do not use a query part in their URLs.
-# They put the search expression in the main part of the URL instead.
-# AWStats is able to handle these cases. They are specified as described
-# above, except the following two things:
-# - The regular expression (c) searches the complete URL and not only
-# the query part.
-# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is
-# necessary.
-#
-#
-# AWStats runs a sanity check of the contents of search_engines.pm. This
-# check detects the following things:
-# - Inconsistencies (number of entries)
-# It does not detect the following errors:
-# - If the HTML-Fragment (d) is syntactically incorrect.
-#
-#------------------------------------------------------------------------------
-
-# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
-# added minor italian search engines
-# arianna http://arianna.libero.it/
-# supereva http://search.supereva.com/
-# kataweb http://kataweb.it/
-# corrected uk looksmart
-# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
-# to
-# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
-# corrected spelling
-# internationnal -> international
-# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
-# avoid counting gmail referrals as search engine traffic
-# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html
-# avoid counting babelfish.altavista referrals as search engine traffic
-# avoid counting translate.google referrals as search engine traffic
-# 2005-11-20 Sean Carlos
-# added missing 'tiscali','key=', entry. Check order
-# 2005-11-22 Sean Carlos
-# added Google Base & Froogle. Froogle not tested.
-# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html
-# added biglotron.com (France)
-# added blingo http://www.blingo.com/
-# added Clusty & Vivisimo
-# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783]
-# added GPU p2p search http://search.centraldatabase.org/
-# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688]
-# added Ask group's "mysearch"
-# added sify.com (India)
-# added sogou.com (Cina) [https://sourceforge.net/forum/message.php?msg_id=3501603]
-# Ask changes:
-# - added Ask Japan (ask.jp)
-# - break out Ask new country level variants (DE, ES, FR, IT, NL)
-# - updated Ask name from Ask Jevees
-# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444]
-# - updated Ask uk (new uk.ask.com added to older ask.co.uk)
-# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912]
-# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch
-# to AWStats to allow untranslated html. Otherwise html will appear instead of link.
-# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engined mentioned no longer
-# exists https://sourceforge.net/forum/message.php?msg_id=3025426
-# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
-# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden)
-# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005
-# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104
-# To do: add more extensive IP list; keywords not yet detected.
-# added icerocket.com blog search http://www.icerocket.com/
-# added live.com (msn) http://www.live.com/
-# added Meta motor kartoo. Note: Kartoo does not provide search words in referrers, thus the engine will appear in the
-# search engine list but the actual search words are not available.
-# added netluchs.de http://www.netluchs.de/
-# added sphere.com blog search http://www.sphere.com/
-# added wwweasel.de http://wwweasel.de
-# added Yahoo Mindset! http://mindset.research.yahoo.com/
-# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland)
-# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
-# added Google cache IPs 64.233.183.104 & 66.102.7.104
-# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html
-# anzwers.com.au
-# schoenerbrausen.de http://www.schoenerbrausen.de/
-# added Google cache IP 216.239.59.104
-# answerbus http://www.answerbus.com/ (does not provide keywords)
-# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html
-# added Google cache IP 66.102.9.104, 64.233.161.104
-# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html
-# added Alice Search search.alice.it
-# added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web"
-# added googlee.com, variant of Google
-# added gotuneed http://www.gotuneed.com/ Italian search engine, in beta
-# added icq.com
-# added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out?
-# added Nusearch http://www.nusearch.com/
-# added Polymeta www.polymeta.hu (does not provide keywords)
-# added scroogle http://www.scroogle.org/ (does not always provide keywords)
-# added Tango http://tango.hu/search.php?st=0&q=jeles+napok
-# Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104
-# 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104
-# 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104
-# 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104
-# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html
-# added Onet.pl http://szukaj.onet.pl/
-# corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/
-# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html
-# Additional Polish Search Engines:
-# added Dodaj.pl http://www.dodaj.pl/
-# added Gazeta.pl http://szukaj.gazeta.pl/
-# added Gery.pl http://szukaj.gery.pl/
-# added Hoga.pl http://www.hoga.pl/
-# added Interia.pl http://www.google.interia.pl/
-# added Katalog.Onet.pl http://katalog.onet.pl/
-# added NetSprint.pl http://www.netsprint.pl/
-# added o2.pl http://szukaj2.o2.pl/
-# added Polska http://szukaj.polska.pl/
-# added Szukacz http://www.szukacz.pl/
-# added Wow.pl http://szukaj.wow.pl/
-# added Sagool http://sagool.jp/
-
-# 2006-08-25 Social Bookmarks
-# International
-# added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer?
-# added stumbleupon.com - No keywords supplied.
-# added swik.net
-# added digg. Keywords sometimes supplied.
-# Italy
-# added segnalo.alice.it - No keywords supplied.
-# added ineffabile.it - No keywords supplied.
-
-# added filter for google groups. Attempt to parse group name as keyword.
-
-# 2006-09-14
-# added Eniro Sverige http://www.eniro.se/
-# added MyWebSearch http://search.mywebsearch.com/
-# added Teecno http://www.teecno.it/ Italian Open Source Search Engine
-
-#package AWSSE;
-
-# 2006-09-25 (Gabor Moizes)
-# added 4-counter (Google alternative) http://4-counter.com/
-# added Googlecom (Google alternative) http://googlecom.com/
-# added Goggle (Google alternative) http://goggle.co.hu/
-# added Comet toolbar http://as.starware.com
-# added new IP for Yahoo: 216.109.125.130
-# added Ledix http://ledix.net/
-# added AT&T search (powered by Google) http://www.att.net/
-# added Keresolap (Hungarian search engine) http://www.keresolap.hu/
-# added Mozbot (French search engine) http://www.mozbot.fr/
-# added Zoznam (Slovak search engine) http://www.zoznam.sk/
-# added sapo.pt (Portuguese search engine) http://www.sapo.pt/
-# added shaw.ca (powered by Google) http://start.shaw.ca/
-# added Searchalot http://www.searchalot.com/
-# added Copernic http://www.copernic.com/
-# added 216.109.125.130 to Yahoo
-# added 66.218.69.11 to Yahoo
-# added Avantfind http://www.avantfind.com/
-# added Steadysearch http://www.steadysearch.com/
-# added Steadysearch http://www.steady-search.com/
-# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104
-
-
-# SearchEnginesSearchIDOrder
-# It contains all matching criteria to search for in log fields. This list is
-# used to know in which order to search Search Engines IDs.
-# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more
-# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more
-# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
-#------------------------------------------------------------------------------
-@SearchEnginesSearchIDOrder_list1=(
-# Major international search engines
-'google\.[\w.]+/products',
-'base\.google\.',
-'froogle\.google\.',
-'groups\.google\.',
-'images\.google\.',
-'google\.',
-'googlee\.',
-'googlecom\.com',
-'goggle\.co\.hu',
-'216\.239\.32\.20',
-'173\.194\.32\.223',
-'216\.239\.(35|37|39|51)\.100',
-'216\.239\.(35|37|39|51)\.101',
-'216\.239\.5[0-9]\.104',
-'64\.233\.1[0-9]{2}\.104',
-'66\.102\.[1-9]\.104',
-'66\.249\.93\.104',
-'72\.14\.2[0-9]{2}\.104',
-'msn\.',
-'live\.com',
-'bing\.',
-'voila\.',
-'mindset\.research\.yahoo',
-'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)',
-'search\.aol\.co',
-'tiscali\.',
-'lycos\.',
-'alexa\.com',
-'alltheweb\.com',
-'altavista\.',
-'a9\.com',
-'dmoz\.org',
-'netscape\.',
-'search\.terra\.',
-'www\.search\.com',
-'search\.sli\.sympatico\.ca',
-'excite\.'
-);
-
-@SearchEnginesSearchIDOrder_list2=(
-# Minor international search engines
-'4\-counter\.com',
-'att\.net',
-'bungeebonesdotcom',
-'northernlight\.',
-'hotbot\.',
-'kvasir\.',
-'webcrawler\.',
-'metacrawler\.',
-'go2net\.com',
-'(^|\.)go\.com',
-'euroseek\.',
-'looksmart\.',
-'spray\.',
-'nbci\.com\/search',
-'de\.ask.\com', # break out Ask country specific engines. (.jp is in Japan section)
-'es\.ask.\com',
-'fr\.ask.\com',
-'it\.ask.\com',
-'nl\.ask.\com',
-'uk\.ask.\com',
-'(^|\.)ask\.com',
-'atomz\.',
-'overture\.com', # Replace 'goto\.com','Goto.com',
-'teoma\.',
-'findarticles\.com',
-'infospace\.com',
-'mamma\.',
-'dejanews\.',
-'dogpile\.com',
-'wisenut\.com',
-'ixquick\.com',
-'search\.earthlink\.net',
-'i-une\.com',
-'blingo\.com',
-'centraldatabase\.org',
-'clusty\.com',
-'mysearch\.',
-'vivisimo\.com',
-'kartoo\.com',
-'icerocket\.com',
-'sphere\.com',
-'ledix\.net',
-'start\.shaw\.ca',
-'searchalot\.com',
-'copernic\.com',
-'avantfind\.com',
-'steadysearch\.com',
-'steady-search\.com',
-'claro-search\.com',
-'www1\.search-results\.com',
-'www\.holasearch\.com',
-'search\.conduit\.com',
-'static\.flipora\.com',
-'(?:www[12]?|mixidj)\.delta-search\.com',
-'start\.iminent\.com',
-'www\.searchmobileonline\.com',
-'int\.search-results\.com',
-'www2\.inbox\.com',
-'www\.govome\.com',
-'find1friend\.com',
-'start\.mysearchdial\.com',
-'go\.speedbit\.com',
-'search\.certified-toolbar\.com',
-'search\.sweetim\.com',
-'search\.searchcompletion\.com',
-'en\.eazel\.com',
-'sr\.searchfunmoods\.com',
-'173\.194\.35\.177',
-'dalesearch\.com',
-'sweetpacks-search\.com',
-'searchgol\.com',
-'duckduckgo\.com',
-'sr\.facemoods\.com',
-'shoppstop\.com',
-'searchya\.com',
-'picsearch\.de',
-'webssearches\.com',
-'airzip\.inspsearch\.com',
-'zapmeta\.de',
-'localmoxie\.com',
-'search-results\.mobi',
-'androidsearch\.com',
-'isearch\.nation\.com',
-'search\.zonealarm\.com',
-'www\.buenosearch\.com',
-'search\.foxtab\.com',
-'searches\.qone8\.com',
-'startpage\.com',
-'www\.qwant\.com',
-'searches\.safehomepage\.com',
-'searches\.vi-view\.com',
-'wow\.utop\.it',
-'windowssearch\.com',
-'www\.wow\.com',
-'globososo\.',
-'kingtale3\.inspsearch\.com',
-'swisscows\.ch',
-'preciobarato\.xyz',
-'www\.dregol\.com',
-'search\.socialdownloadr\.com',
-'int\.search\.myway\.com',
-'de\.dolphin\.com',
-'mys\.yoursearch\.me',
-# Chello Portals
-'chello\.at',
-'chello\.be',
-'chello\.cz',
-'chello\.fr',
-'chello\.hu',
-'chello\.nl',
-'chello\.no',
-'chello\.pl',
-'chello\.se',
-'chello\.sk',
-'chello', # required as catchall for new countries not yet known
-# Mirago
-'mirago\.be',
-'mirago\.ch',
-'mirago\.de',
-'mirago\.dk',
-'es\.mirago\.com',
-'mirago\.fr',
-'mirago\.it',
-'mirago\.nl',
-'no\.mirago\.com',
-'mirago\.se',
-'mirago\.co\.uk',
-'mirago', # required as catchall for new countries not yet known
-'answerbus\.com',
-'icq\.com\/search',
-'nusearch\.com',
-'goodsearch\.com',
-'scroogle\.org',
-'questionanswering\.com',
-'mywebsearch\.com',
-'as\.starware\.com',
-# Social Bookmarking Services
-'del\.icio\.us',
-'digg\.com',
-'stumbleupon\.com',
-'swik\.net',
-'segnalo\.alice\.it',
-'ineffabile\.it',
-# Minor Australian search engines
-'anzwers\.com\.au',
-# Minor brazilian search engines
-'engine\.exe', 'miner\.bol\.com\.br',
-# Minor chinese search engines
-'\.baidu\.com', # baidu search portal
-'\.vnet\.cn', # powered by MSN
-'\.soso\.com', # powered by Google
-'\.sogou\.com', # powered by Sohu
-'\.3721\.com', # powered by Yahoo!
-'iask\.com', # powered by Sina
-'\.accoona\.com', # Accoona
-'\.163\.com', # powered by Google
-'\.zhongsou\.com', # zhongsou search portal
-# Minor czech search engines
-'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
-'isearch\.avg\.com',
-# Minor danish search-engines
-'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',
-# Minor dutch search engines
-'ilse\.','vindex\.',
-# Minor english search engines
-'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
-'search\.fbdownloader\.com',
-'search\.fdownloadr\.com',
-'search\.babylon\.com',
-'my\.allgameshome\.com',
-'surfcanyon\.com',
-'uk\.foxstart\.com',
-'yandex\.com',
-# Minor finnish search engines
-'haku\.www\.fi',
-# Minor french search engines
-'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr',
-'toile\.com', 'biglotron\.com',
-'mozbot\.fr',
-# Minor german search engines
-'sucheaol\.aol\.de',
-'o2suche\.aol\.de',
-'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',
-'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de',
-'212\.227\.33\.241',
-'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',
-'wwweasel\.de',
-'netluchs\.de',
-'schoenerbrausen\.de',
-'suche\.gmx\.net',
-'suche\.gmx\.at',
-'ecosia\.org',
-'de\.aolsearch\.com',
-'suche\.aol\.de',
-'www\.startxxl\.com',
-'www\.benefind\.de',
-'www\.amazon\.de.*search', #Just as a reminder, probably will not work as AWstats seems to consider the host part of an URL only
-'de\.wow\.com',
-'www\.vlips\.de',
-'metager\.de',
-'search\.1und1\.de',
-'sm\.de',
-'sumaja\.de',
-'navigationshilfe\.t-online\.de',
-'umfis\.de',
-'fastbot\.de',
-'tixuma\.de',
-'suche\.freenet\.de',
-'www\.izito\.de',
-'extern\.peoplecheck\.de',
-'www\.oneseek\.de',
-'de\.wiki\.gov\.cn',
-'umuwa\.de',
-'suche\.1und1\.de',
-'www\.metasuche\.ch',
-# Minor Hungarian search engines
-'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
-'tango\.hu',
-'keresolap\.hu',
-'kereso\.startlap\.hu',
-'polymeta\.hu',
-# Minor Indian search engines
-'sify\.com',
-# Minor Italian search engines
-'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',
-'godado','jumpy\.it','shinyseek\.it','teecno\.it',
-# Minor Israeli search engines
-'search\.genieo\.com',
-# Minor Japanese search engines
-'ask\.jp','sagool\.jp',
-'websearch\.rakuten\.co\.jp',
-# Minor Norwegian search engines
-'sok\.start\.no', 'eniro\.no',
-# Minor Polish search engines
-'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',
-# Minor russian search engines
-'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru',
-'go\.mail\.ru',
-# Minor Swedish search engines
-'evreka\.passagen\.se','eniro\.se',
-# Minor Slovak search engines
-'zoznam\.sk',
-# Minor Portuguese search engines
-'sapo\.pt',
-# Minor swiss search engines
-'search\.ch', 'search\.bluewin\.ch',
-'www\.zapmeta\.ch',
-'etools\.ch',
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
-'pogodak\.'
-);
-@SearchEnginesSearchIDOrder_listgen=(
-# Generic search engines
-'search\..*\.\w+'
-);
-
-
-# NotSearchEnginesKeys
-# If a search engine key is found, we check its exclude list to know if it's
-# really a search engine
-#------------------------------------------------------------------------------
-%NotSearchEnginesKeys=(
-'altavista\.'=>'babelfish\.altavista\.',
-'google\.'=>'mail\.google\.',
-'google\.'=>'translate\.google\.',
-'google\.'=>'code\.google\.',
-'google\.'=>'groups\.google\.',
-'msn\.'=>'hotmail\.msn\.',
-'tiscali\.'=>'mail\.tiscali\.',
-'yahoo\.'=>'(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks',
-'yandex\.'=>'direct\.yandex\.'
-);
-
-
-# SearchEnginesHashID
-# Each Search Engine Search ID is associated to an AWStats id string
-#------------------------------------------------------------------------------
-%SearchEnginesHashID = (
-# Major international search engines
-'google\.[\w.]+/products','google_products',
-'base\.google\.','google_base',
-'froogle\.google\.','google_froogle',
-'groups\.google\.','google_groups',
-'images\.google\.','google_image',
-'google\.','google',
-'googlee\.','google',
-'googlecom\.com','google',
-'goggle\.co\.hu','google',
-'216\.239\.32\.20', 'google',
-'173\.194\.32\.223', 'google',
-'216\.239\.(35|37|39|51)\.100','google_cache',
-'216\.239\.(35|37|39|51)\.101','google_cache',
-'216\.239\.5[0-9]\.104','google_cache',
-'64\.233\.1[0-9]{2}\.104','google_cache',
-'66\.102\.[1-9]\.104','google_cache',
-'66\.249\.93\.104','google_cache',
-'72\.14\.2[0-9]{2}\.104','google_cache',
-'msn\.','msn',
-'live\.com','live',
-'bing\.','bing',
-'voila\.','voila',
-'mindset\.research\.yahoo','yahoo_mindset',
-'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',
-'lycos\.','lycos',
-'alexa\.com','alexa',
-'alltheweb\.com','alltheweb',
-'altavista\.','altavista',
-'a9\.com','a9',
-'dmoz\.org','dmoz',
-'netscape\.','netscape',
-'search\.terra\.','terra',
-'www\.search\.com','search.com',
-'tiscali\.','tiscali',
-'search\.aol\.co','aol',
-'search\.sli\.sympatico\.ca','sympatico',
-'excite\.','excite',
-# Minor international search engines
-'4\-counter\.com','google4counter',
-'att\.net','att',
-'bungeebonesdotcom','bungeebonesdotcom',
-'northernlight\.','northernlight',
-'hotbot\.','hotbot',
-'kvasir\.','kvasir',
-'webcrawler\.','webcrawler',
-'metacrawler\.','metacrawler',
-'go2net\.com','go2net',
-'(^|\.)go\.com','go',
-'euroseek\.','euroseek',
-'looksmart\.','looksmart',
-'spray\.','spray',
-'nbci\.com\/search','nbci',
-'de\.ask.\com','askde', # break out Ask country specific engines.
-'es\.ask.\com','askes',
-'fr\.ask.\com','askfr',
-'it\.ask.\com','askit',
-'nl\.ask.\com','asknl',
-'uk\.ask.\com','askuk',
-'(^|\.)ask\.co\.uk','askuk',
-'(^|\.)ask\.com','ask',
-'atomz\.','atomz',
-'overture\.com','overture', # Replace 'goto\.com','Goto.com',
-'teoma\.','teoma',
-'findarticles\.com','findarticles',
-'infospace\.com','infospace',
-'mamma\.','mamma',
-'dejanews\.','dejanews',
-'dogpile\.com','dogpile',
-'wisenut\.com','wisenut',
-'ixquick\.com','ixquick',
-'search\.earthlink\.net','earthlink',
-'i-une\.com','iune',
-'blingo\.com','blingo',
-'centraldatabase\.org','centraldatabase',
-'clusty\.com','clusty',
-'mysearch\.','mysearch',
-'vivisimo\.com','vivisimo',
-'kartoo\.com','kartoo',
-'icerocket\.com','icerocket',
-'sphere\.com','sphere',
-'ledix\.net','ledix',
-'start\.shaw\.ca','shawca',
-'searchalot\.com','searchalot',
-'copernic\.com','copernic',
-'avantfind\.com','avantfind',
-'steadysearch\.com','steadysearch',
-'steady-search\.com','steadysearch',
-'claro-search\.com','clarosearch',
-'www1\.search-results\.com', 'searchresults',
-'www\.holasearch\.com', 'holasearch',
-'search\.conduit\.com', 'conduit',
-'static\.flipora\.com', 'flipora',
-'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search',
-'start\.iminent\.com', 'iminent',
-'www\.searchmobileonline\.com', 'searchmobileonline',
-'int\.search-results\.com', 'nortonsavesearch',
-'www2\.inbox\.com', 'inbox',
-'www\.govome\.com', 'govome',
-'find1friend\.com', 'find1friend',
-'start\.mysearchdial\.com', 'mysearchdial',
-'go\.speedbit\.com', 'speedbit',
-'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',
-'search\.sweetim\.com', 'sweetim',
-'search\.searchcompletion\.com', 'searchcompletion',
-'en\.eazel\.com','eazelsearch',
-'sr\.searchfunmoods\.com', 'searchfunmoods',
-'173\.194\.35\.177', 'googleByIP',
-'dalesearch\.com', 'dalesearch',
-'sweetpacks-search\.com', 'sweetpacks',
-'searchgol\.com', 'searchgol',
-'duckduckgo\.com', 'duckduckgo',
-'sr\.facemoods\.com', 'facemoods',
-'shoppstop\.com', 'shoppstop',
-'searchya\.com', 'searchya',
-'picsearch\.de', 'picsearch',
-'webssearches\.com', 'webssearches',
-'airzip\.inspsearch\.com', 'webssearches',
-'zapmeta\.de', 'zapmeta',
-'localmoxie\.com', 'localmoxie',
-'search-results\.mobi', 'search-results_mobi',
-'androidsearch\.com', 'androidsearch',
-'isearch\.nation\.com', 'isearch_nation_com',
-'search\.zonealarm\.com', 'search_zonealarm_com',
-'www\.buenosearch\.com', 'www_buenosearch_com',
-'search\.foxtab\.com', 'search_foxtab_com',
-'searches\.qone8\.com', 'searches_qone8_com',
-'startpage\.com', 'startpage_com',
-'www\.qwant\.com', 'qwant_com',
-'searches\.safehomepage\.com', 'safehomepage_com',
-'searches\.vi-view\.com', 'vi-view_com',
-'wow\.utop\.it', 'wow_utop_it',
-'windowssearch\.com', 'windowssearch_com',
-'www\.wow\.com', 'www_wow_com',
-'globososo\.', 'globososo',
-'kingtale3\.inspsearch\.com', 'globososo',
-'swisscows\.ch', 'swisscows_ch',
-'preciobarato\.xyz', 'preciobarato_xyz',
-'www\.dregol\.com', 'www_dregol_com',
-'search\.socialdownloadr\.com', 'search_socialdownloadr_com',
-'int\.search\.myway\.com', 'int_search_myway_com',
-'de\.dolphin\.com', 'de_dolphin_com',
-'mys\.yoursearch\.me', 'mys_yoursearch_me',
-# Chello Portals
-'chello\.at','chelloat',
-'chello\.be','chellobe',
-'chello\.cz','chellocz',
-'chello\.fr','chellofr',
-'chello\.hu','chellohu',
-'chello\.nl','chellonl',
-'chello\.no','chellono',
-'chello\.pl','chellopl',
-'chello\.se','chellose',
-'chello\.sk','chellosk',
-'chello','chellocom',
-# Mirago
-'mirago\.be','miragobe',
-'mirago\.ch','miragoch',
-'mirago\.de','miragode',
-'mirago\.dk','miragodk',
-'es\.mirago\.com','miragoes',
-'mirago\.fr','miragofr',
-'mirago\.it','miragoit',
-'mirago\.nl','miragonl',
-'no\.mirago\.com','miragono',
-'mirago\.se','miragose',
-'mirago\.co\.uk','miragocouk',
-'mirago','mirago', # required as catchall for new countries not yet known
-'answerbus\.com','answerbus',
-'icq\.com\/search','icq',
-'nusearch\.com','nusearch',
-'goodsearch\.com','goodsearch',
-'scroogle\.org','scroogle',
-'questionanswering\.com','questionanswering',
-'mywebsearch\.com','mywebsearch',
-'as\.starware\.com','comettoolbar',
-# Social Bookmarking Services
-'del\.icio\.us','delicious',
-'digg\.com','digg',
-'stumbleupon\.com','stumbleupon',
-'swik\.net','swik',
-'segnalo\.alice\.it','segnalo',
-'ineffabile\.it','ineffabile',
-# Minor Australian search engines
-'anzwers\.com\.au','anzwers',
-# Minor brazilian search engines
-'engine\.exe','engine',
-'miner\.bol\.com\.br','miner',
-# Minor chinese search engines
-'\.baidu\.com','baidu',
-'iask\.com','iask',
-'\.accoona\.com','accoona',
-'\.3721\.com','3721',
-'\.163\.com','netease',
-'\.soso\.com','soso',
-'\.zhongsou\.com','zhongsou',
-'\.vnet\.cn','vnet',
-'\.sogou\.com','sogou',
-# Minor czech search engines
-'atlas\.cz','atlas',
-'seznam\.cz','seznam',
-'quick\.cz','quick',
-'centrum\.cz','centrum',
-'jyxo\.(cz|com)','jyxo',
-'najdi\.to','najdi',
-'redbox\.cz','redbox',
-'isearch\.avg\.com', 'avgsearch',
-# Minor danish search-engines
-'opasia\.dk','opasia',
-'danielsen\.com','danielsen',
-'sol\.dk','sol',
-'jubii\.dk','jubii',
-'find\.dk','finddk',
-'edderkoppen\.dk','edderkoppen',
-'netstjernen\.dk','netstjernen',
-'orbis\.dk','orbis',
-'tyfon\.dk','tyfon',
-'1klik\.dk','1klik',
-'ofir\.dk','ofir',
-# Minor dutch search engines
-'ilse\.','ilse',
-'vindex\.','vindex',
-# Minor english search engines
-'bbc\.co\.uk/cgi-bin/search','bbc',
-'ifind\.freeserve','freeserve',
-'looksmart\.co\.uk','looksmartuk',
-'splut\.','splut',
-'spotjockey\.','spotjockey',
-'ukdirectory\.','ukdirectory',
-'ukindex\.co\.uk','ukindex',
-'ukplus\.','ukplus',
-'searchy\.co\.uk','searchy',
-'search\.fbdownloader\.com','fbdownloader',
-'search\.fdownloadr\.com', 'fdownloadr_com',
-'search\.babylon\.com', 'babylon',
-'my\.allgameshome\.com', 'allgameshome',
-'surfcanyon\.com', 'surfcanyon_com',
-'uk\.foxstart\.com', 'uk_foxstart_com',
-'yandex\.com', 'yandex_com',
-# Minor finnish search engines
-'haku\.www\.fi','haku',
-# Minor french search engines
-'recherche\.aol\.fr','aolfr',
-'ctrouve\.','ctrouve',
-'francite\.','francite',
-'\.lbb\.org','lbb',
-'rechercher\.libertysurf\.fr','libertysurf',
-'search[\w\-]+\.free\.fr','free',
-'recherche\.club-internet\.fr','clubinternet',
-'toile\.com','toile',
-'biglotron\.com', 'biglotron',
-'mozbot\.fr', 'mozbot',
-# Minor german search engines
-'sucheaol\.aol\.de','aolde',
-'o2suche\.aol\.de','o2aolde',
-'fireball\.de','fireball',
-'infoseek\.de','infoseek',
-'suche\d?\.web\.de','webde',
-'[a-z]serv\.rrzn\.uni-hannover\.de','meta',
-'suchen\.abacho\.de','abacho',
-'(brisbane|suche)\.t-online\.de','t-online',
-'allesklar\.de','allesklar',
-'meinestadt\.de','meinestadt',
-'212\.227\.33\.241','metaspinner',
-'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',
-'wwweasel\.de','wwweasel',
-'netluchs\.de','netluchs',
-'schoenerbrausen\.de','schoenerbrausen',
-'suche\.gmx\.net', 'gmxsuche',
-'suche\.gmx\.at', 'gmxsuche_at',
-'ecosia\.org', 'ecosiasearch',
-'de\.aolsearch\.com', 'aolsearch',
-'suche\.aol\.de', 'aolsuche',
-'www\.startxxl\.com', 'startxxl',
-'www\.benefind\.de', 'benefind',
-'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only
-'de\.wow\.com', 'wowsearch',
-'www\.vlips\.de', 'vlips_de',
-'metager\.de', 'metager',
-'search\.1und1\.de', 'search_1und1_de',
-'sm\.de', 'smde',
-'sumaja\.de', 'sumaja',
-'navigationshilfe\.t-online\.de', 'navigationshilfe',
-'umfis\.de', 'umfis',
-'fastbot\.de', 'fastbot_de',
-'tixuma\.de', 'tixuma_de',
-'suche\.freenet\.de', 'freenet_de',
-'www\.izito\.de', 'izito_de',
-'extern\.peoplecheck\.de', 'peoplecheck_de',
-'www\.oneseek\.de', 'oneseek_de',
-'de\.wiki\.gov\.cn', 'de_wiki_gov_cn',
-'umuwa\.de', 'umuwa_de',
-'suche\.1und1\.de', '1und1_de',
-'www\.metasuche\.ch', 'metasuche_ch',
-# Minor Hungarian search engines
-'heureka\.hu','heureka',
-'vizsla\.origo\.hu','origo',
-'lapkereso\.hu','lapkereso',
-'goliat\.hu','goliat',
-'index\.hu','indexhu',
-'wahoo\.hu','wahoo',
-'webmania\.hu','webmania',
-'search\.internetto\.hu','internetto',
-'tango\.hu','tango_hu',
-'keresolap\.hu','keresolap_hu',
-'kereso\.startlap\.hu', 'startlap_hu',
-'polymeta\.hu','polymeta_hu',
-# Minor Indian search engines
-'sify\.com','sify',
-# Minor Italian search engines
-'virgilio\.it','virgilio',
-'arianna\.libero\.it','arianna',
-'supereva\.com','supereva',
-'kataweb\.it','kataweb',
-'search\.alice\.it\.master','aliceitmaster',
-'search\.alice\.it','aliceit',
-'gotuneed\.com','gotuneed',
-'godado','godado',
-'jumpy\.it','jumpy\.it',
-'shinyseek\.it','shinyseek\.it',
-'teecno\.it','teecnoit',
-# Minor Israeli search engines
-'search\.genieo\.com', 'genieo',
-# Minor Japanese search engines
-'ask\.jp','askjp',
-'sagool\.jp','sagool',
-'websearch\.rakuten\.co\.jp', 'rakuten',
-# Minor Norwegian search engines
-'sok\.start\.no','start', 'eniro\.no','eniro',
-# Minor Polish search engines
-'szukaj\.wp\.pl','wp',
-'szukaj\.onet\.pl','onetpl',
-'dodaj\.pl','dodajpl',
-'gazeta\.pl','gazetapl',
-'gery\.pl','gerypl',
-'netsprint\.pl\/hoga\-search','hogapl',
-'netsprint\.pl','netsprintpl',
-'interia\.pl','interiapl',
-'katalog\.onet\.pl','katalogonetpl',
-'o2\.pl','o2pl',
-'polska\.pl','polskapl',
-'szukacz\.pl','szukaczpl',
-'wow\.pl','wowpl',
-# Minor russian search engines
-'ya(ndex)?\.ru','yandex',
-'aport\.ru','aport',
-'rambler\.ru','rambler',
-'turtle\.ru','turtle',
-'metabot\.ru','metabot',
-'go\.mail\.ru', 'mailru',
-# Minor Swedish search engines
-'evreka\.passagen\.se','passagen',
-'eniro\.se','enirose',
-# Minor Slovak search engines
-'zoznam\.sk','zoznam',
-# Minor Portuguese search engines
-'sapo\.pt','sapo',
-# Minor swiss search engines
-'search\.ch','searchch',
-'search\.bluewin\.ch','bluewin',
-'www\.zapmeta\.ch', 'zapmeta_ch',
-'etools\.ch', 'etools_ch',
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
-'pogodak\.','pogodak',
-# Generic search engines
-'search\..*\.\w+','search'
-);
-
-
-# SearchEnginesWithKeysNotInQuery
-# List of search engines that store keyword as page instead of query parameter
-#------------------------------------------------------------------------------
-%SearchEnginesWithKeysNotInQuery=(
-'a9',1, # www.a9.com/searchkey1%20searchkey2
-'iminent',1, #http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments
-'de_wiki_gov_cn',1, #http://de.wiki.gov.cn/s_searchkey1%20searchkey2
-'umuwa_de', 1, #http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images
-'amazonsearch', 1 #http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
-);
-
-# SearchEnginesKnownUrl
-# Known rules to extract keywords from a referrer search engine URL
-#------------------------------------------------------------------------------
-%SearchEnginesKnownUrl=(
-# Most common search engines
-'alexa','q=',
-'alltheweb','q(|uery)=',
-'altavista','q=',
-'a9','a9\.com\/',
-'dmoz','search=',
-'google_products','(p|q|as_p|as_q)=',
-'google_base','(p|q|as_p|as_q)=',
-'google_froogle','(p|q|as_p|as_q)=',
-'google_groups','group\/', # does not work
-'google_image','(p|q|as_p|as_q)=',
-'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
-'google','(p|q|as_p|as_q)=',
-'lycos','query=',
-'msn','q=',
-'live','q=',
-'bing','q=',
-'netscape','search=',
-'tiscali','key=',
-'aol','query=',
-'terra','query=',
-'voila','(kw|rdata)=',
-'search.com','q=',
-'yahoo_mindset','p=',
-'yahoo','p=',
-'sympatico', 'query=',
-'excite','search=',
-# Minor international search engines
-'google4counter','(p|q|as_p|as_q)=',
-'att','qry=',
-'bungeebonesdotcom','query=',
-'go','qt=',
-'askde','(ask|q)=', # break out Ask country specific engines.
-'askes','(ask|q)=',
-'askfr','(ask|q)=',
-'askit','(ask|q)=',
-'asknl','(ask|q)=',
-'ask','(ask|q)=',
-'atomz','sp-q=',
-'euroseek','query=',
-'findarticles','key=',
-'go2net','general=',
-'hotbot','mt=',
-'infospace','qkw=',
-'kvasir', 'q=',
-'looksmart','key=',
-'mamma','query=',
-'metacrawler','general=',
-'nbci','keyword=',
-'northernlight','qr=',
-'overture','keywords=',
-'dogpile', 'q(|kw)=',
-'spray','string=',
-'teoma','q=',
-'webcrawler','searchText=',
-'wisenut','query=',
-'ixquick', 'query=',
-'earthlink', 'q=',
-'iune','(keywords|q)=',
-'blingo','q=',
-'centraldatabase','query=',
-'clusty','query=',
-'mysearch','searchfor=',
-'vivisimo','query=',
-# kartoo: No keywords passed in referring URL.
-'kartoo','',
-'icerocket','q=',
-'sphere','q=',
-'ledix','q=',
-'shawca','q=',
-'searchalot','q=',
-'copernic','web\/',
-'avantfind','keywords=',
-'steadysearch','w=',
-'clarosearch','q=',
-'searchresults','q=',
-'holasearch', 'q=',
-'conduit', 'q=',
-'flipora', 'q=',
-'delta-search', 'q=',
-'iminent', 'q=',
-'searchmobileonline', 'q=',
-'nortonsavesearch', 'q=',
-'inbox', 'q(?:kw)?=',
-'govome', 'q=',
-'find1friend', 'q=',
-'mysearchdial', 'q=',
-'speedbit', 'q=',
-'certifiedtoolbarsearch', 'q=',
-'sweetim', 'q=',
-'searchcompletion', 'q=',
-'eazelsearch', 'q=',
-'searchfunmoods', 'q=',
-'googleByIP', 'q=',
-'dalesearch', 'q=',
-'sweetpacks', 'q=',
-'searchgol', 'q=',
-'duckduckgo', 'uddg=',
-'facemoods', 'q=',
-'shoppstop', 'keywords=',
-'searchya', 'q=',
-'picsearch', 'q=',
-'webssearches', 'q=',
-'zapmeta', 'query=',
-'localmoxie', 'keyword=',
-'search-results_mobi', 'q=',
-'androidsearch', 'q=',
-'isearch_nation_com', 'q=',
-'search_zonealarm_com', 'q=',
-'www_buenosearch_com', 'q=',
-'search_foxtab_com', 'q=',
-'searches_qone8_com', 'q=',
-'startpage_com', 'query=',
-'qwant_com', 'q=',
-'safehomepage_com', 'q=',
-'vi-view_com', 'q=',
-'wow_utop_it', 'q=',
-'windowssearch_com', 'q=',
-'www_wow_com', 'q=',
-'globososo', 'q=',
-'swisscows_ch', 'query=',
-'preciobarato_xyz', 's=',
-'www_dregol_com', 'q=',
-'search_socialdownloadr_com', 'q=',
-'int_search_myway_com', 'searchfor=',
-'de_dolphin_com', 'q=',
-'mys_yoursearch_me', 'q=',
-# Chello Portals
-'chelloat','q1=',
-'chellobe','q1=',
-'chellocz','q1=',
-'chellofr','q1=',
-'chellohu','q1=',
-'chellonl','q1=',
-'chellono','q1=',
-'chellopl','q1=',
-'chellose','q1=',
-'chellosk','q1=',
-'chellocom','q1=',
-# Mirago
-'miragobe','(txtsearch|qry)=',
-'miragoch','(txtsearch|qry)=',
-'miragode','(txtsearch|qry)=',
-'miragodk','(txtsearch|qry)=',
-'miragoes','(txtsearch|qry)=',
-'miragofr','(txtsearch|qry)=',
-'miragoit','(txtsearch|qry)=',
-'miragonl','(txtsearch|qry)=',
-'miragono','(txtsearch|qry)=',
-'miragose','(txtsearch|qry)=',
-'miragocouk','(txtsearch|qry)=',
-'mirago','(txtsearch|qry)=',
-'answerbus','', # Does not provide query parameters
-'icq','q=',
-'nusearch','nusearch_terms=',
-'goodsearch','Keywords=',
-'scroogle','Gw=', # Does not always provide query parameters
-'questionanswering','',
-'mywebsearch','searchfor=',
-'comettoolbar','qry=',
-# Social Bookmarking Services
-'delicious','all=',
-'digg','s=',
-'stumbleupon','',
-'swik','swik\.net/', # does not work. Keywords follow domain, e.g. http://swik.net/awstats+analytics
-'segnalo','',
-'ineffabile','',
-# Minor Australian search engines
-'anzwers','search=',
-# Minor brazilian search engines
-'engine','p1=', 'miner','q=',
-# Minor chinese search engines
-'baidu','(wd|word)=',
-'iask','(w|k)=',
-'accoona','qt=',
-'3721','(p|name)=',
-'netease','q=',
-'soso','q=',
-'zhongsou','(word|w)=',
-'sogou', 'query=',
-'vnet','kw=',
-# Minor czech search engines
-'atlas','(searchtext|q)=', 'seznam','(w|q)=', 'quick','query=', 'centrum','q=', 'jyxo','(s|q)=', 'najdi','dotaz=', 'redbox','srch=',
-'avgsearch', 'q=',
-# Minor danish search engines
-'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=',
-# Minor dutch search engines
-'ilse','search_for=', 'vindex','in=',
-# Minor english search engines
-'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
-'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
-'fbdownloader','q=',
-'fdownloadr_com', 'q=',
-'babylon','q=',
-'allgameshome', 's=',
-'surfcanyon_com', 'q=',
-'uk_foxstart_com', 'q=',
-'yandex_com', 'text=',
-# Minor finnish search engines
-'haku','w=',
-# Minor french search engines
-'francite','name=', 'clubinternet', 'q=',
-'toile', 'q=',
-'biglotron','question=',
-'mozbot','q=',
-# Minor german search engines
-'aolde','q=',
-'o2aolde', 'q=',
-'fireball','q=', 'infoseek','qt=', 'webde','su=',
-'abacho','q=', 't-online','q=',
-'metaspinner','qry=',
-'metacrawler_de','qry=',
-'wwweasel','q=',
-'netluchs','query=',
-'schoenerbrausen','q=',
-'gmxsuche', 'q=',
-'gmxsuche_at', 'q=',
-'ecosiasearch', 'q=',
-'aolsearch', 'q=',
-'aolsuche', 'q=',
-'startxxl', 'q=',
-'benefind', 'q=',
-'amazonsearch', 'query=',
-'wowsearch', 'q=',
-'vlips_de', 'q=',
-'metager', 'eingabe=',
-'search_1und1_de', 'q=',
-'smde', 'q=',
-#'sumaja', 'no query string available', #There is no query string in the referrer url
-'navigationshilfe', 'q=',
-'umfis', 'suchbegriff=',
-'fastbot_de', 'red=[0-9]*\+',
-'tixuma_de', 'sc=',
-'freenet_de', 'query=',
-'izito_de', 'q=',
-'peoplecheck_de', 'q=',
-'oneseek_de', 'q=',
-'de_wiki_gov_cn', 'de\.wiki\.gov\.cn\/s_',
-'umuwa_de', 'umuwa\.de\/',
-'1und1_de', 'q=',
-'metasuche_ch', 'q=',
-# Minor Hungarian search engines
-'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
-'keresolap_hu','q=',
-'startlap_hu', 'q=',
-'tango_hu','q=',
-'polymeta_hu','',
-# Minor Indian search engines
-'sify','keyword=',
-# Minor Italian search engines
-'virgilio','qs=',
-'arianna','query=',
-'supereva','q=',
-'kataweb','q=',
-'aliceitmaster','qs=',
-'aliceit','qs=',
-'gotuneed','', # Not yet known
-'godado','Keywords=',
-'jumpy\.it','searchWord=',
-'shinyseek\.it','KEY=',
-'teecnoit','q=',
-# Minor Israeli search engines
-'genieo','q=',
-# Minor Japanese search engines
-'askjp','(ask|q)=',
-'sagool','q=',
-'rakuten', 'qt=',
-# Minor Norwegian search engines
-'start','q=', 'eniro','q=',
-# Minor Polish search engines
-'wp','szukaj=',
-'onetpl','qt=',
-'dodajpl','keyword=',
-'gazetapl','slowo=',
-'gerypl','q=',
-'hogapl','qt=',
-'netsprintpl','q=',
-'interiapl','q=',
-'katalogonetpl','qt=',
-'o2pl','qt=',
-'polskapl','qt=',
-'szukaczpl','q=',
-'wowpl','q=',
-# Minor russian search engines
-'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=',
-'mailru', 'q=',
-# Minor swedish search engines
-'passagen','q=',
-'enirose', 'hitta:', #Not sure if this works, as the keywords are part of the URL, and therefore the URL does not contain a question mark.
-# Minor swiss search engines
-'searchch', 'q=', 'bluewin', 'qry=',
-'zapmeta_ch', 'query=',
-'etools_ch', 'query=',
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
-'pogodak', 'q='
-);
-
-# SearchEnginesKnownUrlNotFound
-# Known rules to extract not found keywords from a referrer search engine URL
-#------------------------------------------------------------------------------
-%SearchEnginesKnownUrlNotFound=(
-# Most common search engines
-'msn','origq='
-);
-
-# If no rules are known, we take first paramater not into WordsToCleanSearchUrl
-#------------------------------------------------------------------------------
-@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');
-
-# SearchEnginesKnownUTFCoding
-# Known parameter that proves a search engine has coded its parameters in UTF-8
-#------------------------------------------------------------------------------
-%SearchEnginesKnownUTFCoding=(
-# Most common search engines
-'google','ie=utf-8',
-'alltheweb','cs=utf-8'
-);
-
-
-# SearchEnginesHashLib
-# List of search engines names
-# 'search_engine_id', 'search_engine_name',
-#------------------------------------------------------------------------------
-%SearchEnginesHashLib=(
-# Major international search engines
-'alexa','Alexa',
-'alltheweb','AllTheWeb',
-'altavista','AltaVista',
-'a9', 'A9',
-'dmoz','DMOZ',
-'google_products','Google (Products)',
-'google_base','Google (Base)',
-'google_froogle','Froogle (Google)',
-'google_groups','Google (Groups)',
-'google_image','Google (Images)',
-'google_cache','Google (cache)',
-'google','Google',
-'lycos','Lycos',
-'msn','Microsoft MSN Search',
-'live','Microsoft Windows Live',
-'bing','Microsoft Bing',
-'netscape','Netscape',
-'aol','AOL',
-'terra','Terra',
-'tiscali','Tiscali',
-'voila','Voila',
-'search.com','Search.com',
-'yahoo_mindset','Yahoo! Mindset',
-'yahoo','Yahoo!',
-'sympatico','Sympatico',
-'excite','Excite',
-# Minor international search engines
-'google4counter','4-counter (Google)',
-'att','AT&T search (powered by Google)',
-'bungeebonesdotcom','BungeeBones',
-'go','Go.com',
-'askde','Ask Deutschland',
-'askes','Ask España', # break out Ask country specific engines.
-'askfr','Ask France',
-'askit','Ask Italia',
-'asknl','Ask Nederland',
-'ask','Ask',
-'atomz','Atomz',
-'dejanews','DejaNews',
-'euroseek','Euroseek',
-'findarticles','Find Articles',
-'go2net','Go2Net (Metamoteur)',
-'hotbot','Hotbot',
-'infospace','InfoSpace',
-'kvasir','Kvasir',
-'looksmart','Looksmart',
-'mamma','Mamma',
-'metacrawler','MetaCrawler (Metamoteur)',
-'nbci','NBCI',
-'northernlight','NorthernLight',
-'overture','Overture', # Replace 'goto\.com','Goto.com',
-'dogpile','Dogpile',
-'spray','Spray',
-'teoma','Teoma', # Replace 'directhit\.com','DirectHit',
-'webcrawler','WebCrawler',
-'wisenut','WISENut',
-'ixquick','ix quick',
-'earthlink', 'Earth Link',
-'iune','i-une',
-'blingo','Blingo',
-'centraldatabase','GPU p2p search',
-'clusty','Clusty',
-'mysearch','My Search',
-'vivisimo','Vivisimo',
-'kartoo','Kartoo',
-'icerocket','Icerocket (Blog)',
-'sphere','Sphere (Blog)',
-'ledix','Ledix',
-'shawca','Shaw.ca',
-'searchalot','Searchalot',
-'copernic','Copernic',
-'avantfind','Avantfind',
-'steadysearch','Avantfind',
-'clarosearch','Claro Search',
-'searchresults','Search-results',
-'holasearch', 'Hola Search',
-'conduit', 'Conduit Search',
-'flipora', 'Flipora',
-'delta-search', 'Delta Search',
-'iminent', 'Iminent',
-'searchmobileonline', 'Search Mobile Online (StartApp)',
-'nortonsavesearch', 'Norton Safe Search',
-'inbox', 'Inbox Search',
-'govome', 'Govome',
-'find1friend', 'Find1Friend',
-'mysearchdial', 'My Search Dial',
-'speedbit', 'Speedbit',
-'certifiedtoolbarsearch', 'Certified-Toolbar Search',
-'sweetim', 'SweetIM Search',
-'searchcompletion', 'SearchCompletion Search',
-'eazelsearch', 'Eazel Search',
-'searchfunmoods', 'Funmoods',
-'googleByIP', 'Google (Access by IP-Address)',
-'dalesearch', 'Dale Search',
-'sweetpacks', 'Sweetpacks',
-'searchgol', 'Search-Gol',
-'duckduckgo', 'DuckDuckGo (Does not provide search keyphrases, using found page instead)',
-'facemoods', 'Facemoods Search',
-'shoppstop', 'ShoppStop',
-'searchya', 'Searchya',
-'picsearch', 'picsearch',
-'webssearches', 'Various variants of Webssearches EMG Technologies and airzip.inspsearch.com',
-#Jan 8, 2016: No genuine inspsearch.com search engine seems so exist, but there is a couple of search engines using subdomains of inspsearch.com. Unclear how these are related to each other.
-'zapmeta', 'ZapMeta',
-'localmoxie', 'Local Moxie',
-'search-results_mobi', 'search-results.mobi',
-'androidsearch', 'androidsearch.com',
-'isearch_nation_com', 'Nation Search',
-'search_zonealarm_com', 'Zone Alarm Search',
-'www_buenosearch_com', 'BuenoSearch',
-'search_foxtab_com', 'Foxtab Search',
-'searches_qone8_com', 'Omiga-Plus',
-'startpage_com', 'Startpage',
-'qwant_com', 'qwant.com',
-'safehomepage_com', 'safehomepage.com',
-'vi-view_com', 'vi-view.com',
-'wow_utop_it', 'wow.utop.it',
-'windowssearch_com', 'windowssearch.com',
-'www_wow_com', 'WOW.com',
-'globososo', 'Various variants of Globososo (Kingtale Technology): www, searches, searches3, and at inspsearch.com (globososo, kingtale3)',
-'swisscows_ch', 'Swisscows',
-'preciobarato_xyz', 'Yandex',
-'www_dregol_com', 'Dregol Search',
-'search_socialdownloadr_com', 'Socialdownloadr',
-'int_search_myway_com', 'MyWay',
-'de_dolphin_com', 'Dolphin Search',
-'mys_yoursearch_me', 'Yoursearch.me',
-# Chello Portals
-'chelloat','Chello Austria',
-'chellobe','Chello Belgium',
-'chellocz','Chello Czech Republic',
-'chellofr','Chello France',
-'chellohu','Chello Hungary',
-'chellonl','Chello Netherlands',
-'chellono','Chello Norway',
-'chellopl','Chello Poland',
-'chellose','Chello Sweden',
-'chellosk','Chello Slovakia',
-'chellocom','Chello (Country not recognized)',
-# Mirago
-'miragobe','Mirago Belgium',
-'miragoch','Mirago Switzerland',
-'miragode','Mirago Germany',
-'miragodk','Mirago Denmark',
-'miragoes','Mirago Spain',
-'miragofr','Mirago France',
-'miragoit','Mirago Italy',
-'miragonl','Mirago Netherlands',
-'miragono','Mirago Norway',
-'miragose','Mirago Sweden',
-'miragocouk','Mirago UK',
-'mirago','Mirago (country unknown)',
-'answerbus','Answerbus',
-'icq','icq',
-'nusearch','Nusearch',
-'goodsearch','GoodSearch',
-'scroogle','Scroogle',
-'questionanswering','Questionanswering',
-'mywebsearch','MyWebSearch',
-'comettoolbar','Comet toolbar search',
-# Social Bookmarking Services
-'delicious','del.icio.us (Social Bookmark)',
-'digg','Digg (Social Bookmark)',
-'stumbleupon','Stumbleupon (Social Bookmark)',
-'swik','Swik (Social Bookmark)',
-'segnalo','Segnalo (Social Bookmark)',
-'ineffabile','Ineffabile.it (Social Bookmark)',
-# Minor Australian search engines
-'anzwers','anzwers.com.au',
-# Minor brazilian search engines
-'engine','Cade', 'miner','Meta Miner',
-# Minor chinese search engines
-'baidu','Baidu',
-'iask','Iask',
-'accoona','Accoona',
-'3721','3721',
-'netease', 'NetEase',
-'soso','SoSo',
-'zhongsou','ZhongSou',
-'sogou', 'SoGou',
-'vnet','VNet',
-# Minor czech search engines
-'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',
-'avgsearch', 'AVG Secure Search',
-# Minor danish search-engines
-'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir',
-# Minor dutch search engines
-'ilse','Ilse','vindex','Vindex\.nl',
-# Minor english search engines
-'askuk','Ask UK',
-'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',
-'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
-'fbdownloader','FBDownloader (fbdownloader)',
-'fdownloadr_com', 'FBDownloader (fdownloadr)',
-'babylon','Babylon',
-'allgameshome', 'AllGamesHome',
-'surfcanyon_com', 'SurfCanyon',
-'uk_foxstart_com', 'Foxstart.com',
-'yandex_com', 'Yandex',
-# Minor finnish search engines
-'haku','Ihmemaa',
-# Minor french search engines
-'aolfr','AOL (fr)', 'ctrouve','C\'est trouve', 'francite','Francite', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet',
-'toile', 'Toile du Quebec',
-'biglotron','Biglotron',
-'mozbot','Mozbot',
-# Minor German search engines
-'aolde','AOL (de)',
-'o2aolde', 'o2 Suche',
-'fireball','Fireball', 'infoseek','Infoseek',
-'webde','Web.de',
-'abacho','Abacho',
-'t-online','T-Online',
-'allesklar','allesklar.de', 'meinestadt','meinestadt.de',
-'metaspinner','metaspinner',
-'metacrawler_de','metacrawler.de',
-'wwweasel','WWWeasel',
-'netluchs','Netluchs',
-'schoenerbrausen','Schoenerbrausen/',
-'gmxsuche', 'GMX Suche',
-'gmxsuche_at', 'GMX Suche Oesterreich',
-'ecosiasearch', 'Ecosia Search',
-'aolsearch', 'AOL Search',
-'aolsuche', 'AOL Suche',
-'startxxl', 'StartXXL',
-'benefind', 'benefind',
-'amazonsearch', 'Amazon Web Search',
-'wowsearch', 'Wow Search',
-'vlips_de', 'vlips.de',
-'metager', 'MetaGer',
-'search_1und1_de', '1&1 Suche (subdomain "search")',
-'smde', 'SM.de - Die SuchMaschine',
-'sumaja', 'Sumaja',
-'navigationshilfe', 'T-Online Navigationshilfe',
-'umfis', 'UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland',
-'fastbot_de', 'Fastbot.de (Does not provide search keyphrases, using found page instead)',
-'tixuma_de', 'Tixuma Deutschland',
-'freenet_de', 'suche.freenet.de',
-'izito_de', 'iZito Deutschland',
-'peoplecheck_de', 'PeopleCheck.de',
-'oneseek_de', 'Metasuchmaschine OneSeek.de',
-'de_wiki_gov_cn', 'Wiki Sucher',
-'umuwa_de', 'Umuwa Deutschland',
-'1und1_de', '1&1 Suche (subdomain "suche")',
-'metasuche_ch', 'Metasuche.ch',
-# Minor hungarian search engines
-'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
-'tango_hu','Tango',
-'keresolap_hu','Tango keresolap',
-'startlap_hu','Startlab Kereso',
-'polymeta_hu','Polymeta',
-# Minor Indian search engines
-'sify','Sify',
-# Minor Italian search engines
-'virgilio','Virgilio',
-'arianna','Arianna',
-'supereva','Supereva',
-'kataweb','Kataweb',
-'aliceitmaster','search.alice.it.master',
-'aliceit','alice.it',
-'gotuneed','got u need',
-'godado','Godado.it',
-'jumpy\.it','Jumpy.it',
-'shinyseek\.it','Shinyseek.it',
-'teecnoit','Teecno',
-# Minor Israeli search engines
-'genieo','Genieo',
-# Minor Japanese search engines
-'askjp','Ask Japan',
-'sagool','Sagool',
-'rakuten', 'websearch.rakuten.co.jp',
-# Minor Norwegian search engines
-'start','start.no', 'eniro','Eniro',
-# Minor polish search engines
-'wp','Wirtualna Polska',
-'onetpl','Onet.pl',
-'dodajpl','Dodaj.pl',
-'gazetapl','Gazeta.pl',
-'gerypl','Gery.pl',
-'hogapl','Hoga.pl',
-'netsprintpl','NetSprint.pl',
-'interiapl','Interia.pl',
-'katalogonetpl','Katalog.Onet.pl',
-'o2pl','o2.pl',
-'polskapl','Polska',
-'szukaczpl','Szukacz',
-'wowpl','Wow.pl',
-# Minor russian search engines
-'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot',
-'mailru','Mail.Ru',
-# Minor Swedish search engines
-'passagen','Evreka',
-'enirose','Eniro Sverige',
-# Minor Slovak search engines
-'zoznam','Zoznam',
-# Minor Portuguese search engines
-'sapo','Sapo',
-# Minor Swiss search engines
-'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',
-'zapmeta_ch', 'ZapMeta.ch',
-'etools_ch', 'eTools.ch',
-# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
-'pogodak','Pogodak.com',
-# Generic search engines
-'search','Unknown search engines'
-);
-
-
-# Sanity check.
-# Enable this code and run perl search_engines.pm to check file entries are ok
-#-----------------------------------------------------------------------------
-#foreach my $key (@SearchEnginesSearchIDOrder_list1) {
-# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
-# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }
-# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }
-#} }
-#foreach my $key (@SearchEnginesSearchIDOrder_list2) {
-# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
-# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }
-# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }
-#} }
-#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }
-#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }
-#foreach my $key (keys %SearchEnginesKnownUrl) {
-# my $found=0;
-# foreach my $key2 (values %SearchEnginesHashID) {
-# if ($key eq $key2) { $found=1; last; }
-# }
-# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }
-#}
-#foreach my $key (keys %SearchEnginesHashLib) {
-# my $found=0;
-# foreach my $key2 (values %SearchEnginesHashID) {
-# if ($key eq $key2) { $found=1; last; }
-# }
-# if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }
-#}
-#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
-
-1;
+# AWSTATS SEARCH ENGINES DATABASE
+#------------------------------------------------------------------------------
+# If you want to add a Search Engine to extend AWStats database detection capabilities,
+# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in
+# SearchEnginesHashLib.
+# An entry in SearchEnginesKnownUrl, if known, is also welcome.
+#
+# to eldy: Please check if the following description is correct:
+# You need the following information to specify a search engine:
+# (a) A regular expression that matches the referrer string of the
+# search engine. Unclear: What about slashes in the name of
+# a search engine, e.g. as in 'ecosia.com/search'. Seems that
+# AWStats will not find search strings containing a slash.
+# Maybe use a search string without a slash, and - if necessary -
+# an entry in %NotSearchEnginesKeys , if this search string
+# matches entries that are not search engines.
+# Example of a web address of an Amazon search engine:
+# http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
+# (b) A unique string to identify the search engine within AWStats
+# (c) A regular expression that finds the start of the query part in the
+# referrer string
+# (d) A HTML-fragment that goes into the reports generated by AWStats which
+# identifies the search engine to the human reader of the report. In the
+# simplest case this is a string containing the name of the search
+# engine. You can also provide a hypertext clause that presents the
+# name together with a link to the search engine.
+#
+# The regular expression (a) goes into SearchEnginesSearchIDOrder_list1
+# or ..._list2. List 1 contains common search engines, list 2 those
+# that are not so often used.
+#
+# SearchEnginesHashID contains two consecutive entries for each search
+# engine: The regular expression (a) followed by the search engine
+# identifier (b)
+#
+# SearchEnginesKnownUrl specifies how to find the start of the query.
+# For each search engine you enter the search engine identifier (b)
+# followed by the regular expression (c). Unclear: It is possible to
+# omit this entry. If you do this, how will AWStats find the start of
+# the query?
+#
+# SearchEnginesHashLib contains also two entries for each search engine:
+# The search engine identifier (b) followed by the HTML-Fragment (d)
+#
+# There are search engines that do not use a query part in their URLs.
+# They put the search expression in the main part of the URL instead.
+# AWStats is able to handle these cases. They are specified as described
+# above, except the following two things:
+# - The regular expression (c) searches the complete URL and not only
+# the query part.
+# - An additional Entry in the list %SearchEnginesWithKeysNotInQuery is
+# necessary.
+#
+#
+# AWStats runs a sanity check of the contents of search_engines.pm. This
+# check detects the following things:
+# - Inconsistencies (number of entries)
+# It does not detect the following errors:
+# - If the HTML-Fragment (d) is syntactically incorrect.
+#
+#------------------------------------------------------------------------------
+
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
+# added minor italian search engines
+# arianna http://arianna.libero.it/
+# supereva http://search.supereva.com/
+# kataweb http://kataweb.it/
+# corrected uk looksmart
+# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
+# to
+# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
+# corrected spelling
+# internationnal -> international
+# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
+# avoid counting gmail referrals as search engine traffic
+# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html
+# avoid counting babelfish.altavista referrals as search engine traffic
+# avoid counting translate.google referrals as search engine traffic
+# 2005-11-20 Sean Carlos
+# added missing 'tiscali','key=', entry. Check order
+# 2005-11-22 Sean Carlos
+# added Google Base & Froogle. Froogle not tested.
+# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html
+# added biglotron.com (France)
+# added blingo http://www.blingo.com/
+# added Clusty & Vivisimo
+# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783]
+# added GPU p2p search http://search.centraldatabase.org/
+# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688]
+# added Ask group's "mysearch"
+# added sify.com (India)
+# added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603]
+# Ask changes:
+# - added Ask Japan (ask.jp)
+# - break out Ask new country level variants (DE, ES, FR, IT, NL)
+# - updated Ask name from Ask Jeeves
+# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444]
+# - updated Ask uk (new uk.ask.com added to older ask.co.uk)
+# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912]
+# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch
+# to AWStats to allow untranslated html. Otherwise html will appear instead of link.
+# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer
+# exists https://sourceforge.net/forum/message.php?msg_id=3025426
+# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
+# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden)
+# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005
+# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104
+# To do: add more extensive IP list; keywords not yet detected.
+# added icerocket.com blog search http://www.icerocket.com/
+# added live.com (msn) http://www.live.com/
+# added Meta motor kartoo. Note: Kartoo does not provide search words in referrers, thus the engine will appear in the
+# search engine list but the actual search words are not available.
+# added netluchs.de http://www.netluchs.de/
+# added sphere.com blog search http://www.sphere.com/
+# added wwweasel.de http://wwweasel.de
+# added Yahoo Mindset! http://mindset.research.yahoo.com/
+# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland)
+# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html
+# added Google cache IPs 64.233.183.104 & 66.102.7.104
+# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html
+# anzwers.com.au
+# schoenerbrausen.de http://www.schoenerbrausen.de/
+# added Google cache IP 216.239.59.104
+# answerbus http://www.answerbus.com/ (does not provide keywords)
+# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html
+# added Google cache IP 66.102.9.104, 64.233.161.104
+# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html
+# added Alice Search search.alice.it
+# added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web"
+# added googlee.com, variant of Google
+# added gotuneed http://www.gotuneed.com/ Italian search engine, in beta
+# added icq.com
+# added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out?
+# added Nusearch http://www.nusearch.com/
+# added Polymeta www.polymeta.hu (does not provide keywords)
+# added scroogle http://www.scroogle.org/ (does not always provide keywords)
+# added Tango http://tango.hu/search.php?st=0&q=jeles+napok
+# Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104
+# 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104
+# 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104
+# 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104
+# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html
+# added Onet.pl http://szukaj.onet.pl/
+# corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/
+# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html
+# Additional Polish Search Engines:
+# added Dodaj.pl http://www.dodaj.pl/
+# added Gazeta.pl http://szukaj.gazeta.pl/
+# added Gery.pl http://szukaj.gery.pl/
+# added Hoga.pl http://www.hoga.pl/
+# added Interia.pl http://www.google.interia.pl/
+# added Katalog.Onet.pl http://katalog.onet.pl/
+# added NetSprint.pl http://www.netsprint.pl/
+# added o2.pl http://szukaj2.o2.pl/
+# added Polska http://szukaj.polska.pl/
+# added Szukacz http://www.szukacz.pl/
+# added Wow.pl http://szukaj.wow.pl/
+# added Sagool http://sagool.jp/
+
+# 2006-08-25 Social Bookmarks
+# International
+# added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer?
+# added stumbleupon.com - No keywords supplied.
+# added swik.net
+# added digg. Keywords sometimes supplied.
+# Italy
+# added segnalo.alice.it - No keywords supplied.
+# added ineffabile.it - No keywords supplied.
+
+# added filter for google groups. Attempt to parse group name as keyword.
+
+# 2006-09-14
+# added Eniro Sverige http://www.eniro.se/
+# added MyWebSearch http://search.mywebsearch.com/
+# added Teecno http://www.teecno.it/ Italian Open Source Search Engine
+
+#package AWSSE;
+
+# 2006-09-25 (Gabor Moizes)
+# added 4-counter (Google alternative) http://4-counter.com/
+# added Googlecom (Google alternative) http://googlecom.com/
+# added Goggle (Google alternative) http://goggle.co.hu/
+# added Comet toolbar http://as.starware.com
+# added new IP for Yahoo: 216.109.125.130
+# added Ledix http://ledix.net/
+# added AT&T search (powered by Google) http://www.att.net/
+# added Keresolap (Hungarian search engine) http://www.keresolap.hu/
+# added Mozbot (French search engine) http://www.mozbot.fr/
+# added Zoznam (Slovak search engine) http://www.zoznam.sk/
+# added sapo.pt (Portuguese search engine) http://www.sapo.pt/
+# added shaw.ca (powered by Google) http://start.shaw.ca/
+# added Searchalot http://www.searchalot.com/
+# added Copernic http://www.copernic.com/
+# added 216.109.125.130 to Yahoo
+# added 66.218.69.11 to Yahoo
+# added Avantfind http://www.avantfind.com/
+# added Steadysearch http://www.steadysearch.com/
+# added Steadysearch http://www.steady-search.com/
+# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104
+
+
+# SearchEnginesSearchIDOrder
+# Ordered lists of all matching criteria to search for in log fields. The
+# position in a list decides the order in which Search Engine IDs are tried.
+# The most frequent engines are in list1, used when LevelForSearchEnginesDetection is 1 or more.
+# Minor engines are in list2, used when LevelForSearchEnginesDetection is 2 or more.
+# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
+#------------------------------------------------------------------------------
+@SearchEnginesSearchIDOrder_list1=(
+# Major international search engines
+'google\.[\w.]+/products',
+'base\.google\.',
+'froogle\.google\.',
+'groups\.google\.',
+'images\.google\.',
+'google\.', # generic Google entry; listed after the more specific Google entries above
+'googlee\.',
+'googlecom\.com',
+'goggle\.co\.hu',
+'216\.239\.32\.20',
+'173\.194\.32\.223',
+'216\.239\.(35|37|39|51)\.100', # this and the following IP patterns map to 'google_cache' in SearchEnginesHashID
+'216\.239\.(35|37|39|51)\.101',
+'216\.239\.5[0-9]\.104',
+'64\.233\.1[0-9]{2}\.104',
+'66\.102\.[1-9]\.104',
+'66\.249\.93\.104',
+'72\.14\.2[0-9]{2}\.104',
+'msn\.',
+'live\.com',
+'bing\.',
+'voila\.',
+'mindset\.research\.yahoo',
+'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', # Yahoo by name, then by IP address
+'search\.aol\.co',
+'tiscali\.',
+'lycos\.',
+'alexa\.com',
+'alltheweb\.com',
+'altavista\.',
+'a9\.com',
+'dmoz\.org',
+'netscape\.',
+'search\.terra\.',
+'www\.search\.com',
+'search\.sli\.sympatico\.ca',
+'excite\.'
+);
+
+@SearchEnginesSearchIDOrder_list2=(
+# Minor international search engines
+'4\-counter\.com',
+'att\.net',
+'bungeebonesdotcom',
+'northernlight\.',
+'hotbot\.',
+'kvasir\.',
+'webcrawler\.',
+'metacrawler\.',
+'go2net\.com',
+'(^|\.)go\.com',
+'euroseek\.',
+'looksmart\.',
+'spray\.',
+'nbci\.com\/search',
+'de\.ask.\com', # break out Ask country specific engines. (.jp is in Japan section)
+'es\.ask.\com', # NOTE(review): '.\c' is probably a typo for '\.c' (in a Perl regex '\co' is Control-O),
+'fr\.ask.\com', # so these six entries likely never match a real host name - TODO confirm.
+'it\.ask.\com', # Any correction must be applied to the identical keys in SearchEnginesHashID
+'nl\.ask.\com', # at the same time, otherwise the ID lookup for these entries breaks.
+'uk\.ask.\com',
+'(^|\.)ask\.com',
+'atomz\.',
+'overture\.com', # Replace 'goto\.com','Goto.com',
+'teoma\.',
+'findarticles\.com',
+'infospace\.com',
+'mamma\.',
+'dejanews\.',
+'dogpile\.com',
+'wisenut\.com',
+'ixquick\.com',
+'search\.earthlink\.net',
+'i-une\.com',
+'blingo\.com',
+'centraldatabase\.org',
+'clusty\.com',
+'mysearch\.',
+'vivisimo\.com',
+'kartoo\.com',
+'icerocket\.com',
+'sphere\.com',
+'ledix\.net',
+'start\.shaw\.ca',
+'searchalot\.com',
+'copernic\.com',
+'avantfind\.com',
+'steadysearch\.com',
+'steady-search\.com',
+'claro-search\.com',
+'www1\.search-results\.com',
+'www\.holasearch\.com',
+'search\.conduit\.com',
+'static\.flipora\.com',
+'(?:www[12]?|mixidj)\.delta-search\.com',
+'start\.iminent\.com',
+'www\.searchmobileonline\.com',
+'int\.search-results\.com',
+'www2\.inbox\.com',
+'www\.govome\.com',
+'find1friend\.com',
+'start\.mysearchdial\.com',
+'go\.speedbit\.com',
+'search\.certified-toolbar\.com',
+'search\.sweetim\.com',
+'search\.searchcompletion\.com',
+'en\.eazel\.com',
+'sr\.searchfunmoods\.com',
+'173\.194\.35\.177',
+'dalesearch\.com',
+'sweetpacks-search\.com',
+'searchgol\.com',
+'duckduckgo\.com',
+'sr\.facemoods\.com',
+'shoppstop\.com',
+'searchya\.com',
+'picsearch\.de',
+'webssearches\.com',
+'airzip\.inspsearch\.com',
+'zapmeta\.de',
+'localmoxie\.com',
+'search-results\.mobi',
+'androidsearch\.com',
+'isearch\.nation\.com',
+'search\.zonealarm\.com',
+'www\.buenosearch\.com',
+'search\.foxtab\.com',
+'searches\.qone8\.com',
+'startpage\.com',
+'www\.qwant\.com',
+'searches\.safehomepage\.com',
+'searches\.vi-view\.com',
+'wow\.utop\.it',
+'windowssearch\.com',
+'www\.wow\.com',
+'globososo\.',
+'kingtale3\.inspsearch\.com',
+'swisscows\.ch',
+'preciobarato\.xyz',
+'www\.dregol\.com',
+'search\.socialdownloadr\.com',
+'int\.search\.myway\.com',
+'de\.dolphin\.com',
+'mys\.yoursearch\.me',
+# Chello Portals
+'chello\.at',
+'chello\.be',
+'chello\.cz',
+'chello\.fr',
+'chello\.hu',
+'chello\.nl',
+'chello\.no',
+'chello\.pl',
+'chello\.se',
+'chello\.sk',
+'chello', # required as catchall for new countries not yet known
+# Mirago
+'mirago\.be',
+'mirago\.ch',
+'mirago\.de',
+'mirago\.dk',
+'es\.mirago\.com',
+'mirago\.fr',
+'mirago\.it',
+'mirago\.nl',
+'no\.mirago\.com',
+'mirago\.se',
+'mirago\.co\.uk',
+'mirago', # required as catchall for new countries not yet known
+'answerbus\.com',
+'icq\.com\/search',
+'nusearch\.com',
+'goodsearch\.com',
+'scroogle\.org',
+'questionanswering\.com',
+'mywebsearch\.com',
+'as\.starware\.com',
+# Social Bookmarking Services
+'del\.icio\.us',
+'digg\.com',
+'stumbleupon\.com',
+'swik\.net',
+'segnalo\.alice\.it',
+'ineffabile\.it',
+# Minor Australian search engines
+'anzwers\.com\.au',
+# Minor Brazilian search engines
+'engine\.exe', 'miner\.bol\.com\.br',
+# Minor Chinese search engines
+'\.baidu\.com', # baidu search portal
+'\.vnet\.cn', # powered by MSN
+'\.soso\.com', # powered by Google
+'\.sogou\.com', # powered by Sohu
+'\.3721\.com', # powered by Yahoo!
+'iask\.com', # powered by Sina
+'\.accoona\.com', # Accoona
+'\.163\.com', # powered by Google
+'\.zhongsou\.com', # zhongsou search portal
+# Minor Czech search engines
+'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
+'isearch\.avg\.com',
+# Minor Danish search engines
+'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk',
+# Minor Dutch search engines
+'ilse\.','vindex\.',
+# Minor English search engines
+'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
+'search\.fbdownloader\.com',
+'search\.fdownloadr\.com',
+'search\.babylon\.com',
+'my\.allgameshome\.com',
+'surfcanyon\.com',
+'uk\.foxstart\.com',
+'yandex\.com',
+# Minor Finnish search engines
+'haku\.www\.fi',
+# Minor French search engines
+'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr',
+'toile\.com', 'biglotron\.com',
+'mozbot\.fr',
+# Minor German search engines
+'sucheaol\.aol\.de',
+'o2suche\.aol\.de',
+'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de',
+'suchen\.abacho\.de','(brisbane|suche)\.t-online\.de','allesklar\.de','meinestadt\.de',
+'212\.227\.33\.241',
+'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)',
+'wwweasel\.de',
+'netluchs\.de',
+'schoenerbrausen\.de',
+'suche\.gmx\.net',
+'suche\.gmx\.at',
+'ecosia\.org',
+'de\.aolsearch\.com',
+'suche\.aol\.de',
+'www\.startxxl\.com',
+'www\.benefind\.de',
+'www\.amazon\.de.*search', # Kept as a reminder only: probably will not work, as AWStats seems to consider only the host part of a URL
+'de\.wow\.com',
+'www\.vlips\.de',
+'metager\.de',
+'search\.1und1\.de',
+'sm\.de',
+'sumaja\.de',
+'navigationshilfe\.t-online\.de',
+'umfis\.de',
+'fastbot\.de',
+'tixuma\.de',
+'suche\.freenet\.de',
+'www\.izito\.de',
+'extern\.peoplecheck\.de',
+'www\.oneseek\.de',
+'de\.wiki\.gov\.cn',
+'umuwa\.de',
+'suche\.1und1\.de',
+'www\.metasuche\.ch',
+# Minor Hungarian search engines
+'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
+'tango\.hu',
+'keresolap\.hu',
+'kereso\.startlap\.hu',
+'polymeta\.hu',
+# Minor Indian search engines
+'sify\.com',
+# Minor Italian search engines
+'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com',
+'godado','jumpy\.it','shinyseek\.it','teecno\.it',
+# Minor Israeli search engines
+'search\.genieo\.com',
+# Minor Japanese search engines
+'ask\.jp','sagool\.jp',
+'websearch\.rakuten\.co\.jp',
+# Minor Norwegian search engines
+'sok\.start\.no', 'eniro\.no',
+# Minor Polish search engines
+'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl',
+# Minor Russian search engines
+'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru',
+'go\.mail\.ru',
+# Minor Swedish search engines
+'evreka\.passagen\.se','eniro\.se',
+# Minor Slovak search engines
+'zoznam\.sk',
+# Minor Portuguese search engines
+'sapo\.pt',
+# Minor Swiss search engines
+'search\.ch', 'search\.bluewin\.ch',
+'www\.zapmeta\.ch',
+'etools\.ch',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak\.'
+);
+@SearchEnginesSearchIDOrder_listgen = (
+    # Generic fallback pattern: "search." followed by at least one more dotted part
+    'search\..*\.\w+',
+);
+
+
+# NotSearchEnginesKeys
+# Maps a search engine key to an exclusion regex: when the key is found, the
+# exclusion is checked to know whether it is really a search engine.
+#------------------------------------------------------------------------------
+%NotSearchEnginesKeys=(
+'altavista\.'=>'babelfish\.altavista\.',
+# A hash stores only one value per key: when 'google\.' was listed four times
+# only the last pair survived and the other exclusions were silently dropped.
+# All Google exclusions are therefore merged into a single alternation.
+'google\.'=>'(?:mail|translate|code|groups)\.google\.',
+'msn\.'=>'hotmail\.msn\.',
+'tiscali\.'=>'mail\.tiscali\.',
+'yahoo\.'=>'(?:picks|mail)\.yahoo\.|yahoo\.[^/]+/picks',
+'yandex\.'=>'direct\.yandex\.'
+);
+
+
+# SearchEnginesHashID
+# Each Search Engine Search ID is associated to an AWStats id string
+#------------------------------------------------------------------------------
+%SearchEnginesHashID = (
+# Major international search engines
+'google\.[\w.]+/products','google_products',
+'base\.google\.','google_base',
+'froogle\.google\.','google_froogle',
+'groups\.google\.','google_groups',
+'images\.google\.','google_image',
+'google\.','google',
+'googlee\.','google',
+'googlecom\.com','google',
+'goggle\.co\.hu','google',
+'216\.239\.32\.20', 'google',
+'173\.194\.32\.223', 'google',
+'216\.239\.(35|37|39|51)\.100','google_cache',
+'216\.239\.(35|37|39|51)\.101','google_cache',
+'216\.239\.5[0-9]\.104','google_cache',
+'64\.233\.1[0-9]{2}\.104','google_cache',
+'66\.102\.[1-9]\.104','google_cache',
+'66\.249\.93\.104','google_cache',
+'72\.14\.2[0-9]{2}\.104','google_cache',
+'msn\.','msn',
+'live\.com','live',
+'bing\.','bing',
+'voila\.','voila',
+'mindset\.research\.yahoo','yahoo_mindset',
+'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo',
+'lycos\.','lycos',
+'alexa\.com','alexa',
+'alltheweb\.com','alltheweb',
+'altavista\.','altavista',
+'a9\.com','a9',
+'dmoz\.org','dmoz',
+'netscape\.','netscape',
+'search\.terra\.','terra',
+'www\.search\.com','search.com',
+'tiscali\.','tiscali',
+'search\.aol\.co','aol',
+'search\.sli\.sympatico\.ca','sympatico',
+'excite\.','excite',
+# Minor international search engines
+'4\-counter\.com','google4counter',
+'att\.net','att',
+'bungeebonesdotcom','bungeebonesdotcom',
+'northernlight\.','northernlight',
+'hotbot\.','hotbot',
+'kvasir\.','kvasir',
+'webcrawler\.','webcrawler',
+'metacrawler\.','metacrawler',
+'go2net\.com','go2net',
+'(^|\.)go\.com','go',
+'euroseek\.','euroseek',
+'looksmart\.','looksmart',
+'spray\.','spray',
+'nbci\.com\/search','nbci',
+'de\.ask.\com','askde', # break out Ask country specific engines.
+'es\.ask.\com','askes',
+'fr\.ask.\com','askfr',
+'it\.ask.\com','askit',
+'nl\.ask.\com','asknl',
+'uk\.ask.\com','askuk',
+'(^|\.)ask\.co\.uk','askuk',
+'(^|\.)ask\.com','ask',
+'atomz\.','atomz',
+'overture\.com','overture', # Replace 'goto\.com','Goto.com',
+'teoma\.','teoma',
+'findarticles\.com','findarticles',
+'infospace\.com','infospace',
+'mamma\.','mamma',
+'dejanews\.','dejanews',
+'dogpile\.com','dogpile',
+'wisenut\.com','wisenut',
+'ixquick\.com','ixquick',
+'search\.earthlink\.net','earthlink',
+'i-une\.com','iune',
+'blingo\.com','blingo',
+'centraldatabase\.org','centraldatabase',
+'clusty\.com','clusty',
+'mysearch\.','mysearch',
+'vivisimo\.com','vivisimo',
+'kartoo\.com','kartoo',
+'icerocket\.com','icerocket',
+'sphere\.com','sphere',
+'ledix\.net','ledix',
+'start\.shaw\.ca','shawca',
+'searchalot\.com','searchalot',
+'copernic\.com','copernic',
+'avantfind\.com','avantfind',
+'steadysearch\.com','steadysearch',
+'steady-search\.com','steadysearch',
+'claro-search\.com','clarosearch',
+'www1\.search-results\.com', 'searchresults',
+'www\.holasearch\.com', 'holasearch',
+'search\.conduit\.com', 'conduit',
+'static\.flipora\.com', 'flipora',
+'(?:www[12]?|mixidj)\.delta-search\.com', 'delta-search',
+'start\.iminent\.com', 'iminent',
+'www\.searchmobileonline\.com', 'searchmobileonline',
+'int\.search-results\.com', 'nortonsavesearch',
+'www2\.inbox\.com', 'inbox',
+'www\.govome\.com', 'govome',
+'find1friend\.com', 'find1friend',
+'start\.mysearchdial\.com', 'mysearchdial',
+'go\.speedbit\.com', 'speedbit',
+'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',
+'search\.sweetim\.com', 'sweetim',
+'search\.searchcompletion\.com', 'searchcompletion',
+'en\.eazel\.com','eazelsearch',
+'sr\.searchfunmoods\.com', 'searchfunmoods',
+'173\.194\.35\.177', 'googleByIP',
+'dalesearch\.com', 'dalesearch',
+'sweetpacks-search\.com', 'sweetpacks',
+'searchgol\.com', 'searchgol',
+'duckduckgo\.com', 'duckduckgo',
+'sr\.facemoods\.com', 'facemoods',
+'shoppstop\.com', 'shoppstop',
+'searchya\.com', 'searchya',
+'picsearch\.de', 'picsearch',
+'webssearches\.com', 'webssearches',
+'airzip\.inspsearch\.com', 'webssearches',
+'zapmeta\.de', 'zapmeta',
+'localmoxie\.com', 'localmoxie',
+'search-results\.mobi', 'search-results_mobi',
+'androidsearch\.com', 'androidsearch',
+'isearch\.nation\.com', 'isearch_nation_com',
+'search\.zonealarm\.com', 'search_zonealarm_com',
+'www\.buenosearch\.com', 'www_buenosearch_com',
+'search\.foxtab\.com', 'search_foxtab_com',
+'searches\.qone8\.com', 'searches_qone8_com',
+'startpage\.com', 'startpage_com',
+'www\.qwant\.com', 'qwant_com',
+'searches\.safehomepage\.com', 'safehomepage_com',
+'searches\.vi-view\.com', 'vi-view_com',
+'wow\.utop\.it', 'wow_utop_it',
+'windowssearch\.com', 'windowssearch_com',
+'www\.wow\.com', 'www_wow_com',
+'globososo\.', 'globososo',
+'kingtale3\.inspsearch\.com', 'globososo',
+'swisscows\.ch', 'swisscows_ch',
+'preciobarato\.xyz', 'preciobarato_xyz',
+'www\.dregol\.com', 'www_dregol_com',
+'search\.socialdownloadr\.com', 'search_socialdownloadr_com',
+'int\.search\.myway\.com', 'int_search_myway_com',
+'de\.dolphin\.com', 'de_dolphin_com',
+'mys\.yoursearch\.me', 'mys_yoursearch_me',
+# Chello Portals
+'chello\.at','chelloat',
+'chello\.be','chellobe',
+'chello\.cz','chellocz',
+'chello\.fr','chellofr',
+'chello\.hu','chellohu',
+'chello\.nl','chellonl',
+'chello\.no','chellono',
+'chello\.pl','chellopl',
+'chello\.se','chellose',
+'chello\.sk','chellosk',
+'chello','chellocom',
+# Mirago
+'mirago\.be','miragobe',
+'mirago\.ch','miragoch',
+'mirago\.de','miragode',
+'mirago\.dk','miragodk',
+'es\.mirago\.com','miragoes',
+'mirago\.fr','miragofr',
+'mirago\.it','miragoit',
+'mirago\.nl','miragonl',
+'no\.mirago\.com','miragono',
+'mirago\.se','miragose',
+'mirago\.co\.uk','miragocouk',
+'mirago','mirago', # required as catchall for new countries not yet known
+'answerbus\.com','answerbus',
+'icq\.com\/search','icq',
+'nusearch\.com','nusearch',
+'goodsearch\.com','goodsearch',
+'scroogle\.org','scroogle',
+'questionanswering\.com','questionanswering',
+'mywebsearch\.com','mywebsearch',
+'as\.starware\.com','comettoolbar',
+# Social Bookmarking Services
+'del\.icio\.us','delicious',
+'digg\.com','digg',
+'stumbleupon\.com','stumbleupon',
+'swik\.net','swik',
+'segnalo\.alice\.it','segnalo',
+'ineffabile\.it','ineffabile',
+# Minor Australian search engines
+'anzwers\.com\.au','anzwers',
+# Minor brazilian search engines
+'engine\.exe','engine',
+'miner\.bol\.com\.br','miner',
+# Minor chinese search engines
+'\.baidu\.com','baidu',
+'iask\.com','iask',
+'\.accoona\.com','accoona',
+'\.3721\.com','3721',
+'\.163\.com','netease',
+'\.soso\.com','soso',
+'\.zhongsou\.com','zhongsou',
+'\.vnet\.cn','vnet',
+'\.sogou\.com','sogou',
+# Minor czech search engines
+'atlas\.cz','atlas',
+'seznam\.cz','seznam',
+'quick\.cz','quick',
+'centrum\.cz','centrum',
+'jyxo\.(cz|com)','jyxo',
+'najdi\.to','najdi',
+'redbox\.cz','redbox',
+'isearch\.avg\.com', 'avgsearch',
+# Minor danish search-engines
+'opasia\.dk','opasia',
+'danielsen\.com','danielsen',
+'sol\.dk','sol',
+'jubii\.dk','jubii',
+'find\.dk','finddk',
+'edderkoppen\.dk','edderkoppen',
+'netstjernen\.dk','netstjernen',
+'orbis\.dk','orbis',
+'tyfon\.dk','tyfon',
+'1klik\.dk','1klik',
+'ofir\.dk','ofir',
+# Minor dutch search engines
+'ilse\.','ilse',
+'vindex\.','vindex',
+# Minor english search engines
+'bbc\.co\.uk/cgi-bin/search','bbc',
+'ifind\.freeserve','freeserve',
+'looksmart\.co\.uk','looksmartuk',
+'splut\.','splut',
+'spotjockey\.','spotjockey',
+'ukdirectory\.','ukdirectory',
+'ukindex\.co\.uk','ukindex',
+'ukplus\.','ukplus',
+'searchy\.co\.uk','searchy',
+'search\.fbdownloader\.com','fbdownloader',
+'search\.fdownloadr\.com', 'fdownloadr_com',
+'search\.babylon\.com', 'babylon',
+'my\.allgameshome\.com', 'allgameshome',
+'surfcanyon\.com', 'surfcanyon_com',
+'uk\.foxstart\.com', 'uk_foxstart_com',
+'yandex\.com', 'yandex_com',
+# Minor finnish search engines
+'haku\.www\.fi','haku',
+# Minor french search engines
+'recherche\.aol\.fr','aolfr',
+'ctrouve\.','ctrouve',
+'francite\.','francite',
+'\.lbb\.org','lbb',
+'rechercher\.libertysurf\.fr','libertysurf',
+'search[\w\-]+\.free\.fr','free',
+'recherche\.club-internet\.fr','clubinternet',
+'toile\.com','toile',
+'biglotron\.com', 'biglotron',
+'mozbot\.fr', 'mozbot',
+# Minor german search engines
+'sucheaol\.aol\.de','aolde',
+'o2suche\.aol\.de','o2aolde',
+'fireball\.de','fireball',
+'infoseek\.de','infoseek',
+'suche\d?\.web\.de','webde',
+'[a-z]serv\.rrzn\.uni-hannover\.de','meta',
+'suchen\.abacho\.de','abacho',
+'(brisbane|suche)\.t-online\.de','t-online',
+'allesklar\.de','allesklar',
+'meinestadt\.de','meinestadt',
+'212\.227\.33\.241','metaspinner',
+'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de',
+'wwweasel\.de','wwweasel',
+'netluchs\.de','netluchs',
+'schoenerbrausen\.de','schoenerbrausen',
+'suche\.gmx\.net', 'gmxsuche',
+'suche\.gmx\.at', 'gmxsuche_at',
+'ecosia\.org', 'ecosiasearch',
+'de\.aolsearch\.com', 'aolsearch',
+'suche\.aol\.de', 'aolsuche',
+'www\.startxxl\.com', 'startxxl',
+'www\.benefind\.de', 'benefind',
+'www\.amazon\.de.*search', 'amazonsearch', #Not clear if this matches amazon searches only
+'de\.wow\.com', 'wowsearch',
+'www\.vlips\.de', 'vlips_de',
+'metager\.de', 'metager',
+'search\.1und1\.de', 'search_1und1_de',
+'sm\.de', 'smde',
+'sumaja\.de', 'sumaja',
+'navigationshilfe\.t-online\.de', 'navigationshilfe',
+'umfis\.de', 'umfis',
+'fastbot\.de', 'fastbot_de',
+'tixuma\.de', 'tixuma_de',
+'suche\.freenet\.de', 'freenet_de',
+'www\.izito\.de', 'izito_de',
+'extern\.peoplecheck\.de', 'peoplecheck_de',
+'www\.oneseek\.de', 'oneseek_de',
+'de\.wiki\.gov\.cn', 'de_wiki_gov_cn',
+'umuwa\.de', 'umuwa_de',
+'suche\.1und1\.de', '1und1_de',
+'www\.metasuche\.ch', 'metasuche_ch',
+# Minor Hungarian search engines
+'heureka\.hu','heureka',
+'vizsla\.origo\.hu','origo',
+'lapkereso\.hu','lapkereso',
+'goliat\.hu','goliat',
+'index\.hu','indexhu',
+'wahoo\.hu','wahoo',
+'webmania\.hu','webmania',
+'search\.internetto\.hu','internetto',
+'tango\.hu','tango_hu',
+'keresolap\.hu','keresolap_hu',
+'kereso\.startlap\.hu', 'startlap_hu',
+'polymeta\.hu','polymeta_hu',
+# Minor Indian search engines
+'sify\.com','sify',
+# Minor Italian search engines
+'virgilio\.it','virgilio',
+'arianna\.libero\.it','arianna',
+'supereva\.com','supereva',
+'kataweb\.it','kataweb',
+'search\.alice\.it\.master','aliceitmaster',
+'search\.alice\.it','aliceit',
+'gotuneed\.com','gotuneed',
+'godado','godado',
+'jumpy\.it','jumpy\.it',
+'shinyseek\.it','shinyseek\.it',
+'teecno\.it','teecnoit',
+# Minor Israeli search engines
+'search\.genieo\.com', 'genieo',
+# Minor Japanese search engines
+'ask\.jp','askjp',
+'sagool\.jp','sagool',
+'websearch\.rakuten\.co\.jp', 'rakuten',
+# Minor Norwegian search engines
+'sok\.start\.no','start', 'eniro\.no','eniro',
+# Minor Polish search engines
+'szukaj\.wp\.pl','wp',
+'szukaj\.onet\.pl','onetpl',
+'dodaj\.pl','dodajpl',
+'gazeta\.pl','gazetapl',
+'gery\.pl','gerypl',
+'netsprint\.pl\/hoga\-search','hogapl',
+'netsprint\.pl','netsprintpl',
+'interia\.pl','interiapl',
+'katalog\.onet\.pl','katalogonetpl',
+'o2\.pl','o2pl',
+'polska\.pl','polskapl',
+'szukacz\.pl','szukaczpl',
+'wow\.pl','wowpl',
+# Minor russian search engines
+'ya(ndex)?\.ru','yandex',
+'aport\.ru','aport',
+'rambler\.ru','rambler',
+'turtle\.ru','turtle',
+'metabot\.ru','metabot',
+'go\.mail\.ru', 'mailru',
+# Minor Swedish search engines
+'evreka\.passagen\.se','passagen',
+'eniro\.se','enirose',
+# Minor Slovak search engines
+'zoznam\.sk','zoznam',
+# Minor Portuguese search engines
+'sapo\.pt','sapo',
+# Minor swiss search engines
+'search\.ch','searchch',
+'search\.bluewin\.ch','bluewin',
+'www\.zapmeta\.ch', 'zapmeta_ch',
+'etools\.ch', 'etools_ch',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak\.','pogodak',
+# Generic search engines
+'search\..*\.\w+','search'
+);
+
+
+# SearchEnginesWithKeysNotInQuery
+# Search engine ids (flagged with 1) that carry the keywords in the page part of the URL, not in a query parameter
+#------------------------------------------------------------------------------
+%SearchEnginesWithKeysNotInQuery = (
+'a9' => 1,             # e.g. www.a9.com/searchkey1%20searchkey2
+'iminent' => 1,        # e.g. http://start.iminent.com/StartWeb/1031/toolbox/#q=searchkey1%20searchkey2&additional_arguments
+'de_wiki_gov_cn' => 1, # e.g. http://de.wiki.gov.cn/s_searchkey1%20searchkey2
+'umuwa_de' => 1,       # e.g. http://umuwa.de/searchkey or http://umuwa.de/searchkey/Images
+'amazonsearch' => 1    # e.g. http://www.amazon.de/gp/bit/apps/web/SERP/search/ref=bit_bds-p24_serp_cr_de?ie=UTF8tagbase=bds-p24&query=deutsch+8.+klasse+gymnasium+protokoll
+);
+
+# SearchEnginesKnownUrl
+# Maps a search engine id to the known rule (URL fragment or regex such as 'q=') used to extract keywords from a referrer URL
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUrl = (
+# Most common search engines
+'alexa' => 'q=',
+'alltheweb' => 'q(|uery)=',
+'altavista' => 'q=',
+'a9' => 'a9\.com\/',
+'dmoz' => 'search=',
+'google_products' => '(p|q|as_p|as_q)=',
+'google_base' => '(p|q|as_p|as_q)=',
+'google_froogle' => '(p|q|as_p|as_q)=',
+'google_groups' => 'group\/',    # does not work
+'google_image' => '(p|q|as_p|as_q)=',
+'google_cache' => '(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
+'google' => '(p|q|as_p|as_q)=',
+'lycos' => 'query=',
+'msn' => 'q=',
+'live' => 'q=',
+'bing' => 'q=',
+'netscape' => 'search=',
+'tiscali' => 'key=',
+'aol' => 'query=',
+'terra' => 'query=',
+'voila' => '(kw|rdata)=',
+'search.com' => 'q=',
+'yahoo_mindset' => 'p=',
+'yahoo' => 'p=',
+'sympatico' => 'query=',
+'excite' => 'search=',
+# Minor international search engines
+'google4counter' => '(p|q|as_p|as_q)=',
+'att' => 'qry=',
+'bungeebonesdotcom' => 'query=',
+'go' => 'qt=',
+'askde' => '(ask|q)=',    # Ask is broken out into country specific engines.
+'askes' => '(ask|q)=',
+'askfr' => '(ask|q)=',
+'askit' => '(ask|q)=',
+'asknl' => '(ask|q)=',
+'ask' => '(ask|q)=',
+'atomz' => 'sp-q=',
+'euroseek' => 'query=',
+'findarticles' => 'key=',
+'go2net' => 'general=',
+'hotbot' => 'mt=',
+'infospace' => 'qkw=',
+'kvasir' => 'q=',
+'looksmart' => 'key=',
+'mamma' => 'query=',
+'metacrawler' => 'general=',
+'nbci' => 'keyword=',
+'northernlight' => 'qr=',
+'overture' => 'keywords=',
+'dogpile' => 'q(|kw)=',
+'spray' => 'string=',
+'teoma' => 'q=',
+'webcrawler' => 'searchText=',
+'wisenut' => 'query=',
+'ixquick' => 'query=',
+'earthlink' => 'q=',
+'iune' => '(keywords|q)=',
+'blingo' => 'q=',
+'centraldatabase' => 'query=',
+'clusty' => 'query=',
+'mysearch' => 'searchfor=',
+'vivisimo' => 'query=',
+# kartoo: the referring URL carries no keywords, hence the empty rule.
+'kartoo' => '',
+'icerocket' => 'q=',
+'sphere' => 'q=',
+'ledix' => 'q=',
+'shawca' => 'q=',
+'searchalot' => 'q=',
+'copernic' => 'web\/',
+'avantfind' => 'keywords=',
+'steadysearch' => 'w=',
+'clarosearch' => 'q=',
+'searchresults' => 'q=',
+'holasearch' => 'q=',
+'conduit' => 'q=',
+'flipora' => 'q=',
+'delta-search' => 'q=',
+'iminent' => 'q=',
+'searchmobileonline' => 'q=',
+'nortonsavesearch' => 'q=',
+'inbox' => 'q(?:kw)?=',
+'govome' => 'q=',
+'find1friend' => 'q=',
+'mysearchdial' => 'q=',
+'speedbit' => 'q=',
+'certifiedtoolbarsearch' => 'q=',
+'sweetim' => 'q=',
+'searchcompletion' => 'q=',
+'eazelsearch' => 'q=',
+'searchfunmoods' => 'q=',
+'googleByIP' => 'q=',
+'dalesearch' => 'q=',
+'sweetpacks' => 'q=',
+'searchgol' => 'q=',
+'duckduckgo' => 'uddg=',
+'facemoods' => 'q=',
+'shoppstop' => 'keywords=',
+'searchya' => 'q=',
+'picsearch' => 'q=',
+'webssearches' => 'q=',
+'zapmeta' => 'query=',
+'localmoxie' => 'keyword=',
+'search-results_mobi' => 'q=',
+'androidsearch' => 'q=',
+'isearch_nation_com' => 'q=',
+'search_zonealarm_com' => 'q=',
+'www_buenosearch_com' => 'q=',
+'search_foxtab_com' => 'q=',
+'searches_qone8_com' => 'q=',
+'startpage_com' => 'query=',
+'qwant_com' => 'q=',
+'safehomepage_com' => 'q=',
+'vi-view_com' => 'q=',
+'wow_utop_it' => 'q=',
+'windowssearch_com' => 'q=',
+'www_wow_com' => 'q=',
+'globososo' => 'q=',
+'swisscows_ch' => 'query=',
+'preciobarato_xyz' => 's=',
+'www_dregol_com' => 'q=',
+'search_socialdownloadr_com' => 'q=',
+'int_search_myway_com' => 'searchfor=',
+'de_dolphin_com' => 'q=',
+'mys_yoursearch_me' => 'q=',
+# Chello Portals
+'chelloat' => 'q1=',
+'chellobe' => 'q1=',
+'chellocz' => 'q1=',
+'chellofr' => 'q1=',
+'chellohu' => 'q1=',
+'chellonl' => 'q1=',
+'chellono' => 'q1=',
+'chellopl' => 'q1=',
+'chellose' => 'q1=',
+'chellosk' => 'q1=',
+'chellocom' => 'q1=',
+# Mirago
+'miragobe' => '(txtsearch|qry)=',
+'miragoch' => '(txtsearch|qry)=',
+'miragode' => '(txtsearch|qry)=',
+'miragodk' => '(txtsearch|qry)=',
+'miragoes' => '(txtsearch|qry)=',
+'miragofr' => '(txtsearch|qry)=',
+'miragoit' => '(txtsearch|qry)=',
+'miragonl' => '(txtsearch|qry)=',
+'miragono' => '(txtsearch|qry)=',
+'miragose' => '(txtsearch|qry)=',
+'miragocouk' => '(txtsearch|qry)=',
+'mirago' => '(txtsearch|qry)=',
+'answerbus' => '',    # Does not provide query parameters
+'icq' => 'q=',
+'nusearch' => 'nusearch_terms=',
+'goodsearch' => 'Keywords=',
+'scroogle' => 'Gw=',    # Does not always provide query parameters
+'questionanswering' => '',
+'mywebsearch' => 'searchfor=',
+'comettoolbar' => 'qry=',
+# Social Bookmarking Services
+'delicious' => 'all=',
+'digg' => 's=',
+'stumbleupon' => '',
+'swik' => 'swik\.net/',    # does not work. Keywords follow the domain, e.g. http://swik.net/awstats+analytics
+'segnalo' => '',
+'ineffabile' => '',
+# Minor Australian search engines
+'anzwers' => 'search=',
+# Minor brazilian search engines
+'engine' => 'p1=', 'miner' => 'q=',
+# Minor chinese search engines
+'baidu' => '(wd|word)=',
+'iask' => '(w|k)=',
+'accoona' => 'qt=',
+'3721' => '(p|name)=',
+'netease' => 'q=',
+'soso' => 'q=',
+'zhongsou' => '(word|w)=',
+'sogou' => 'query=',
+'vnet' => 'kw=',
+# Minor czech search engines
+'atlas' => '(searchtext|q)=', 'seznam' => '(w|q)=', 'quick' => 'query=', 'centrum' => 'q=', 'jyxo' => '(s|q)=', 'najdi' => 'dotaz=', 'redbox' => 'srch=',
+'avgsearch' => 'q=',
+# Minor danish search engines
+'opasia' => 'q=', 'danielsen' => 'q=', 'sol' => 'q=', 'jubii' => 'soegeord=', 'finddk' => 'words=', 'edderkoppen' => 'query=', 'orbis' => 'search_field=', '1klik' => 'query=', 'ofir' => 'querytext=',
+# Minor dutch search engines
+'ilse' => 'search_for=', 'vindex' => 'in=',
+# Minor english search engines
+'askuk' => '(ask|q)=', 'bbc' => 'q=', 'freeserve' => 'q=', 'looksmartuk' => 'key=',
+'splut' => 'pattern=', 'spotjockey' => 'Search_Keyword=', 'ukindex' => 'stext=', 'ukdirectory' => 'k=', 'ukplus' => 'search=', 'searchy' => 'search_term=',
+'fbdownloader' => 'q=',
+'fdownloadr_com' => 'q=',
+'babylon' => 'q=',
+'allgameshome' => 's=',
+'surfcanyon_com' => 'q=',
+'uk_foxstart_com' => 'q=',
+'yandex_com' => 'text=',
+# Minor finnish search engines
+'haku' => 'w=',
+# Minor french search engines
+'francite' => 'name=', 'clubinternet' => 'q=',
+'toile' => 'q=',
+'biglotron' => 'question=',
+'mozbot' => 'q=',
+# Minor german search engines
+'aolde' => 'q=',
+'o2aolde' => 'q=',
+'fireball' => 'q=', 'infoseek' => 'qt=', 'webde' => 'su=',
+'abacho' => 'q=', 't-online' => 'q=',
+'metaspinner' => 'qry=',
+'metacrawler_de' => 'qry=',
+'wwweasel' => 'q=',
+'netluchs' => 'query=',
+'schoenerbrausen' => 'q=',
+'gmxsuche' => 'q=',
+'gmxsuche_at' => 'q=',
+'ecosiasearch' => 'q=',
+'aolsearch' => 'q=',
+'aolsuche' => 'q=',
+'startxxl' => 'q=',
+'benefind' => 'q=',
+'amazonsearch' => 'query=',
+'wowsearch' => 'q=',
+'vlips_de' => 'q=',
+'metager' => 'eingabe=',
+'search_1und1_de' => 'q=',
+'smde' => 'q=',
+# 'sumaja' has no entry on purpose: its referrer URL contains no query string.
+'navigationshilfe' => 'q=',
+'umfis' => 'suchbegriff=',
+'fastbot_de' => 'red=[0-9]*\+',
+'tixuma_de' => 'sc=',
+'freenet_de' => 'query=',
+'izito_de' => 'q=',
+'peoplecheck_de' => 'q=',
+'oneseek_de' => 'q=',
+'de_wiki_gov_cn' => 'de\.wiki\.gov\.cn\/s_',
+'umuwa_de' => 'umuwa\.de\/',
+'1und1_de' => 'q=',
+'metasuche_ch' => 'q=',
+# Minor Hungarian search engines
+'heureka' => 'heureka=', 'origo' => '(q|search)=', 'goliat' => 'KERESES=', 'wahoo' => 'q=', 'internetto' => 'searchstr=',
+'keresolap_hu' => 'q=',
+'startlap_hu' => 'q=',
+'tango_hu' => 'q=',
+'polymeta_hu' => '',
+# Minor Indian search engines
+'sify' => 'keyword=',
+# Minor Italian search engines
+'virgilio' => 'qs=',
+'arianna' => 'query=',
+'supereva' => 'q=',
+'kataweb' => 'q=',
+'aliceitmaster' => 'qs=',
+'aliceit' => 'qs=',
+'gotuneed' => '',    # Not yet known
+'godado' => 'Keywords=',
+'jumpy\.it' => 'searchWord=',
+'shinyseek\.it' => 'KEY=',
+'teecnoit' => 'q=',
+# Minor Israeli search engines
+'genieo' => 'q=',
+# Minor Japanese search engines
+'askjp' => '(ask|q)=',
+'sagool' => 'q=',
+'rakuten' => 'qt=',
+# Minor Norwegian search engines
+'start' => 'q=', 'eniro' => 'q=',
+# Minor Polish search engines
+'wp' => 'szukaj=',
+'onetpl' => 'qt=',
+'dodajpl' => 'keyword=',
+'gazetapl' => 'slowo=',
+'gerypl' => 'q=',
+'hogapl' => 'qt=',
+'netsprintpl' => 'q=',
+'interiapl' => 'q=',
+'katalogonetpl' => 'qt=',
+'o2pl' => 'qt=',
+'polskapl' => 'qt=',
+'szukaczpl' => 'q=',
+'wowpl' => 'q=',
+# Minor russian search engines
+'yandex' => 'text=', 'rambler' => 'words=', 'aport' => 'r=', 'metabot' => 'st=',
+'mailru' => 'q=',
+# Minor swedish search engines
+'passagen' => 'q=',
+'enirose' => 'hitta:',    # Not sure this works: the keywords are part of the URL path, so the URL contains no question mark.
+# Minor swiss search engines
+'searchch' => 'q=', 'bluewin' => 'qry=',
+'zapmeta_ch' => 'query=',
+'etools_ch' => 'query=',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak' => 'q='
+);
+
+# SearchEnginesKnownUrlNotFound
+# Per search engine id, the known rule to extract "not found" keywords from a referrer search engine URL
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUrlNotFound = (
+# Most common search engines
+'msn' => 'origq=',
+);
+
+# When no rule is known for an engine, the first URL parameter that is not listed in WordsToCleanSearchUrl is taken
+#------------------------------------------------------------------------------
+@WordsToCleanSearchUrl = qw( act= annuaire= btng= cat= categoria= cfg= cof= cou= count= cp= dd= domain= dt= dw= enc= exec= geo= hc= height= hits= hl= hq= hs= id= kl= lang= loc= lr= matchmode= medor= message= meta= mode= order= page= par= pays= pg= pos= prg= qc= refer= sa= safe= sc= sort= src= start= style= stype= sum= tag= temp= theme= type= url= user= width= what= \\.x= \\.y= y= look= );
+
+# SearchEnginesKnownUTFCoding
+# Per search engine id, the URL parameter that proves the engine has coded its parameters in UTF-8
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUTFCoding = (
+# Most common search engines
+'google' => 'ie=utf-8',
+'alltheweb' => 'cs=utf-8',
+);
+
+
+# SearchEnginesHashLib
+# Display name of each search engine, keyed by search engine id.
+# Format: 'search_engine_id', 'search_engine_name' (names are plain text, so no regex escaping in them)
+#------------------------------------------------------------------------------
+%SearchEnginesHashLib=(
+# Major international search engines
+'alexa','Alexa',
+'alltheweb','AllTheWeb',
+'altavista','AltaVista',
+'a9', 'A9',
+'dmoz','DMOZ',
+'google_products','Google (Products)',
+'google_base','Google (Base)',
+'google_froogle','Froogle (Google)',
+'google_groups','Google (Groups)',
+'google_image','Google (Images)',
+'google_cache','Google (cache)',
+'google','Google',
+'lycos','Lycos',
+'msn','Microsoft MSN Search',
+'live','Microsoft Windows Live',
+'bing','Microsoft Bing',
+'netscape','Netscape',
+'aol','AOL',
+'terra','Terra',
+'tiscali','Tiscali',
+'voila','Voila',
+'search.com','Search.com',
+'yahoo_mindset','Yahoo! Mindset',
+'yahoo','Yahoo!',
+'sympatico','Sympatico',
+'excite','Excite',
+# Minor international search engines
+'google4counter','4-counter (Google)',
+'att','AT&T search (powered by Google)',
+'bungeebonesdotcom','BungeeBones',
+'go','Go.com',
+'askde','Ask Deutschland',
+'askes','Ask España', # break out Ask country specific engines.
+'askfr','Ask France',
+'askit','Ask Italia',
+'asknl','Ask Nederland',
+'ask','Ask',
+'atomz','Atomz',
+'dejanews','DejaNews',
+'euroseek','Euroseek',
+'findarticles','Find Articles',
+'go2net','Go2Net (Metamoteur)',
+'hotbot','Hotbot',
+'infospace','InfoSpace',
+'kvasir','Kvasir',
+'looksmart','Looksmart',
+'mamma','Mamma',
+'metacrawler','MetaCrawler (Metamoteur)',
+'nbci','NBCI',
+'northernlight','NorthernLight',
+'overture','Overture', # Replace 'goto\.com','Goto.com',
+'dogpile','Dogpile',
+'spray','Spray',
+'teoma','Teoma', # Replace 'directhit\.com','DirectHit',
+'webcrawler','WebCrawler',
+'wisenut','WISENut',
+'ixquick','ix quick',
+'earthlink', 'Earth Link',
+'iune','i-une',
+'blingo','Blingo',
+'centraldatabase','GPU p2p search',
+'clusty','Clusty',
+'mysearch','My Search',
+'vivisimo','Vivisimo',
+'kartoo','Kartoo',
+'icerocket','Icerocket (Blog)',
+'sphere','Sphere (Blog)',
+'ledix','Ledix',
+'shawca','Shaw.ca',
+'searchalot','Searchalot',
+'copernic','Copernic',
+'avantfind','Avantfind',
+'steadysearch','Steadysearch', # own label, so it is not shown as a second 'Avantfind' row
+'clarosearch','Claro Search',
+'searchresults','Search-results',
+'holasearch', 'Hola Search',
+'conduit', 'Conduit Search',
+'flipora', 'Flipora',
+'delta-search', 'Delta Search',
+'iminent', 'Iminent',
+'searchmobileonline', 'Search Mobile Online (StartApp)',
+'nortonsavesearch', 'Norton Safe Search',
+'inbox', 'Inbox Search',
+'govome', 'Govome',
+'find1friend', 'Find1Friend',
+'mysearchdial', 'My Search Dial',
+'speedbit', 'Speedbit',
+'certifiedtoolbarsearch', 'Certified-Toolbar Search',
+'sweetim', 'SweetIM Search',
+'searchcompletion', 'SearchCompletion Search',
+'eazelsearch', 'Eazel Search',
+'searchfunmoods', 'Funmoods',
+'googleByIP', 'Google (Access by IP-Address)',
+'dalesearch', 'Dale Search',
+'sweetpacks', 'Sweetpacks',
+'searchgol', 'Search-Gol',
+'duckduckgo', 'DuckDuckGo (Does not provide search keyphrases, using found page instead)',
+'facemoods', 'Facemoods Search',
+'shoppstop', 'ShoppStop',
+'searchya', 'Searchya',
+'picsearch', 'picsearch',
+'webssearches', 'Various variants of Webssearches EMG Technologies and airzip.inspsearch.com',
+#Jan 8, 2016: No genuine inspsearch.com search engine seems to exist, but there are a couple of search engines using subdomains of inspsearch.com. Unclear how these are related to each other.
+'zapmeta', 'ZapMeta',
+'localmoxie', 'Local Moxie',
+'search-results_mobi', 'search-results.mobi',
+'androidsearch', 'androidsearch.com',
+'isearch_nation_com', 'Nation Search',
+'search_zonealarm_com', 'Zone Alarm Search',
+'www_buenosearch_com', 'BuenoSearch',
+'search_foxtab_com', 'Foxtab Search',
+'searches_qone8_com', 'Omiga-Plus',
+'startpage_com', 'Startpage',
+'qwant_com', 'qwant.com',
+'safehomepage_com', 'safehomepage.com',
+'vi-view_com', 'vi-view.com',
+'wow_utop_it', 'wow.utop.it',
+'windowssearch_com', 'windowssearch.com',
+'www_wow_com', 'WOW.com',
+'globososo', 'Various variants of Globososo (Kingtale Technology): www, searches, searches3, and at inspsearch.com (globososo, kingtale3)',
+'swisscows_ch', 'Swisscows',
+'preciobarato_xyz', 'Yandex',
+'www_dregol_com', 'Dregol Search',
+'search_socialdownloadr_com', 'Socialdownloadr',
+'int_search_myway_com', 'MyWay',
+'de_dolphin_com', 'Dolphin Search',
+'mys_yoursearch_me', 'Yoursearch.me',
+# Chello Portals
+'chelloat','Chello Austria',
+'chellobe','Chello Belgium',
+'chellocz','Chello Czech Republic',
+'chellofr','Chello France',
+'chellohu','Chello Hungary',
+'chellonl','Chello Netherlands',
+'chellono','Chello Norway',
+'chellopl','Chello Poland',
+'chellose','Chello Sweden',
+'chellosk','Chello Slovakia',
+'chellocom','Chello (Country not recognized)',
+# Mirago
+'miragobe','Mirago Belgium',
+'miragoch','Mirago Switzerland',
+'miragode','Mirago Germany',
+'miragodk','Mirago Denmark',
+'miragoes','Mirago Spain',
+'miragofr','Mirago France',
+'miragoit','Mirago Italy',
+'miragonl','Mirago Netherlands',
+'miragono','Mirago Norway',
+'miragose','Mirago Sweden',
+'miragocouk','Mirago UK',
+'mirago','Mirago (country unknown)',
+'answerbus','Answerbus',
+'icq','icq',
+'nusearch','Nusearch',
+'goodsearch','GoodSearch',
+'scroogle','Scroogle',
+'questionanswering','Questionanswering',
+'mywebsearch','MyWebSearch',
+'comettoolbar','Comet toolbar search',
+# Social Bookmarking Services
+'delicious','del.icio.us (Social Bookmark)',
+'digg','Digg (Social Bookmark)',
+'stumbleupon','Stumbleupon (Social Bookmark)',
+'swik','Swik (Social Bookmark)',
+'segnalo','Segnalo (Social Bookmark)',
+'ineffabile','Ineffabile.it (Social Bookmark)',
+# Minor Australian search engines
+'anzwers','anzwers.com.au',
+# Minor brazilian search engines
+'engine','Cade', 'miner','Meta Miner',
+# Minor chinese search engines
+'baidu','Baidu',
+'iask','Iask',
+'accoona','Accoona',
+'3721','3721',
+'netease', 'NetEase',
+'soso','SoSo',
+'zhongsou','ZhongSou',
+'sogou', 'SoGou',
+'vnet','VNet',
+# Minor czech search engines
+'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',
+'avgsearch', 'AVG Secure Search',
+# Minor danish search-engines
+'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir',
+# Minor dutch search engines
+'ilse','Ilse','vindex','Vindex.nl',
+# Minor english search engines
+'askuk','Ask UK',
+'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK',
+'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
+'fbdownloader','FBDownloader (fbdownloader)',
+'fdownloadr_com', 'FBDownloader (fdownloadr)',
+'babylon','Babylon',
+'allgameshome', 'AllGamesHome',
+'surfcanyon_com', 'SurfCanyon',
+'uk_foxstart_com', 'Foxstart.com',
+'yandex_com', 'Yandex',
+# Minor finnish search engines
+'haku','Ihmemaa',
+# Minor french search engines
+'aolfr','AOL (fr)', 'ctrouve','C\'est trouve', 'francite','Francite', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet',
+'toile', 'Toile du Quebec',
+'biglotron','Biglotron',
+'mozbot','Mozbot',
+# Minor German search engines
+'aolde','AOL (de)',
+'o2aolde', 'o2 Suche',
+'fireball','Fireball', 'infoseek','Infoseek',
+'webde','Web.de',
+'abacho','Abacho',
+'t-online','T-Online',
+'allesklar','allesklar.de', 'meinestadt','meinestadt.de',
+'metaspinner','metaspinner',
+'metacrawler_de','metacrawler.de',
+'wwweasel','WWWeasel',
+'netluchs','Netluchs',
+'schoenerbrausen','Schoenerbrausen',
+'gmxsuche', 'GMX Suche',
+'gmxsuche_at', 'GMX Suche Oesterreich',
+'ecosiasearch', 'Ecosia Search',
+'aolsearch', 'AOL Search',
+'aolsuche', 'AOL Suche',
+'startxxl', 'StartXXL',
+'benefind', 'benefind',
+'amazonsearch', 'Amazon Web Search',
+'wowsearch', 'Wow Search',
+'vlips_de', 'vlips.de',
+'metager', 'MetaGer',
+'search_1und1_de', '1&1 Suche (subdomain "search")',
+'smde', 'SM.de - Die SuchMaschine',
+'sumaja', 'Sumaja',
+'navigationshilfe', 'T-Online Navigationshilfe',
+'umfis', 'UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland',
+'fastbot_de', 'Fastbot.de (Does not provide search keyphrases, using found page instead)',
+'tixuma_de', 'Tixuma Deutschland',
+'freenet_de', 'suche.freenet.de',
+'izito_de', 'iZito Deutschland',
+'peoplecheck_de', 'PeopleCheck.de',
+'oneseek_de', 'Metasuchmaschine OneSeek.de',
+'de_wiki_gov_cn', 'Wiki Sucher',
+'umuwa_de', 'Umuwa Deutschland',
+'1und1_de', '1&1 Suche (subdomain "suche")',
+'metasuche_ch', 'Metasuche.ch',
+# Minor hungarian search engines
+'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
+'tango_hu','Tango',
+'keresolap_hu','Tango keresolap',
+'startlap_hu','Startlap Kereso',
+'polymeta_hu','Polymeta',
+# Minor Indian search engines
+'sify','Sify',
+# Minor Italian search engines
+'virgilio','Virgilio',
+'arianna','Arianna',
+'supereva','Supereva',
+'kataweb','Kataweb',
+'aliceitmaster','search.alice.it.master',
+'aliceit','alice.it',
+'gotuneed','got u need',
+'godado','Godado.it',
+'jumpy\.it','Jumpy.it',
+'shinyseek\.it','Shinyseek.it',
+'teecnoit','Teecno',
+# Minor Israeli search engines
+'genieo','Genieo',
+# Minor Japanese search engines
+'askjp','Ask Japan',
+'sagool','Sagool',
+'rakuten', 'websearch.rakuten.co.jp',
+# Minor Norwegian search engines
+'start','start.no', 'eniro','Eniro',
+# Minor polish search engines
+'wp','Wirtualna Polska',
+'onetpl','Onet.pl',
+'dodajpl','Dodaj.pl',
+'gazetapl','Gazeta.pl',
+'gerypl','Gery.pl',
+'hogapl','Hoga.pl',
+'netsprintpl','NetSprint.pl',
+'interiapl','Interia.pl',
+'katalogonetpl','Katalog.Onet.pl',
+'o2pl','o2.pl',
+'polskapl','Polska',
+'szukaczpl','Szukacz',
+'wowpl','Wow.pl',
+# Minor russian search engines
+'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot',
+'mailru','Mail.Ru',
+# Minor Swedish search engines
+'passagen','Evreka',
+'enirose','Eniro Sverige',
+# Minor Slovak search engines
+'zoznam','Zoznam',
+# Minor Portuguese search engines
+'sapo','Sapo',
+# Minor Swiss search engines
+'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch',
+'zapmeta_ch', 'ZapMeta.ch',
+'etools_ch', 'eTools.ch',
+# Minor Croatian, Serbian, Macedonian, Bosnian and Herzegovinian search engines
+'pogodak','Pogodak.com',
+# Generic search engines
+'search','Unknown search engines'
+);
+
+
+# Sanity check.
+# Enable this code and run perl search_engines.pm to check file entries are ok
+#-----------------------------------------------------------------------------
+#foreach my $key (@SearchEnginesSearchIDOrder_list1) {
+# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID");
+# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } }
+# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } }
+#} }
+#foreach my $key (@SearchEnginesSearchIDOrder_list2) {
+# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID");
+# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } }
+# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } }
+#} }
+#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } }
+#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } }
+#foreach my $key (keys %SearchEnginesKnownUrl) {
+# my $found=0;
+# foreach my $key2 (values %SearchEnginesHashID) {
+# if ($key eq $key2) { $found=1; last; }
+# }
+# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; }
+#}
+#foreach my $key (keys %SearchEnginesHashLib) {
+# my $found=0;
+# foreach my $key2 (values %SearchEnginesHashID) {
+# if ($key eq $key2) { $found=1; last; }
+# }
+# if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; }
+#}
+#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
+
+1;