diff --git a/classes/Encoding.php b/classes/Encoding.php
new file mode 100644
index 0000000000..5c81a31f98
--- /dev/null
+++ b/classes/Encoding.php
@@ -0,0 +1,349 @@
+<?php
+/**
+ * @package Encoding
+ * @version 2.0
+ * @link https://github.com/neitanod/forceutf8
+ * @example https://github.com/neitanod/forceutf8
+ * @license Revised BSD
+ */
+
+class Encoding {
+ // Values accepted by the $option argument of the conversion methods; utf8_decode() in this class uses them to choose the conversion strategy.
+ const ICONV_TRANSLIT = "TRANSLIT"; // convert with iconv() using the //TRANSLIT suffix
+ const ICONV_IGNORE = "IGNORE"; // convert with iconv() using the //IGNORE suffix
+ const WITHOUT_ICONV = ""; // skip iconv() and use the table-based fallback
+ // Windows-1252 byte value (as returned by ord(), range 128-159) => UTF-8 byte sequence of the character it represents; consulted by toUTF8().
+ protected static $win1252ToUtf8 = array(
+ 128 => "\xe2\x82\xac",
+ // no entry for 129: byte 0x81 is unassigned in Windows-1252
+ 130 => "\xe2\x80\x9a",
+ 131 => "\xc6\x92",
+ 132 => "\xe2\x80\x9e",
+ 133 => "\xe2\x80\xa6",
+ 134 => "\xe2\x80\xa0",
+ 135 => "\xe2\x80\xa1",
+ 136 => "\xcb\x86",
+ 137 => "\xe2\x80\xb0",
+ 138 => "\xc5\xa0",
+ 139 => "\xe2\x80\xb9",
+ 140 => "\xc5\x92",
+ // no entry for 141: byte 0x8D is unassigned in Windows-1252
+ 142 => "\xc5\xbd",
+ // no entry for 143: byte 0x8F is unassigned in Windows-1252
+ // no entry for 144: byte 0x90 is unassigned in Windows-1252
+ 145 => "\xe2\x80\x98",
+ 146 => "\xe2\x80\x99",
+ 147 => "\xe2\x80\x9c",
+ 148 => "\xe2\x80\x9d",
+ 149 => "\xe2\x80\xa2",
+ 150 => "\xe2\x80\x93",
+ 151 => "\xe2\x80\x94",
+ 152 => "\xcb\x9c",
+ 153 => "\xe2\x84\xa2",
+ 154 => "\xc5\xa1",
+ 155 => "\xe2\x80\xba",
+ 156 => "\xc5\x93",
+ // no entry for 157: byte 0x9D is unassigned in Windows-1252
+ 158 => "\xc5\xbe",
+ 159 => "\xc5\xb8"
+ );
+ // Two-byte sequences C2 80 - C2 9F (a Windows-1252 byte that was encoded as if it were ISO 8859-1) => UTF-8 of the intended character; applied by UTF8FixWin1252Chars().
+ protected static $brokenUtf8ToUtf8 = array(
+ "\xc2\x80" => "\xe2\x82\xac",
+ // no entry for C2 81: byte 0x81 is unassigned in Windows-1252
+ "\xc2\x82" => "\xe2\x80\x9a",
+ "\xc2\x83" => "\xc6\x92",
+ "\xc2\x84" => "\xe2\x80\x9e",
+ "\xc2\x85" => "\xe2\x80\xa6",
+ "\xc2\x86" => "\xe2\x80\xa0",
+ "\xc2\x87" => "\xe2\x80\xa1",
+ "\xc2\x88" => "\xcb\x86",
+ "\xc2\x89" => "\xe2\x80\xb0",
+ "\xc2\x8a" => "\xc5\xa0",
+ "\xc2\x8b" => "\xe2\x80\xb9",
+ "\xc2\x8c" => "\xc5\x92",
+ // no entry for C2 8D: byte 0x8D is unassigned in Windows-1252
+ "\xc2\x8e" => "\xc5\xbd",
+ // no entry for C2 8F: byte 0x8F is unassigned in Windows-1252
+ // no entry for C2 90: byte 0x90 is unassigned in Windows-1252
+ "\xc2\x91" => "\xe2\x80\x98",
+ "\xc2\x92" => "\xe2\x80\x99",
+ "\xc2\x93" => "\xe2\x80\x9c",
+ "\xc2\x94" => "\xe2\x80\x9d",
+ "\xc2\x95" => "\xe2\x80\xa2",
+ "\xc2\x96" => "\xe2\x80\x93",
+ "\xc2\x97" => "\xe2\x80\x94",
+ "\xc2\x98" => "\xcb\x9c",
+ "\xc2\x99" => "\xe2\x84\xa2",
+ "\xc2\x9a" => "\xc5\xa1",
+ "\xc2\x9b" => "\xe2\x80\xba",
+ "\xc2\x9c" => "\xc5\x93",
+ // no entry for C2 9D: byte 0x9D is unassigned in Windows-1252
+ "\xc2\x9e" => "\xc5\xbe",
+ "\xc2\x9f" => "\xc5\xb8"
+ );
+ // Reverse of $win1252ToUtf8: UTF-8 byte sequence => single Windows-1252 byte (0x80-0x9F); applied by utf8_decode() in this class before the final decode.
+ protected static $utf8ToWin1252 = array(
+ "\xe2\x82\xac" => "\x80",
+ // nothing maps to 0x81: the byte is unassigned in Windows-1252
+ "\xe2\x80\x9a" => "\x82",
+ "\xc6\x92" => "\x83",
+ "\xe2\x80\x9e" => "\x84",
+ "\xe2\x80\xa6" => "\x85",
+ "\xe2\x80\xa0" => "\x86",
+ "\xe2\x80\xa1" => "\x87",
+ "\xcb\x86" => "\x88",
+ "\xe2\x80\xb0" => "\x89",
+ "\xc5\xa0" => "\x8a",
+ "\xe2\x80\xb9" => "\x8b",
+ "\xc5\x92" => "\x8c",
+ // nothing maps to 0x8D: the byte is unassigned in Windows-1252
+ "\xc5\xbd" => "\x8e",
+ // nothing maps to 0x8F: the byte is unassigned in Windows-1252
+ // nothing maps to 0x90: the byte is unassigned in Windows-1252
+ "\xe2\x80\x98" => "\x91",
+ "\xe2\x80\x99" => "\x92",
+ "\xe2\x80\x9c" => "\x93",
+ "\xe2\x80\x9d" => "\x94",
+ "\xe2\x80\xa2" => "\x95",
+ "\xe2\x80\x93" => "\x96",
+ "\xe2\x80\x94" => "\x97",
+ "\xcb\x9c" => "\x98",
+ "\xe2\x84\xa2" => "\x99",
+ "\xc5\xa1" => "\x9a",
+ "\xe2\x80\xba" => "\x9b",
+ "\xc5\x93" => "\x9c",
+ // nothing maps to 0x9D: the byte is unassigned in Windows-1252
+ "\xc5\xbe" => "\x9e",
+ "\xc5\xb8" => "\x9f"
+ );
+
+ /**
+  * Encoding::toUTF8
+  *
+  * Leaves byte sequences that already look like UTF-8 alone and converts every other
+  * byte >= 0x80 to UTF-8, assuming the original encoding is Windows-1252 or ISO 8859-1.
+  * Arrays are converted element by element (recursively); any other non-string value
+  * is returned unchanged.
+  *
+  * Detection is purely structural, so conversion may be skipped in these scenarios:
+  *
+  * 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
+  *    are followed by any of these: ("group B")
+  *    ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿
+  *    For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
+  *    The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
+  *    is also a valid unicode character, and will be left unchanged.
+  *
+  * 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B,
+  * 3) when any of these: ðñòó are followed by THREE chars from group B.
+  *
+  * @name toUTF8
+  * @param mixed $text Any string, or an array of values to convert.
+  * @return mixed The same value with its strings UTF-8 encoded.
+  */
+ static function toUTF8($text){
+     if(is_array($text)){
+         foreach($text as $k => $v){
+             $text[$k] = self::toUTF8($v);
+         }
+         return $text;
+     }
+
+     if(!is_string($text)){
+         return $text;
+     }
+
+     $max = self::strlen($text);
+     $buf = "";
+     for($i = 0; $i < $max; $i++){
+         $c1 = $text[$i];
+         if($c1 >= "\xc0"){
+             //Lead-byte candidate: work out how many continuation bytes it announces
+             if($c1 <= "\xdf"){
+                 $trailing = 1;
+             } elseif($c1 <= "\xef"){
+                 $trailing = 2;
+             } elseif($c1 <= "\xf7"){
+                 $trailing = 3;
+             } else {
+                 $trailing = 0; //0xF8-0xFF: doesn't look like UTF8, always converted
+             }
+
+             //Accept the sequence only if every announced byte is in 0x80-0xBF;
+             //bytes past the end of the string count as "\x00" and therefore fail
+             $sequence = $c1;
+             $isUtf8 = ($trailing > 0);
+             for($j = 1; $isUtf8 && $j <= $trailing; $j++){
+                 $next = ($i + $j >= $max) ? "\x00" : $text[$i + $j];
+                 $isUtf8 = ($next >= "\x80" && $next <= "\xbf");
+                 $sequence .= $next;
+             }
+
+             if($isUtf8){ //yeah, almost sure it's UTF8 already
+                 $buf .= $sequence;
+                 $i += $trailing;
+             } else { //not valid UTF8. Convert only this byte; the next ones are re-examined
+                 $buf .= self::latin1ByteToUtf8($c1);
+             }
+         } elseif(($c1 & "\xc0") === "\x80"){ //stray 0x80-0xBF byte: needs conversion
+             $code = ord($c1);
+             if(isset(self::$win1252ToUtf8[$code])){ //found in Windows-1252 special cases
+                 $buf .= self::$win1252ToUtf8[$code];
+             } else {
+                 $buf .= self::latin1ByteToUtf8($c1);
+             }
+         } else { //plain ASCII: it doesn't need conversion
+             $buf .= $c1;
+         }
+     }
+     return $buf;
+ }
+
+ /**
+  * Encodes one byte in the range 0x80-0xFF, read as the ISO 8859-1 character with the
+  * same code point, as its two-byte UTF-8 sequence.
+  *
+  * Uses an integer shift instead of the former "ord() / 64", which handed chr() a
+  * fractional float (deprecated implicit float-to-int conversion since PHP 8.1).
+  *
+  * @param string $byte A single byte.
+  * @return string Two bytes: 0xC0 | (byte >> 6) followed by 0x80 | (byte & 0x3F).
+  */
+ protected static function latin1ByteToUtf8($byte){
+     return (chr(ord($byte) >> 6) | "\xc0") . (($byte & "\x3f") | "\x80");
+ }
+
+ /**
+  * Converts a string, or every element of an array (recursively), to Windows-1252
+  * by delegating to utf8_decode() of this class.
+  *
+  * @param mixed $text String or array to convert; other types are returned untouched.
+  * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
+  * @return mixed The converted value.
+  */
+ static function toWin1252($text, $option = self::WITHOUT_ICONV) {
+     if(is_string($text)) {
+         return static::utf8_decode($text, $option);
+     }
+     if(!is_array($text)) {
+         return $text;
+     }
+     foreach($text as $key => $item) {
+         $text[$key] = self::toWin1252($item, $option);
+     }
+     return $text;
+ }
+
+ /**
+  * Alias of toWin1252(): forwards both arguments and returns its result.
+  *
+  * @param mixed $text Value to convert.
+  * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
+  * @return mixed Whatever toWin1252() returns.
+  */
+ static function toISO8859($text, $option = self::WITHOUT_ICONV) {
+     $converted = self::toWin1252($text, $option);
+     return $converted;
+ }
+
+ /**
+  * Alias of toWin1252(): forwards both arguments and returns its result.
+  *
+  * @param mixed $text Value to convert.
+  * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
+  * @return mixed Whatever toWin1252() returns.
+  */
+ static function toLatin1($text, $option = self::WITHOUT_ICONV) {
+     $converted = self::toWin1252($text, $option);
+     return $converted;
+ }
+
+ /**
+  * Repeatedly decodes the text with utf8_decode() of this class and re-encodes it with
+  * toUTF8() until a pass no longer changes it, then applies one final pass.
+  * Presumably meant to repair text that was UTF-8 encoded more than once.
+  *
+  * @param mixed $text String or array (handled recursively); other types are returned untouched.
+  * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
+  * @return mixed The repaired value.
+  */
+ static function fixUTF8($text, $option = self::WITHOUT_ICONV){
+     if(is_array($text)) {
+         foreach($text as $key => $item) {
+             $text[$key] = self::fixUTF8($item, $option);
+         }
+         return $text;
+     }
+     if(!is_string($text)) {
+         return $text;
+     }
+
+     //Iterate until the round trip reaches a fixed point
+     $previous = "";
+     while($text != $previous){
+         $previous = $text;
+         $text = self::toUTF8(static::utf8_decode($previous, $option));
+     }
+     return self::toUTF8(static::utf8_decode($text, $option));
+ }
+
+ /**
+  * Repairs a UTF-8 string that was converted from Windows-1252 as if it were ISO 8859-1
+  * (i.e. ignoring the Windows-1252 characters 80 to 9F) by replacing every key of
+  * $brokenUtf8ToUtf8 with its mapped value.
+  * See: http://en.wikipedia.org/wiki/Windows-1252
+  *
+  * @param mixed $text Subject passed straight to str_replace().
+  * @return mixed The str_replace() result.
+  */
+ static function UTF8FixWin1252Chars($text){
+     $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
+     $properSequences = array_values(self::$brokenUtf8ToUtf8);
+     return str_replace($brokenSequences, $properSequences, $text);
+ }
+
+ /**
+  * Strips a leading UTF-8 byte order mark (EF BB BF) if the string starts with one.
+  *
+  * @param string $str Input text.
+  * @return string The input without its leading BOM, or the input itself when there is none.
+  */
+ static function removeBOM($str=""){
+     $bom = "\xef\xbb\xbf";
+     return (substr($str, 0, 3) === $bom) ? substr($str, 3) : $str;
+ }
+
+ /**
+  * Returns the length of $text in bytes.
+  *
+  * When mb_strlen() exists and bit 2 of the mbstring.func_overload setting is set
+  * (the flag that, per the PHP manual, overloads the string functions), the length
+  * is taken with mb_strlen() in '8bit' mode; otherwise plain strlen() is used.
+  *
+  * @param string $text Input text.
+  * @return int Length reported by the selected function.
+  */
+ protected static function strlen($text){
+     $stringFuncsOverloaded = function_exists('mb_strlen') && (((int) ini_get('mbstring.func_overload')) & 2);
+     if($stringFuncsOverloaded){
+         return mb_strlen($text,'8bit');
+     }
+     return strlen($text);
+ }
+
+ /**
+  * Maps a free-form encoding label to either 'ISO-8859-1' or 'UTF-8'.
+  *
+  * The label is upper-cased and stripped of every character that is not a letter,
+  * digit or whitespace, then looked up in the alias table; labels without an entry
+  * yield 'UTF-8'. Note that the Windows-1252 aliases also map to 'ISO-8859-1'.
+  *
+  * @param string $encodingLabel Label such as "utf-8" or "latin1".
+  * @return string 'ISO-8859-1' or 'UTF-8'.
+  */
+ public static function normalizeEncoding($encodingLabel)
+ {
+     $aliases = array(
+         'ISO88591' => 'ISO-8859-1',
+         'ISO8859' => 'ISO-8859-1',
+         'ISO' => 'ISO-8859-1',
+         'LATIN1' => 'ISO-8859-1',
+         'LATIN' => 'ISO-8859-1',
+         'UTF8' => 'UTF-8',
+         'UTF' => 'UTF-8',
+         'WIN1252' => 'ISO-8859-1',
+         'WINDOWS1252' => 'ISO-8859-1'
+     );
+     $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', strtoupper($encodingLabel));
+
+     return empty($aliases[$lookupKey]) ? 'UTF-8' : $aliases[$lookupKey];
+ }
+
+ /**
+  * Converts $text to the encoding named by $encodingLabel.
+  *
+  * The label is resolved with normalizeEncoding(); 'ISO-8859-1' selects toLatin1(),
+  * anything else selects toUTF8().
+  *
+  * @param string $encodingLabel Target encoding label.
+  * @param mixed $text Value to convert.
+  * @return mixed The converted value.
+  */
+ public static function encode($encodingLabel, $text)
+ {
+     $target = self::normalizeEncoding($encodingLabel);
+     if($target !== 'ISO-8859-1'){
+         return self::toUTF8($text);
+     }
+     return self::toLatin1($text);
+ }
+
+ /**
+  * Converts UTF-8 text to Windows-1252.
+  *
+  * With an ICONV_* option and iconv() available, the work is delegated to iconv()
+  * (adding //TRANSLIT or //IGNORE as requested). Otherwise the text is normalised with
+  * toUTF8(), the characters listed in $utf8ToWin1252 are swapped for their single
+  * Windows-1252 byte, and PHP's global utf8_decode() handles the remainder.
+  * NOTE(review): the PHP manual marks the global utf8_decode() as deprecated from PHP 8.2.
+  *
+  * @param string $text UTF-8 input.
+  * @param string $option One of the ICONV_* / WITHOUT_ICONV constants.
+  * @return mixed Result of iconv() or of the global utf8_decode().
+  */
+ protected static function utf8_decode($text, $option = self::WITHOUT_ICONV)
+ {
+     if ($option != self::WITHOUT_ICONV && function_exists('iconv')) {
+         if ($option === self::ICONV_TRANSLIT) {
+             $suffix = '//TRANSLIT';
+         } elseif ($option === self::ICONV_IGNORE) {
+             $suffix = '//IGNORE';
+         } else {
+             $suffix = '';
+         }
+         return iconv("UTF-8", "Windows-1252" . $suffix, $text);
+     }
+
+     $utf8 = self::toUTF8($text);
+     $withWin1252Bytes = str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $utf8);
+     return utf8_decode($withWin1252Bytes);
+ }
+}
diff --git a/classes/ImageShared.php b/classes/ImageShared.php
index fe475d8c48..8523d6b6ec 100644
--- a/classes/ImageShared.php
+++ b/classes/ImageShared.php
@@ -1122,10 +1122,13 @@ private static function getImgDim2($imgUrl) {
$data = curl_exec($curl);
curl_close($curl);
$width = 0; $height = 0;
+
$im = @imagecreatefromstring($data);
- $width = @imagesx($im);
- $height = @imagesy($im);
- if($im) imagedestroy($im);
+ if($im){
+ $width = @imagesx($im);
+ $height = @imagesy($im);
+ imagedestroy($im);
+ }
if(!$width || !$height) return false;
return array($width,$height);
}
diff --git a/classes/SpecProcessorOcr.php b/classes/SpecProcessorOcr.php
index 6bb7be65ff..e4295466f6 100644
--- a/classes/SpecProcessorOcr.php
+++ b/classes/SpecProcessorOcr.php
@@ -4,6 +4,7 @@
*/
include_once($SERVER_ROOT.'/config/dbconnection.php');
include_once($SERVER_ROOT.'/classes/Manager.php');
+include_once($SERVER_ROOT.'/classes/Encoding.php');
class SpecProcessorOcr extends Manager{
@@ -768,7 +769,7 @@ private function setTempPath(){
$tempPath = ini_get('upload_tmp_dir');
}
if(!$tempPath){
- $tempPath = $GLOBALS['serverRoot'];
+ $tempPath = $GLOBALS['SERVER_ROOT'];
if(substr($tempPath,-1) != '/') $tempPath .= '/';
$tempPath .= 'temp/';
}
@@ -786,38 +787,19 @@ private function setTempPath(){
//Misc functions
private function cleanRawStr($inStr){
- $retStr = $this->encodeString($inStr);
+ $retStr = trim($inStr);
+ //$retStr = $this->encodeString($retStr);
+ $retStr = Encoding::toUTF8($retStr); //normalise the raw OCR text to UTF-8 before the clean-up replacements
+ //NOTE(review): the text is UTF-8 from here on, yet the single-byte chr(145)-chr(239) keys and the chr(176) pattern below are not UTF-8 aware and may hit bytes inside multi-byte sequences - verify
+ $retStr = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+[\s\t]*[\r\n]+/", "\n\n", $retStr); //squeeze stacked blank lines down to a single empty line
//replace commonly misinterpreted characters
$replacements = array("/\." => "A.", "/-\\" => "A", "\X/" => "W", "\Y/" => "W", "`\‘i/" => "W", chr(96) => "'", chr(145) => "'", chr(146) => "'",
"�" => "'", "�" => '"', "�" => '"', "�" => '"', chr(147) => '"', chr(148) => '"', chr(152) => '"', chr(239) => "�");
$retStr = str_replace(array_keys($replacements), $replacements, $retStr);
- //replace Is, ls and |s in latitudes and longitudes with ones
- //replace Os in latitudes and longitudes with zeroes, Ss with 5s and Zs with 2s
- //latitudes and longitudes can be of the types: ddd.ddddddd�, ddd� ddd.ddd' or ddd� ddd' ddd.ddd"
- $false_num_class = "[OSZl|I!\d]";//the regex class that represents numbers and characters that numbers are commonly replaced with
- $preg_replace_callback_pattern =
- array(
- "/".$false_num_class."{1,3}(\.".$false_num_class."{1,7})\s?".chr(176)."\s?[NSEW(\\\V)(\\\W)]/",
- "/".$false_num_class."{1,3}".chr(176)."\s?".$false_num_class."{1,3}(\.".$false_num_class."{1,3})?\s?'\s?[NSEW(\\\V)(\\\W)]/",
- "/".$false_num_class."{1,3}".chr(176)."\s?".$false_num_class."{1,3}\s?'\s?(".$false_num_class."{1,3}(\.".$false_num_class."{1,3})?\"\s?)?[NSEW(\\\V)(\\\W)]/"
- );
- $retStr = preg_replace_callback($preg_replace_callback_pattern, create_function('$matches','return str_replace(array("l","|","!","I","O","S","Z"), array("1","1","1","1","0","5","2"), $matches[0]);'), $retStr);
//replace \V and \W in longitudes and latitudes with W
$retStr = preg_replace("/(\d\s?[".chr(176)."'\"])\s?\\\[VW]/", "\${1}W", $retStr, -1);
- //replace Zs and zs with 2s, Is, !s, |s and ls with 1s and Os and os with 0s in dates of type Mon(th) DD, YYYY
- $retStr = preg_replace_callback(
- "/(((?i)January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sept?\.?|October|Oct\.?|November|Nov\.?|December|Dec\.?)\s)(([\dOIl|!ozZS]{1,2}),?\s)([\dOI|!lozZS]{4})/",
- create_function('$matches','return $matches[1].str_replace(array("l","|","!","I","O","o","Z","z","S"), array("1","1","1","1","0","0","2","2","5"), $matches[3]).str_replace(array("l","|","!","I","O","o","Z","z","S"), array("1","1","1","1","0","0","2","2","5"), $matches[5]);'),
- $retStr
- );
- //replace Zs with 2s, Is with 1s and Os with 0s in dates of type DD-Mon(th)-YYYY or DDMon(th)YYYY or DD Mon(th) YYYY
- $retStr = preg_replace_callback(
- "/([\dOIl!|ozZS]{1,2}[-\s]?)(((?i)January|Jan\.?|February|Feb\.?|March|Mar\.?|April|Apr\.?|May|June|Jun\.?|July|Jul\.?|August|Aug\.?|September|Sept?\.?|October|Oct\.?|November|Nov\.?|December|Dec\.?)[-\s]?)([\dOIl|!ozZS]{4})/i",
- create_function('$matches','return str_replace(array("l","|","!","I","O","o","Z","z","S"), array("1","1","1","1","0","0","2","2","5"), $matches[1]).$matches[2].str_replace(array("l","|","!","I","O","o","Z","z","S"), array("1","1","1","1","0","0","2","2","5"), $matches[4]);'),
- $retStr
- );
return $retStr;
}
}
diff --git a/classes/TaxonProfile.php b/classes/TaxonProfile.php
index 1e53903a0a..bfb39ea1d1 100644
--- a/classes/TaxonProfile.php
+++ b/classes/TaxonProfile.php
@@ -687,7 +687,10 @@ public function getOccTaxonInDbCnt($limitRank = 170, $collidStr = 'all')
{
$count = -1;
if ($this->rankId >= $limitRank) {
- $sql = 'SELECT COUNT(o.occid) as cnt FROM omoccurrences o JOIN (SELECT DISTINCT e.tid, t.sciname FROM taxaenumtree e JOIN taxa t ON e.tid = t.tid WHERE parenttid = '.$this->tid.' OR e.tid = '.$this->tid.') AS parentAndChildren ON o.tidinterpreted = parentAndChildren.tid ';
+ //$sql = 'SELECT COUNT(o.occid) as cnt FROM omoccurrences o JOIN (SELECT DISTINCT e.tid, t.sciname FROM taxaenumtree e JOIN taxa t ON e.tid = t.tid WHERE parenttid = '.$this->tid.' OR e.tid = '.$this->tid.') AS parentAndChildren ON o.tidinterpreted = parentAndChildren.tid ';
+ $sql = 'SELECT COUNT(o.occid) as cnt
+ FROM omoccurrences o JOIN (SELECT DISTINCT ts.tid FROM taxaenumtree e JOIN taxa t ON e.tid = t.tid INNER JOIN taxstatus ts ON e.tid = ts.tidaccepted
+ WHERE e.parenttid = '.$this->tid.' OR e.tid = '.$this->tid.') AS taxa ON o.tidinterpreted = taxa.tid ';
if (preg_match('/^[,\d]+$/',$collidStr)) $sql .= 'AND o.collid IN('.$collidStr.')';
$result = $this->conn->query($sql);
while ($row = $result->fetch_object()){
diff --git a/classes/TaxonomyHarvester.php b/classes/TaxonomyHarvester.php
index e1fbf75e21..f388d9d24c 100644
--- a/classes/TaxonomyHarvester.php
+++ b/classes/TaxonomyHarvester.php
@@ -96,6 +96,10 @@ private function addSciname($taxonArr, $resourceKey){
$this->logOrEcho('Checking TROPICOS...',1);
$newTid= $this->addTropicosTaxon($taxonArr);
}
+ elseif($resourceKey== 'fdex'){
+ $this->logOrEcho('Checking fdex...',1);
+ $newTid= $this->addFdexTaxon($taxonArr);
+ }
elseif($resourceKey== 'eol'){
$this->logOrEcho('Checking EOL...',1);
$newTid= $this->addEolTaxon($taxonArr);
@@ -735,25 +739,48 @@ private function getTropicosNode($nodeArr){
return $taxonArr;
}
- //Index Fungorum functions
+ //Index Fungorum functions via MyCoPortal FdEx tools
//http://www.indexfungorum.org/ixfwebservice/fungus.asmx/NameSearch?SearchText=Acarospora%20socialis&AnywhereInText=false&MaxNumber=10
- private function addIndexFungorumTaxon($taxonArr){
+ //Looks the taxon up through the MyCoPortal FdEx web service and hands the name found to loadNewTaxon().
+ //Input: $taxonArr with at least 'sciname'; for rankid > 220 the query name is rebuilt from unitname1-3.
+ //Returns false when the service reports no match or the response cannot be decoded; otherwise the result of loadNewTaxon()
+ //NOTE(review): loadNewTaxon() is not visible here - confirm it returns the new tid that addSciname() expects
+ private function addFdexTaxon($taxonArr){
$sciName = $taxonArr['sciname'];
if($sciName){
$adjustedName = $sciName;
if(isset($taxonArr['rankid']) && $taxonArr['rankid'] > 220) $adjustedName = trim($taxonArr['unitname1'].' '.$taxonArr['unitname2'].' '.$taxonArr['unitname3']);
- $url = 'https://webservice.catalogueoflife.org/col/webservice?response=full&format=json&name='.str_replace(' ','%20',$adjustedName);
+ //rawurlencode() still turns spaces into %20 but also escapes every other reserved or non-ASCII character in the name
+ $url = 'https://mycoportal.org/fdex/services/api/query.php?qText='.rawurlencode($adjustedName).'&qField=taxon';
//echo $url.'
';
$retArr = $this->getContentString($url);
$content = $retArr['str'];
- $resultArr = json_decode($content,true);
- $numResults = $resultArr['number_of_results_returned'];
- if($numResults){
-
+ if($content == '0 results'){
+ $this->logOrEcho('Taxon not found',2);
+ return false;
+ }
+ else{
+ $resultArr = json_decode($content,true);
+ //json_decode() yields null on malformed input; count()/foreach on that would fail, so bail out explicitly
+ if(!is_array($resultArr)){
+ $this->logOrEcho('ERROR: unable to parse FdEx response',2);
+ return false;
+ }
+ //An empty result set must not reach loadNewTaxon() as an empty taxon array
+ if(!$resultArr){
+ $this->logOrEcho('Taxon not found',2);
+ return false;
+ }
+ $taxonArr = array();
+ /*
+ * return example "taxon" : "Verrucaria microstictica" , "authors" : "Leight." , "mbNumber" : "307221" , "otherID" : "86A6E1F9-AACE-43AF-A466-9427B38788D4" ,
+ * "rank" : "sp." , "rankCode" : "20" , "taxonomicStatus" : "Assumed legitimate" , "currentTaxon" : "Polycoccum microsticticum" , "currentMbNumber" : "307214" ,
+ * "currentOtherID" : "CACE62EC-E136-44D9-B01C-BB36D95E6262" , "currentStatus" : "Stable" , "parentTaxon" : "Verrucaria" , "parentMbNumber" : "5725" ,
+ * "parentOtherID" : "1CB1CC6A-36B9-11D5-9548-00D0592D548C" , "taxonomicAgreement" : "Asynchronous", "recordSource" : "Index Fungorum"
+ */
+ //When several records come back, the last one wins (each pass overwrites 'sciname')
+ foreach($resultArr as $unitArr){
+ $taxonArr['sciname'] = $unitArr['taxon'];
+ //NOTE(review): the rank lookup result is not used yet; getFdexRank() is still a stub
+ $rankArr = $this->getFdexRank($unitArr['rank'],$unitArr['rankCode']);
+ }
+ return $this->loadNewTaxon($taxonArr);
}
}
}
+ //Placeholder for translating an FdEx rank label/code pair: both arguments are currently ignored and an empty array is always returned
+ private function getFdexRank($rankStr, $rankCode){
+ return array();
+ }
+
//EOL functions
private function addEolTaxon($taxonArr){
//Returns content for accepted name
diff --git a/classes/WordCloud.php b/classes/WordCloud.php
index f92e77aa0e..1f7018593e 100644
--- a/classes/WordCloud.php
+++ b/classes/WordCloud.php
@@ -17,12 +17,12 @@ class WordCloud{
private $supportUtf8 = true;
+ //Opens a read-only database connection and sets the cloud defaults: word count, UTF-8 support flag, base tag URL, colours and the common-word list
public function __construct(){
- $this->conn = MySQLiConnectionFactory::getCon("readonly");
+ $this->conn = MySQLiConnectionFactory::getCon('readonly');
- $this->displayedWordCount = 100;
+ $this->displayedWordCount = 150;
if($GLOBALS['charset'] == 'ISO-8859-1') $this->supportUtf8 = false;
- $this->tagUrl = "http://www.google.com/search?hl=en&q=";
-
+ //$this->tagUrl = "https://www.google.com/search?hl=en&q=";
+ $this->tagUrl = $GLOBALS['CLIENT_ROOT'].'/collections/editor/occurrencetabledisplay.php?occindex=0&reset=1&q_processingstatus=unprocessed';
$this->backgroundColor = "#000";
$this->wordColors[0] = "#5122CC";
$this->wordColors[1] = "#229926";
@@ -35,104 +35,116 @@ public function __construct(){
$this->wordColors[8] = "#229938";
$this->wordColors[9] = "#419922";
- $commonWordStr = "a,able,about,across,after,all,almost,also,am,among,an,and,any,are,arent," .
- "as,at,be,because,been,but,by,can,cant,cannot,could,couldve,couldnt,dear,did,didnt,do,does,doesnt," .
- "dont,either,else,ever,every,for,from,get,got,had,has,hasnt,have,he,her,him,his,how,however," .
- "i,if,in,into,is,isnt,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off," .
- "often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that," .
- "the,their,them,then,there,theres,these,they,this,to,too,us,wants,was,wasnt,we,were,werent,what," .
- "when,when,where,which,while,who,whom,why,will,with,wont,would,wouldve,wouldnt,yet,you,your";
- //$commonWordStr = strtolower($commonWordStr);
- $this->commonWordArr = explode(",", $commonWordStr);
+ //Comma-separated common words, split into commonWordArr below (presumably the words skipped when tallying tags - confirm in addTagsFromText).
+ //Every segment except the last must end with a comma, otherwise two words fuse into a single entry.
+ $commonWordStr = 'a,able,about,across,after,all,almost,also,am,among,an,and,any,are,arent,as,at,be,because,been,but,by,can,cant,cannot,could,couldve,couldnt,dear,did,didnt,do,'.
+ 'does,doesnt,dont,either,else,ever,every,for,from,get,got,had,has,hasnt,have,he,her,him,his,how,however,i,if,in,into,is,isnt,it,its,just,least,let,like,likely,may,me,might,'.
+ 'most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,theres,these,they,this,'.
+ 'to,too,us,wants,was,wasnt,we,were,werent,what,when,when,where,which,while,who,whom,why,will,with,wont,would,wouldve,wouldnt,yet,you,your';
+ $this->commonWordArr = explode(',', $commonWordStr);
}
public function __destruct(){
if(!($this->conn === null)) $this->conn->close();
}
- public function buildWordFile($collectionId = 0,$csMode = 0){
- $collArr = array();
- $sqlFrag = 'FROM omoccurrences o INNER JOIN images i ON o.occid = i.occid '.
- 'INNER JOIN specprocessorrawlabels r ON i.imgid = r.imgid ';
- if($csMode){
- $sqlFrag .= 'INNER JOIN omcrowdsourcequeue q ON o.occid = q.occid ';
+ //Builds one word cloud per collection that has unprocessed occurrences with no locality and at least one raw OCR label.
+ //$csMode: when truthy, only occurrences present in omcrowdsourcequeue are considered.
+ //Returns nothing; each collection is delegated to buildWordCloud().
+ public function batchBuildWordClouds($csMode = 0){
+ $processingArr = array();
+ //collid is read from omoccurrences (alias o); no table aliased "c" is joined in this query
+ $sql = 'SELECT DISTINCT o.collid FROM omoccurrences o INNER JOIN images i ON o.occid = i.occid INNER JOIN specprocessorrawlabels r ON i.imgid = r.imgid ';
+ if($csMode) $sql .= 'INNER JOIN omcrowdsourcequeue q ON o.occid = q.occid ';
+ $sql .= 'WHERE o.processingstatus = "unprocessed" AND o.locality IS NULL ';
+ $rs = $this->conn->query($sql);
+ //query() returns false on failure; stop here instead of calling fetch_object() on a non-object
+ if(!$rs) return;
+ while($r = $rs->fetch_object()){
+ $processingArr[] = $r->collid;
}
- $sqlColl = 'SELECT DISTINCT c.collid, c.collectionname '.$sqlFrag.
- 'INNER JOIN omcollections c ON c.collid = o.collid ';
- if($collectionId){
- $sqlColl .= 'WHERE c.collid = '.$collectionId;
+ $rs->free();
+ //The result set is released before the per-collection builds run their own queries
+ foreach($processingArr as $collid){
+ $this->buildWordCloud($collid, $csMode);
}
- //echo 'sql: '.$sqlColl;
- $rsColl = $this->conn->query($sqlColl);
- while($rColl = $rsColl->fetch_object()){
- $collArr[$rColl->collid] = $rColl->collectionname;
+ }
+
+ public function buildWordCloud($collid, $csMode = 0){
+ $retPath = '';
+ //Reset frequency array
+ unset($this->frequencyArr);
+ $this->frequencyArr = array();
+ $this->tagUrl .= '&collid='.$collid.'&q_customfield1=ocrFragment&q_customtype1=LIKE&q_customvalue1=';
+ $sql = 'SELECT DISTINCT r.rawstr FROM omoccurrences o INNER JOIN images i ON o.occid = i.occid INNER JOIN specprocessorrawlabels r ON i.imgid = r.imgid ';
+ if($csMode) $sql .= 'INNER JOIN omcrowdsourcequeue q ON o.occid = q.occid ';
+ $sql .= 'WHERE o.processingstatus = "unprocessed" AND o.locality IS NULL ';
+ if($collid) $sql .= 'AND o.collid = '.$collid;
+ //echo $sql; exit;
+ //Process all raw OCR strings for collection
+ $rs = $this->conn->query($sql);
+ while($r = $rs->fetch_object()){
+ $this->addTagsFromText($r->rawstr);
}
- $rsColl->free();
-
- $sql = 'SELECT DISTINCT r.rawstr '.$sqlFrag.
- 'WHERE o.processingstatus = "unprocessed" AND o.locality IS NULL ';
- foreach($collArr as $collid => $collName){
- //Reset frequency array
- unset($this->frequencyArr);
- $this->frequencyArr = array();
- //Process all raw OCR strings for collection
- $sql .= 'AND o.collid = '.$collid;
- $rs = $this->conn->query($sql);
- while($r = $rs->fetch_object()){
- $this->addTagsFromText($r->rawstr);
- }
- $rs->free();
- //Get Word cloud
- $cloudStr = $this->getWordCloud();
- echo $cloudStr.'
';
- //Write word out to text file
- $wcPath = $GLOBALS['serverRoot'];
- if(substr($wcPath,-1) != '/' && substr($wcPath,-1) != "\\") $wcPath .= '/';
- $wcPath .= 'content/collections/wordclouds/ocrcloud'.$collid.'.html';
- if(file_exists($wcPath)){
- $wcFH = fopen($wcPath, 'a');
- if(!$wcFH = fopen($wcPath, 'a')) {
- echo "Cannot open file ($wcPath)";
- exit;
- }
- if(fwrite($wcFH, $cloudStr) === FALSE) {
- echo "Cannot write to file ($wcPath)";
- exit;
- }
- fclose($handle);
- }
- else{
- echo 'ERROR trying to write word cloud to temp folder: '.$wcPath;
- echo '
Is the symbiota temp folder writable to Apache?';
+ $rs->free();
+ //Get Word cloud
+ $cloudStr = $this->getWordCloud();
+ if(!$cloudStr){
+ echo '