File Editor
Directories:
.. (Back)
Diff
dashboard
rest-api
Files:
Diff.php
IPTraf.php
IPTrafList.php
WFLSPHP52Compatability.php
compat.php
diffResult.php
email_genericAlert.php
email_newIssues.php
email_unlockRequest.php
email_unsubscribeRequest.php
flags.php
live_activity.php
menu_dashboard.php
menu_dashboard_options.php
menu_firewall.php
menu_firewall_blocking.php
menu_firewall_blocking_options.php
menu_firewall_waf.php
menu_firewall_waf_options.php
menu_install.php
menu_options.php
menu_scanner.php
menu_scanner_credentials.php
menu_scanner_options.php
menu_support.php
menu_tools.php
menu_tools_diagnostic.php
menu_tools_importExport.php
menu_tools_livetraffic.php
menu_tools_twoFactor.php
menu_tools_whois.php
menu_wordfence_central.php
sysinfo.php
viewFullActivityLog.php
wf503.php
wfAPI.php
wfActivityReport.php
wfAdminNoticeQueue.php
wfAlerts.php
wfArray.php
wfBrowscap.php
wfBrowscapCache.php
wfBulkCountries.php
wfCache.php
wfCentralAPI.php
wfConfig.php
wfCrawl.php
wfCredentialsController.php
wfCrypt.php
wfCurlInterceptor.php
wfDB.php
wfDashboard.php
wfDateLocalization.php
wfDeactivationOption.php
wfDiagnostic.php
wfDict.php
wfDirectoryIterator.php
wfFileUtils.php
wfHelperBin.php
wfHelperString.php
wfIPWhitelist.php
wfImportExportController.php
wfInvalidPathException.php
wfIpLocation.php
wfIpLocator.php
wfIssues.php
wfJWT.php
wfLicense.php
wfLockedOut.php
wfLog.php
wfMD5BloomFilter.php
wfModuleController.php
wfNotification.php
wfOnboardingController.php
wfPersistenceController.php
wfRESTAPI.php
wfScan.php
wfScanEngine.php
wfScanEntrypoint.php
wfScanFile.php
wfScanFileLink.php
wfScanMonitor.php
wfScanPath.php
wfSchema.php
wfStyle.php
wfSupportController.php
wfUnlockMsg.php
wfUpdateCheck.php
wfUtils.php
wfVersionCheckController.php
wfView.php
wfViewResult.php
wfWebsite.php
wordfenceClass.php
wordfenceConstants.php
wordfenceHash.php
wordfenceScanner.php
wordfenceURLHoover.php
Create New File
Create
Edit File: wfCrawl.php
<?php
require_once(dirname(__FILE__) . '/wfUtils.php');

/**
 * Crawler detection and verification helpers.
 *
 * Verification results are cached in the network table 'wfCrawlers', keyed by
 * the packed IP and the MD5 of a "pattern signature", and are considered fresh
 * for WORDFENCE_CRAWLER_VERIFY_CACHE_TIME seconds.
 */
class wfCrawl {
	const GOOGLE_BOT_VERIFIED = 'verified';
	const GOOGLE_BOT_FAKE = 'fakeBot';
	const GOOGLE_BOT_UNDETERMINED = 'undetermined';

	/**
	 * User-agent patterns (without modifiers) that identify a Google crawler.
	 * isGoogleCrawler() appends the 'i' modifier before matching.
	 *
	 * @var string[]
	 */
	private static $googPat = array(
		'@^Mozilla/5\\.0 \\(.*Google Keyword Tool.*\\)$@',
		'@^Mozilla/5\\.0 \\(.*Feedfetcher\\-Google.*\\)$@',
		'@^Feedfetcher\\-Google\\-iGoogleGadgets.*$@',
		'@^searchbot admin\\@google\\.com$@',
		'@^Google\\-Site\\-Verification.*$@',
		'@^Google OpenSocial agent.*$@',
		'@^.*Googlebot\\-Mobile/2\\..*$@',
		'@^AdsBot\\-Google\\-Mobile.*$@',
		'@^google \\(.*Enterprise.*\\)$@',
		'@^Mediapartners\\-Google.*$@',
		'@^GoogleFriendConnect.*$@',
		'@^googlebot\\-urlconsole$@',
		'@^.*Google Web Preview.*$@',
		'@^Feedfetcher\\-Google.*$@',
		'@^AppEngine\\-Google.*$@',
		'@^Googlebot\\-Video.*$@',
		'@^Googlebot\\-Image.*$@',
		'@^Google\\-Sitemaps.*$@',
		'@^Googlebot/Test.*$@',
		'@^Googlebot\\-News.*$@',
		'@^.*Googlebot/2\\.1.*$@',
		'@^AdsBot\\-Google.*$@',
		'@^Google$@'
	);

	/**
	 * Decides whether a user agent belongs to a crawler.
	 *
	 * When browscap has no entry for the user agent (or only the
	 * 'DefaultProperties' fallback), the request is treated as a crawler
	 * unless wfLog::isHumanRequest() says otherwise for the current IP.
	 *
	 * @param string $UA
	 * @return bool
	 */
	public static function isCrawler($UA){
		$browscap = new wfBrowscap();
		$b = $browscap->getBrowser($UA);
		if (!$b || $b['Parent'] == 'DefaultProperties') {
			$IP = wfUtils::getIP();
			return !wfLog::isHumanRequest($IP, $UA);
		}
		else if (isset($b['Crawler']) && $b['Crawler']) {
			return true;
		}
		return false;
	}

	/**
	 * Verifies a crawler by reverse lookup of $IP, matching the resulting host
	 * against $hostPattern, and then confirming that the host resolves back to
	 * the same IP. Every outcome is written to the crawler cache table.
	 *
	 * Cached statuses written here: 'noPTR' (reverse lookup failed), 'badPTR'
	 * (host does not match the pattern), 'fwdFail' (host does not resolve back
	 * to the IP) and 'verified'.
	 *
	 * @param string $hostPattern Complete regular expression (with delimiters) the PTR host must match.
	 * @param string $IP
	 * @return bool True only when the status is 'verified'.
	 */
	public static function verifyCrawlerPTR($hostPattern, $IP){
		$table = wfDB::networkTable('wfCrawlers');
		$db = new wfDB();
		$IPn = wfUtils::inet_pton($IP);

		// Use the cached result when there is a fresh one.
		$status = $db->querySingle("select status from $table where IP=%s and patternSig=UNHEX(MD5('%s')) and lastUpdate > unix_timestamp() - %d", $IPn, $hostPattern, WORDFENCE_CRAWLER_VERIFY_CACHE_TIME);
		if ($status) {
			return $status == 'verified';
		}

		$host = wfUtils::reverseLookup($IP);
		if (!$host) {
			self::_cachePTRStatus($db, $table, $IPn, $hostPattern, 'noPTR', '');
			return false;
		}

		if (!preg_match($hostPattern, $host)) {
			self::_cachePTRStatus($db, $table, $IPn, $hostPattern, 'badPTR', $host);
			return false;
		}

		// The PTR host looks right; make sure it resolves back to the same IP.
		$resultIPs = wfUtils::resolveDomainName($host);
		$addrsMatch = false;
		foreach ($resultIPs as $resultIP) {
			if ($resultIP == $IP) {
				$addrsMatch = true;
				break;
			}
		}

		if ($addrsMatch) {
			self::_cachePTRStatus($db, $table, $IPn, $hostPattern, 'verified', $host);
			return true;
		}

		self::_cachePTRStatus($db, $table, $IPn, $hostPattern, 'fwdFail', $host);
		return false;
	}

	/**
	 * Inserts or refreshes the cached PTR verification row for an IP/pattern pair.
	 *
	 * @param wfDB $db
	 * @param string $table
	 * @param string $IPn IP as returned by wfUtils::inet_pton().
	 * @param string $hostPattern
	 * @param string $status
	 * @param string $host PTR host name, or '' when there is none.
	 */
	private static function _cachePTRStatus($db, $table, $IPn, $hostPattern, $status, $host){
		$db->queryWrite("insert into $table (IP, patternSig, status, lastUpdate, PTR) values (%s, UNHEX(MD5('%s')), '%s', unix_timestamp(), '%s') ON DUPLICATE KEY UPDATE status='%s', lastUpdate=unix_timestamp(), PTR='%s'", $IPn, $hostPattern, $status, $host, $status, $host);
	}

	/**
	 * Checks whether the user agent contains a "Googlebot/<digit>.<digit>" token.
	 *
	 * @param string|null $userAgent Defaults to $_SERVER['HTTP_USER_AGENT'] ('' when unset).
	 * @return bool
	 */
	public static function isGooglebot($userAgent = null){
		if ($userAgent === null) {
			$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
		}
		return (bool) preg_match('/Googlebot\/\d\.\d/', $userAgent);
	}

	/**
	 * Checks whether the user agent matches any known Google crawler pattern
	 * (case-insensitively).
	 *
	 * @param string|null $userAgent Defaults to $_SERVER['HTTP_USER_AGENT'] ('' when unset).
	 * @return bool
	 */
	public static function isGoogleCrawler($userAgent = null){
		if ($userAgent === null) {
			$userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
		}
		foreach (self::$googPat as $pat) {
			if (preg_match($pat . 'i', $userAgent)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Has a Google crawler user agent and the IP passes verification, either
	 * through the PTR check against the pattern supplied by
	 * wordfence::getLog()->getGooglePattern() or through verifyGooglebotViaNOC1().
	 *
	 * Definitive answers are memoized per IP for the duration of the request.
	 * When neither check can reach a conclusion the method returns true without
	 * memoizing, so the check is retried on the next call.
	 *
	 * @param string|null $ip Defaults to wfUtils::getIP().
	 * @param string|null $ua Passed to isGoogleCrawler(); null means the request's user agent.
	 * @return bool
	 */
	public static function isVerifiedGoogleCrawler($ip = null, $ua = null) {
		static $verified;
		if (!isset($verified)) {
			$verified = array();
		}

		if ($ip === null) {
			$ip = wfUtils::getIP();
		}

		if ($ip === null || $ip === false) {
			//Likely a CLI execution
			return false;
		}

		if (array_key_exists($ip, $verified)) {
			return $verified[$ip];
		}

		if (self::isGoogleCrawler($ua)) {
			if (self::verifyCrawlerPTR(wordfence::getLog()->getGooglePattern(), $ip)) {
				$verified[$ip] = true;
				return $verified[$ip];
			}

			$noc1Status = self::verifyGooglebotViaNOC1($ip);
			if ($noc1Status == self::GOOGLE_BOT_VERIFIED) {
				$verified[$ip] = true;
				return $verified[$ip];
			}
			else if ($noc1Status == self::GOOGLE_BOT_FAKE) {
				$verified[$ip] = false;
				return $verified[$ip];
			}

			return true; //We were unable to successfully validate Googlebot status so default to being permissive
		}

		$verified[$ip] = false;
		return $verified[$ip];
	}

	/**
	 * Attempts to verify whether an IP claiming to be Googlebot is actually Googlebot
	 * by asking the 'verify_googlebot' API action. Results are cached in the crawler
	 * table under the 'googlenoc1' pattern signature.
	 *
	 * @param string|null $ip Defaults to wfUtils::getIP().
	 * @return string One of GOOGLE_BOT_VERIFIED, GOOGLE_BOT_FAKE or GOOGLE_BOT_UNDETERMINED
	 *                (the latter when the API call throws).
	 */
	public static function verifyGooglebotViaNOC1($ip = null) {
		$table = wfDB::networkTable('wfCrawlers');
		if ($ip === null) {
			$ip = wfUtils::getIP();
		}

		$db = new wfDB();
		$IPn = wfUtils::inet_pton($ip);
		$patternSig = 'googlenoc1';

		// Use the cached result when there is a fresh one.
		$status = $db->querySingle("select status from $table where IP=%s and patternSig=UNHEX(MD5('%s')) and lastUpdate > unix_timestamp() - %d", $IPn, $patternSig, WORDFENCE_CRAWLER_VERIFY_CACHE_TIME);
		if ($status === 'verified') {
			return self::GOOGLE_BOT_VERIFIED;
		}
		else if ($status === 'fakeBot') {
			return self::GOOGLE_BOT_FAKE;
		}

		$api = new wfAPI(wfConfig::get('apiKey'), wfUtils::getWPVersion());
		try {
			$data = $api->call('verify_googlebot', array(
				'ip' => $ip,
			));
			if (is_array($data) && !empty($data['verified'])) {
				// Cache results
				$db->queryWrite("INSERT INTO {$table} (IP, patternSig, status, lastUpdate) VALUES ('%s', UNHEX(MD5('%s')), '%s', unix_timestamp()) ON DUPLICATE KEY UPDATE status = VALUES(status), lastUpdate = VALUES(lastUpdate)", $IPn, $patternSig, 'verified');
				return self::GOOGLE_BOT_VERIFIED;
			}
			else {
				$db->queryWrite("INSERT INTO {$table} (IP, patternSig, status, lastUpdate) VALUES ('%s', UNHEX(MD5('%s')), '%s', unix_timestamp()) ON DUPLICATE KEY UPDATE status = VALUES(status), lastUpdate = VALUES(lastUpdate)", $IPn, $patternSig, 'fakeBot');
				// Return the negative verdict right away so the first (uncached)
				// lookup agrees with what later cached lookups report.
				return self::GOOGLE_BOT_FAKE;
			}
		}
		catch (Exception $e) {
			// Do nothing, bail
		}
		return self::GOOGLE_BOT_UNDETERMINED;
	}
}
Save Changes
Rename File
Rename