Initial commit
This commit is contained in:
300
typo3conf/ext/dd_googlesitemap/Classes/Scheduler/Task.php
Normal file
300
typo3conf/ext/dd_googlesitemap/Classes/Scheduler/Task.php
Normal file
@@ -0,0 +1,300 @@
|
||||
<?php
|
||||
/***************************************************************
|
||||
* Copyright notice
|
||||
*
|
||||
* (c) 2013 Dmitry Dulepov <dmitry.dulepov@gmail.com>
|
||||
* All rights reserved
|
||||
*
|
||||
* This script is part of the TYPO3 project. The TYPO3 project is
|
||||
* free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* The GNU General Public License can be found at
|
||||
* http://www.gnu.org/copyleft/gpl.html.
|
||||
*
|
||||
* This script is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* This copyright notice MUST APPEAR in all copies of the script!
|
||||
***************************************************************/
|
||||
|
||||
namespace DmitryDulepov\DdGooglesitemap\Scheduler;
|
||||
|
||||
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
||||
|
||||
/**
|
||||
* This class provides a scheduler task to create sitemap index as required
|
||||
* by the Google sitemap protocol.
|
||||
*
|
||||
* @author Dmitry Dulepov <dmitry.dulepov@gmail.com>
|
||||
* @see http://support.google.com/webmasters/bin/answer.py?hl=en&answer=71453
|
||||
*/
|
||||
class Task extends \TYPO3\CMS\Scheduler\Task\AbstractTask {
|
||||
|
||||
const DEFAULT_FILE_PATH = 'typo3temp/dd_googlesitemap';
|
||||
|
||||
/** @var string */
|
||||
private $baseUrl;
|
||||
|
||||
/** @var string */
|
||||
protected $eIdScriptUrl;
|
||||
|
||||
/** @var string */
|
||||
protected $indexFilePath;
|
||||
|
||||
/** @var int */
|
||||
protected $maxUrlsPerSitemap = 50000;
|
||||
|
||||
/** @var string */
|
||||
private $sitemapFileFormat;
|
||||
|
||||
/** @var int */
|
||||
private $offset;
|
||||
|
||||
/**
|
||||
* Creates the instance of the class. This call initializes the index file
|
||||
* path to the random value. After the task is configured, the user may
|
||||
* change the file and the file name will be serialized with the task and
|
||||
* used later.
|
||||
*
|
||||
* @see __sleep
|
||||
*/
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
$this->indexFilePath = self::DEFAULT_FILE_PATH . '/' . GeneralUtility::getRandomHexString(24) . '.xml';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconstructs some variables after the object is unserialized.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function __wakeup() {
|
||||
$this->buildSitemapFileFormat();
|
||||
$this->buildBaseUrl();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the main method that is called when a task is executed
|
||||
* It MUST be implemented by all classes inheriting from this one
|
||||
* Note that there is no error handling, errors and failures are expected
|
||||
* to be handled and logged by the client implementations.
|
||||
* Should return true on successful execution, false on error.
|
||||
*
|
||||
* @return boolean Returns true on successful execution, false on error
|
||||
*/
|
||||
public function execute() {
|
||||
$indexFilePathTemp = PATH_site . $this->indexFilePath . '.tmp';
|
||||
$indexFile = fopen($indexFilePathTemp, 'wt');
|
||||
fwrite($indexFile, '<?xml version="1.0" encoding="UTF-8"?>' . chr(10));
|
||||
fwrite($indexFile, '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . chr(10));
|
||||
|
||||
$eIDscripts = GeneralUtility::trimExplode(chr(10), $this->eIdScriptUrl);
|
||||
$eIdIndex = 1;
|
||||
foreach ($eIDscripts as $eIdScriptUrl) {
|
||||
$this->offset = 0;
|
||||
$currentFileNumber = 1;
|
||||
$lastFileHash = '';
|
||||
do {
|
||||
$sitemapFileName = sprintf($this->sitemapFileFormat, $eIdIndex, $currentFileNumber++);
|
||||
$this->buildSitemap($eIdScriptUrl, $sitemapFileName);
|
||||
|
||||
$isSitemapEmpty = $this->isSitemapEmpty($sitemapFileName);
|
||||
$currentFileHash = $isSitemapEmpty ? -1 : md5_file(PATH_site . $sitemapFileName);
|
||||
$stopLoop = $isSitemapEmpty || ($currentFileHash == $lastFileHash);
|
||||
|
||||
if ($stopLoop) {
|
||||
@unlink(PATH_site . $sitemapFileName);
|
||||
}
|
||||
else {
|
||||
fwrite($indexFile, '<sitemap><loc>' . htmlspecialchars($this->makeSitemapUrl($sitemapFileName)) . '</loc></sitemap>' . chr(10));
|
||||
$lastFileHash = $currentFileHash;
|
||||
}
|
||||
} while (!$stopLoop);
|
||||
$eIdIndex++;
|
||||
}
|
||||
|
||||
fwrite($indexFile, '</sitemapindex>' . chr(10));
|
||||
fclose($indexFile);
|
||||
|
||||
@unlink(PATH_site . $this->indexFilePath);
|
||||
rename($indexFilePathTemp, PATH_site . $this->indexFilePath);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is designed to return some additional information about the task,
|
||||
* that may help to set it apart from other tasks from the same class
|
||||
* This additional information is used - for example - in the Scheduler's BE module
|
||||
* This method should be implemented in most task classes
|
||||
*
|
||||
* @return string Information to display
|
||||
*/
|
||||
public function getAdditionalInformation() {
|
||||
/** @noinspection PhpUndefinedMethodInspection */
|
||||
$format = $GLOBALS['LANG']->sL('LLL:EXT:dd_googlesitemap/locallang.xml:scheduler.extra_info');
|
||||
return sprintf($format, $this->getIndexFileUrl());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the url of the eID script. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @return string
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function getEIdScriptUrl() {
|
||||
return $this->eIdScriptUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index file path. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @return string
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function getIndexFilePath() {
|
||||
return $this->indexFilePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtains the number of urls per sitemap. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @return int
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function getMaxUrlsPerSitemap() {
|
||||
return $this->maxUrlsPerSitemap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the URl of the eID script. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @param $url
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function setEIdScriptUrl($url) {
|
||||
$this->eIdScriptUrl = $url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the URL of the eID script. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @param string $path
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function setIndexFilePath($path) {
|
||||
$this->indexFilePath = $path;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of URLs per sitemap. This is called from the task
|
||||
* configuration inside scheduler.
|
||||
*
|
||||
* @param int $maxUrlsPerSitemap
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
public function setMaxUrlsPerSitemap($maxUrlsPerSitemap) {
|
||||
$this->maxUrlsPerSitemap = $maxUrlsPerSitemap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a base url for sitemaps.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
protected function buildBaseUrl() {
|
||||
$urlParts = parse_url($this->eIdScriptUrl);
|
||||
$this->baseUrl = $urlParts['scheme'] . '://';
|
||||
if ($urlParts['user']) {
|
||||
$this->baseUrl .= $urlParts['user'];
|
||||
if ($urlParts['pass']) {
|
||||
$this->baseUrl .= ':' . $urlParts['pass'];
|
||||
}
|
||||
$this->baseUrl .= '@';
|
||||
}
|
||||
$this->baseUrl .= $urlParts['host'];
|
||||
if ($urlParts['port']) {
|
||||
$this->baseUrl .= ':' . $urlParts['port'];
|
||||
}
|
||||
$this->baseUrl .= '/';
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the sitemap.
|
||||
*
|
||||
* @param string $eIdScriptUrl
|
||||
* @param string $sitemapFileName
|
||||
* @see tx_ddgooglesitemap_additionalfieldsprovider
|
||||
*/
|
||||
protected function buildSitemap($eIdScriptUrl, $sitemapFileName) {
|
||||
$url = $eIdScriptUrl . sprintf('&offset=%d&limit=%d', $this->offset, $this->maxUrlsPerSitemap);
|
||||
|
||||
$content = GeneralUtility::getURL($url);
|
||||
if ($content) {
|
||||
file_put_contents(PATH_site . $sitemapFileName, $content);
|
||||
$this->offset += $this->maxUrlsPerSitemap;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the format string for the sitemap files.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
protected function buildSitemapFileFormat() {
|
||||
$fileParts = pathinfo($this->indexFilePath);
|
||||
$this->sitemapFileFormat = $fileParts['dirname'] . '/' . $fileParts['filename'] . '_sitemap_%05d_%05d.xml';
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index file url.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
protected function getIndexFileUrl() {
|
||||
return $this->baseUrl . $this->indexFilePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the current sitemap has no entries. The function reads a chunk
|
||||
* of the file, which is large enough to have a '<url>' token in it and
|
||||
* examines the chunk. If the token is not found, than the sitemap is either
|
||||
* empty or corrupt.
|
||||
*
|
||||
* @param string $sitemapFileName
|
||||
* @return bool
|
||||
*/
|
||||
protected function isSitemapEmpty($sitemapFileName) {
|
||||
$result = TRUE;
|
||||
|
||||
$fileDescriptor = @fopen(PATH_site . $sitemapFileName, 'rt');
|
||||
if ($fileDescriptor) {
|
||||
$chunkSizeToCheck = 10240;
|
||||
$testString = fread($fileDescriptor, $chunkSizeToCheck);
|
||||
fclose($fileDescriptor);
|
||||
$result = (strpos($testString, '<url>') === FALSE);
|
||||
}
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a url to the sitemap.
|
||||
*
|
||||
* @param string $siteMapPath
|
||||
* @return string
|
||||
*/
|
||||
protected function makeSitemapUrl($siteMapPath) {
|
||||
return $this->baseUrl . $siteMapPath;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user