-
Notifications
You must be signed in to change notification settings - Fork 2
/
SunSitemap.php
328 lines (303 loc) · 11.9 KB
/
SunSitemap.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
<?php
//error_reporting(E_ALL);
//ini_set('display_errors', TRUE);
/**
* SunSitemap Class
*
* @category Sitemap Generator
* @package SunSitemap
* @author Mehmet Selcuk Batal <batalms@gmail.com>
* @copyright Copyright (c) 2020, Sunhill Technology <www.sunhillint.com>
* @license https://opensource.org/licenses/lgpl-3.0.html The GNU Lesser General Public License, version 3.0
* @link https://github.com/msbatal/PHP-Sitemap-Generator
* @version 2.4.2
*/
class SunSitemap
{
    /**
     * Sitemap file name
     * @var string
     */
    private $sitemapFile = "sitemap.xml";

    /**
     * Sitemap index file name
     * @var string
     */
    private $sitemapIndexFile = "sitemap-index.xml";

    /**
     * Robots file name
     * @var string
     */
    private $robotsFile = "robots.txt";

    /**
     * Sitemap base url (always stored with exactly one trailing slash)
     * @var string
     */
    public $baseUrl;

    /**
     * Sitemap relative path (stored with a trailing slash, or null when unset)
     * @var string|null
     */
    public $relPath;

    /**
     * Sitemap absolute path (document root plus relative path, with trailing slash)
     * @var string
     */
    private $absPath;

    /**
     * Maximum Urls Per Sitemap (1 to 50000)
     * @var integer
     */
    public $maxUrl = 50000;

    /**
     * Create zipped copy of sitemap
     * @var boolean
     */
    public $createZip = false;

    /**
     * Urls to add sitemap; each entry has a "loc" key and optional
     * "lastmod", "changefreq" and "priority" keys
     * @var array
     */
    private $urls = [];

    /**
     * Generated sitemaps as [file name, xml content] pairs (filled by createSitemap)
     * @var array
     */
    private $sitemaps = [];

    /**
     * Sitemap index as a [file name, xml content] pair, or an empty array
     * when a single sitemap file is enough
     * @var array
     */
    private $sitemapIndex = [];

    /**
     * Sitemap full url
     * @var string
     */
    private $sitemapUrl;

    /**
     * Process start time (microtime(true) value)
     * @var float
     */
    private $startTime = 0;

    /**
     * Process finish time (microtime(true) value)
     * @var float
     */
    private $finishTime = 0;

    /**
     * Configure the generator.
     *
     * @param string  $baseUrl   Site base url; derived from $_SERVER when empty
     * @param string  $relPath   Sub directory (below the document root) for the generated files
     * @param integer $maxUrl    Maximum urls per sitemap file (ignored unless an integer)
     * @param boolean $createZip Also write a gzipped copy (ignored unless a boolean)
     * @throws Exception when $relPath does not exist
     */
    public function __construct($baseUrl = null, $relPath = null, $maxUrl = null, $createZip = null)
    {
        // NOTE(review): this replaces the process-wide exception handler for every
        // uncaught exception, not only the ones thrown by this class.
        set_exception_handler(function($exception) {
            echo '<b>[SunSitemap] Exception:</b> '.$exception->getMessage();
        });

        $this->startTime = microtime(true); // get process start time

        if (!empty($baseUrl)) {
            // normalise so that "https://site.tld" and "https://site.tld/" give the same result
            $this->baseUrl = rtrim($baseUrl, '/') . '/';
        } else {
            // some servers (IIS) report HTTPS=off for plain http requests
            $isHttps = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
            // HTTP_HOST is missing when running from the command line
            $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : 'localhost';
            $this->baseUrl = 'http' . ($isHttps ? 's' : '') . '://' . $host . '/';
        }

        $this->relPath = null; // default relative path
        $this->absPath = $_SERVER['DOCUMENT_ROOT'] . '/'; // default absolute path
        if (!empty($relPath)) {
            // NOTE(review): the existence check is resolved against the current working
            // directory while the files are written below DOCUMENT_ROOT - confirm intent.
            if (!file_exists($relPath)) {
                throw new Exception('Sitemap path "'.$relPath.'" does not valid.');
            }
            // strip traversal sequences and surrounding slashes so urls never contain "//"
            $cleanPath = trim(str_replace(['../', '..', './'], '', $relPath), '/');
            if ($cleanPath !== '') {
                $this->relPath = $cleanPath . '/';
                $this->absPath = $_SERVER['DOCUMENT_ROOT'] . '/' . $this->relPath;
            }
        }

        if (is_int($maxUrl)) {
            $this->maxUrl = $maxUrl; // set maximum urls per sitemap
        }
        if (is_bool($createZip)) {
            $this->createZip = $createZip; // create zipped copy of sitemap
        }
    }

    /**
     * Add URL(s) into the sitemap file
     *
     * Accepts a single url, a list of url strings, or a list of
     * [url, lastmod, changefreq, priority] rows (the two list forms may be mixed).
     * The extra arguments are only used when a single url is passed.
     *
     * @param array|string $urls
     * @param string $lastmod
     * @param string $changefreq
     * @param string $priority
     * @throws Exception when the url is missing or the final location is too long
     */
    public function addUrl($urls = null, $lastmod = null, $changefreq = null, $priority = null)
    {
        if (is_array($urls)) { // if urls sent in an array
            foreach ($urls as $entry) {
                if (is_array($entry)) {
                    $this->addUrl(
                        isset($entry[0]) ? $entry[0] : null,
                        isset($entry[1]) ? $entry[1] : null,
                        isset($entry[2]) ? $entry[2] : null,
                        isset($entry[3]) ? $entry[3] : null
                    );
                } else {
                    // a bare string is a url on its own; indexing it would read single characters
                    $this->addUrl($entry);
                }
            }
            return;
        }

        // if urls sent separately
        if (!is_scalar($urls) || (string) $urls === '') {
            throw new Exception('URL parameter is required. At least this parameter should be sent.');
        }

        $loc = $this->baseUrl . $this->relPath . $urls;
        // the protocol limit applies to the complete <loc> value, not just the path fragment
        if (strlen($loc) > 2048) {
            throw new Exception('URL length cannot exceed 2048 characters.');
        }

        $urlArray = ['loc' => $loc]; // add page url attribute
        if (!empty($lastmod)) {
            $urlArray['lastmod'] = $lastmod; // add page last modification attribute
        }
        if (!empty($changefreq)) {
            $urlArray['changefreq'] = $changefreq; // add change frequency attribute
        }
        // 0 / 0.0 / "0" are valid priorities, so empty() must not be used here
        if ($priority !== null && $priority !== '') {
            $urlArray['priority'] = $priority; // add page priority attribute
        }
        $this->urls[] = $urlArray;
    }

    /**
     * Create sitemap file(s) and sitemap index file
     *
     * Writes a single sitemap (plus an optional .gz copy) when all urls fit in one
     * file; otherwise writes gzipped, numbered sitemaps and a sitemap index.
     *
     * @throws Exception on invalid settings, oversized output, missing gzip support
     *                   or when a file cannot be written
     * @return object this instance, for chaining
     */
    public function createSitemap()
    {
        if (empty($this->urls)) {
            throw new Exception('To create a sitemap, first call the "addUrl" method.');
        }
        if ($this->maxUrl > 50000) {
            throw new Exception('Each sitemap file can contain a maximum of 50,000 URLs.');
        }
        if ((int) $this->maxUrl < 1) {
            throw new Exception('Each sitemap file must contain at least 1 URL.');
        }

        // start from a clean state so the method can safely be called more than once
        $this->sitemaps = [];
        $this->sitemapIndex = [];

        $documents = [];
        foreach (array_chunk($this->urls, (int) $this->maxUrl) as $chunk) { // create xml for sitemap
            $content = $this->buildUrlSet($chunk);
            if (strlen($content) > 1024*1024*50) {
                throw new Exception('The size of the sitemap file is more than 50 MB. Please update your sitemap settings to reduce the number of pages.');
            }
            $documents[] = $content;
        }
        if (count($documents) > 1000) {
            throw new Exception("The sitemap directory (index) can contain a maximum of 1,000 sitemap files.");
        }

        $needsIndex = count($documents) > 1;
        if ($needsIndex || $this->createZip) {
            // verify gzip support before anything is written, so a failure leaves no partial output
            $this->assertGzipAvailable();
        }

        if ($needsIndex) { // create xml for sitemap index
            foreach ($documents as $i => $content) {
                $this->sitemaps[] = [str_replace('.xml', ($i + 1) . '.xml.gz', $this->sitemapFile), $content];
            }
            $index = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"></sitemapindex>'); // add xml header
            $generatedAt = date('c');
            foreach ($this->sitemaps as $sitemap) {
                $row = $index->addChild('sitemap');
                $row->addChild('loc', $this->xmlText($this->baseUrl . $this->relPath . $sitemap[0])); // add page url
                $row->addChild('lastmod', $generatedAt); // add page last modification
            }
            $this->sitemapUrl = $this->baseUrl . $this->relPath . $this->sitemapIndexFile;
            $this->sitemapIndex = [$this->sitemapIndexFile, $index->asXML()];

            $this->writeFile($this->absPath . $this->sitemapIndex[0], $this->sitemapIndex[1]);
            foreach ($this->sitemaps as $sitemap) {
                $this->writeGzFile($this->absPath . $sitemap[0], $sitemap[1]);
            }
        } else { // if will create sitemap
            $this->sitemaps[] = [$this->sitemapFile, $documents[0]];
            $this->sitemapUrl = $this->baseUrl . $this->relPath . $this->sitemapFile
                . ($this->createZip ? '.gz' : '');

            $this->writeFile($this->absPath . $this->sitemapFile, $documents[0]);
            if ($this->createZip) {
                $this->writeGzFile($this->absPath . $this->sitemapFile . '.gz', $documents[0]);
            }
        }

        return $this;
    }

    /**
     * Create/update Robots.txt file
     *
     * Keeps the existing non-blank lines, drops every old "Sitemap:" line and
     * appends the location(s) of the sitemap produced by createSitemap().
     *
     * NOTE(review): the file is written to the same directory as the sitemap, so it
     * ends up in a sub directory when a relative path is configured - crawlers
     * normally read robots.txt from the host root only; confirm this is intended.
     *
     * @throws Exception when createSitemap() was not called or the file cannot be read/written
     * @return object this instance, for chaining
     */
    public function updateRobots()
    {
        if (empty($this->sitemaps)) {
            throw new Exception('To create/update the robots.txt file, first call the "createSitemap" method.');
        }

        $robotsPath = $this->absPath . $this->robotsFile;
        if (file_exists($robotsPath)) { // if robots.txt file already exists
            $existing = file_get_contents($robotsPath); // read robots file
            if ($existing === false) {
                throw new Exception('The file "' . $robotsPath . '" could not be read.');
            }
            $robotsContent = '';
            foreach (explode("\n", $existing) as $line) {
                $line = rtrim($line, "\r"); // tolerate CRLF line endings
                if (trim($line) === '') {
                    continue;
                }
                // directive names are case-insensitive, so "sitemap:" must be replaced as well
                if (strncasecmp(ltrim($line), 'Sitemap:', 8) === 0) {
                    continue;
                }
                $robotsContent .= $line . "\n";
            }
        } else {
            $robotsContent = "User-agent: *\nDisallow:\n";
        }

        $location = $this->baseUrl . $this->relPath;
        if (count($this->sitemapIndex) == 0) {
            $robotsContent .= "\nSitemap: " . $location . $this->sitemapFile;
            if ($this->createZip) {
                $robotsContent .= "\nSitemap: " . $location . $this->sitemapFile . '.gz';
            }
        } else {
            $robotsContent .= "\nSitemap: " . $location . $this->sitemapIndexFile;
        }

        $this->writeFile($robotsPath, $robotsContent); // create robots.txt file
        return $this;
    }

    /**
     * Show total memory usage
     *
     * @return string peak memory usage in megabytes, formatted with two decimals
     */
    public function memoryUsage()
    {
        return number_format(memory_get_peak_usage() / (1024 * 1024), 2); // calculate memory usage
    }

    /**
     * Show total process duration
     *
     * @return string seconds elapsed since the object was constructed, formatted with four decimals
     */
    public function showDuration()
    {
        $this->finishTime = microtime(true); // get process finish time
        $total = $this->finishTime - $this->startTime; // calculate total time
        return number_format($total, 4);
    }

    /**
     * Build the <urlset> document for one chunk of urls
     *
     * @param array $chunk entries as stored by addUrl()
     * @return string xml document
     */
    private function buildUrlSet($chunk)
    {
        $xml = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"></urlset>'); // add xml header
        foreach ($chunk as $url) {
            $row = $xml->addChild('url');
            $row->addChild('loc', $this->xmlText($url['loc'])); // add page url
            foreach (['lastmod', 'changefreq', 'priority'] as $field) {
                if (isset($url[$field])) {
                    $row->addChild($field, $this->xmlText($url[$field]));
                }
            }
        }
        return $xml->asXML();
    }

    /**
     * Escape a value for SimpleXMLElement::addChild(), which does not escape "&" itself
     *
     * @param mixed $value
     * @return string
     */
    private function xmlText($value)
    {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'utf-8');
    }

    /**
     * Make sure gzip files can be written
     *
     * Tests for gzopen() itself; the zlib.output_compression setting only controls
     * HTTP output compression and says nothing about file compression support.
     *
     * @throws Exception when the zlib functions are unavailable
     */
    private function assertGzipAvailable()
    {
        if (!function_exists('gzopen')) {
            throw new Exception('The GZip file compression module is not enabled.');
        }
    }

    /**
     * Write a plain file, replacing any existing content
     *
     * @param string $path
     * @param string $content
     * @throws Exception when the file cannot be written
     */
    private function writeFile($path, $content)
    {
        if (file_put_contents($path, $content) === false) {
            throw new Exception('The file "' . $path . '" could not be written.');
        }
    }

    /**
     * Write a gzip compressed file, replacing any existing content
     *
     * @param string $path
     * @param string $content
     * @throws Exception when the file cannot be opened for writing
     */
    private function writeGzFile($path, $content)
    {
        $handle = gzopen($path, 'w');
        if ($handle === false) {
            throw new Exception('The file "' . $path . '" could not be written.');
        }
        gzwrite($handle, $content);
        gzclose($handle);
    }
}
?>