-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathWarmDeviceDetectorCache.php
More file actions
189 lines (163 loc) · 6.96 KB
/
WarmDeviceDetectorCache.php
File metadata and controls
189 lines (163 loc) · 6.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
<?php
/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/
namespace Piwik\Plugins\DeviceDetectorCache\Commands;
use Piwik\Container\StaticContainer;
use Piwik\Date;
use Piwik\Piwik;
use Piwik\Plugin\ConsoleCommand;
use Piwik\Plugins\DeviceDetectorCache\CachedEntry;
use Piwik\Plugins\DeviceDetectorCache\Configuration;
/**
 * Console command that scans the configured access log, counts how often each
 * user agent occurs and writes a cache entry for the most frequent ones.
 */
class WarmDeviceDetectorCache extends ConsoleCommand
{
    public const COMMAND_NAME = 'device-detector-cache:warm-cache';

    /**
     * Plugin configuration supplying the access log path, the regex used to
     * extract the user agent and the number of entries to cache.
     *
     * @var Configuration
     */
    private $config;

    /**
     * @param string|null $name Optional command name, forwarded to the parent constructor.
     */
    public function __construct($name = null)
    {
        parent::__construct($name);
        $this->config = StaticContainer::get(Configuration::class);
    }

    /**
     * Registers the command name and its description.
     */
    protected function configure()
    {
        $this->setName(self::COMMAND_NAME);
        $this->setDescription('Cached device detector information based on access log');
    }

    /**
     * Prints a progress line of the form "Count: <value>" in verbose mode.
     *
     * @param int|string $count Value printed after the "Count: " prefix.
     */
    private function printupdate($count): void
    {
        $this->log('Count: ' . $count);
    }

    /**
     * Writes a message followed by peak memory usage (in MB) and the current
     * date. Nothing is written unless the output is verbose.
     *
     * @param string $message
     */
    private function log($message): void
    {
        $output = $this->getOutput();
        if (!$output->isVerbose()) {
            return;
        }

        $mem = round(memory_get_peak_usage() / 1024 / 1024);
        $now = Date::now()->getDatetime();
        $output->writeln($message . ' Mem:' . $mem . 'MB Date: ' . $now);
    }

    /**
     * Reads the access log, caches the most frequently seen user agents and
     * finally trims the cache directory down to the configured size.
     *
     * @return int self::SUCCESS
     * @throws \Exception When the access log does not exist or cannot be opened.
     */
    protected function doExecute(): int
    {
        $output = $this->getOutput();
        $regex = $this->config->getAccessLogRegex();
        $numEntriesToCache = $this->config->getNumEntriesToCache();
        $matchEntry = $this->config->getRegexMatchEntry();
        $path = trim((string) $this->config->getAccessLogPath());

        $this->log('caching up to ' . $numEntriesToCache . ' entries');
        $this->log('reading from file ' . $path);
        $this->log('used regex ' . $regex . ' with index ' . $matchEntry);

        if (empty($numEntriesToCache)) {
            $output->writeln('No entries are supposed to be cached. Stopping command');
            return self::SUCCESS;
        }

        if (!file_exists($path)) {
            throw new \Exception('Configured access log path does not exist: "' . $path . '"');
        }

        $numLinesToProcess = StaticContainer::get('DeviceDetectorCacheNumLinesToScan');
        [$userAgents, $numLinesProcessed] = $this->countUserAgents($path, $regex, $matchEntry, $numLinesToProcess);
        $count = count($userAgents);

        $this->log('parsed file: ' . $numLinesProcessed . ' lines');
        $this->printupdate($count);

        if (empty($userAgents)) {
            $output->writeln('No user agents found');
            return self::SUCCESS;
        }

        // Most frequent user agents first. The values are integer counts, so a
        // numeric comparison is the appropriate sort mode.
        arsort($userAgents, SORT_NUMERIC);

        $this->log($count . ' user agents found');
        $this->log('writing files');

        [$numWritten, $numRequestsDetected] = $this->writeCacheEntries($userAgents, $numEntriesToCache);

        $output->writeln('Written ' . $numWritten . ' cache entries to file.');
        // The ratio is relative to the lines actually read, not to the configured
        // maximum, otherwise it is understated for logs shorter than the limit.
        $output->writeln(
            'The hit ratio will be roughly '
            . Piwik::getPercentageSafe($numRequestsDetected, $numLinesProcessed) . '%'
        );

        $this->pruneCache($numEntriesToCache);

        return self::SUCCESS;
    }

    /**
     * Scans up to $numLinesToProcess lines of the access log and counts the
     * occurrences of every usable user agent.
     *
     * @param string $path Path of the access log.
     * @param string $regex Pattern applied to every line.
     * @param int|string $matchEntry Index of the regex match holding the user agent.
     * @param int $numLinesToProcess Maximum number of lines to read.
     * @return array First element: map of user agent => number of occurrences,
     *               second element: number of lines read.
     * @throws \Exception When the file cannot be opened.
     */
    private function countUserAgents(string $path, $regex, $matchEntry, $numLinesToProcess): array
    {
        $handle = fopen($path, 'r');
        if (!$handle) {
            throw new \Exception('Error opening file. Maybe no read permission? Path: ' . $path);
        }

        $userAgents = [];
        $numLinesProcessed = 0;

        try {
            // The line limit prevents the command from running for too long on
            // huge logs. It is checked before reading so that every line that
            // is counted is also parsed.
            while ($numLinesProcessed < $numLinesToProcess && ($line = fgets($handle)) !== false) {
                $numLinesProcessed++;

                $userAgent = $this->extractUserAgent($regex, $line, $matchEntry);
                if ($userAgent !== null) {
                    if (isset($userAgents[$userAgent])) {
                        $userAgents[$userAgent]++;
                    } else {
                        $userAgents[$userAgent] = 1;
                        $numDistinct = count($userAgents);
                        if ($numDistinct % 10000 === 0) {
                            $this->printupdate($numDistinct);
                        }
                    }
                }

                $this->throttle($numLinesProcessed);
            }
        } finally {
            fclose($handle);
        }

        return [$userAgents, $numLinesProcessed];
    }

    /**
     * Applies the regex to a log line and returns the captured user agent.
     *
     * @param string $regex
     * @param string $line
     * @param int|string $matchEntry
     * @return string|null The user agent, or null when the line does not match
     *                     or the captured value is not between 6 and 699 bytes long.
     */
    private function extractUserAgent($regex, $line, $matchEntry): ?string
    {
        if (preg_match($regex, $line, $matches) !== 1 || !isset($matches[$matchEntry])) {
            return null;
        }

        $userAgent = $matches[$matchEntry];
        $length = strlen($userAgent);
        if ($length <= 5 || $length >= 700) {
            return null;
        }

        return $userAgent;
    }

    /**
     * Pauses briefly at fixed line intervals while the log is being read.
     *
     * @param int $numLinesProcessed Number of lines read so far.
     */
    private function throttle($numLinesProcessed): void
    {
        if ($numLinesProcessed % 10 === 0) {
            usleep(300); // slightly slow down disk usage to avoid running eg into some EBS limit
        }

        if ($numLinesProcessed % 1000 === 0) {
            usleep(10000); // every 1000 lines sleep for 10ms to not max out CPU as much
        }
    }

    /**
     * Writes cache entries for the given user agents, which are expected to be
     * ordered by descending request count. Stops once $numEntriesToCache entries
     * were written or the remaining user agents fall below the configured
     * minimum number of requests.
     *
     * @param array $userAgents Map of user agent => number of occurrences.
     * @param int $numEntriesToCache Maximum number of entries to write.
     * @return int[] First element: number of entries written, second element:
     *               total number of requests covered by those entries.
     */
    private function writeCacheEntries(array $userAgents, $numEntriesToCache): array
    {
        $output = $this->getOutput();
        $minRequests = StaticContainer::get('DeviceDetectorCacheIgnoreUserAgentsWithLessThanXRequests');
        $numWritten = 0;
        $numRequestsDetected = 0;

        foreach ($userAgents as $agent => $numRequests) {
            if ($numWritten >= $numEntriesToCache) {
                $output->writeln('stopping because number of configured entries were cached');
                break;
            }

            if ($numRequests < $minRequests) {
                // User agents seen fewer times than the configured minimum are so rare
                // that caching them is not worth it; they are rather detected on demand.
                $output->writeln('stopping because remaining user agents have only few requests');
                break;
            }

            $numWritten++;
            $numRequestsDetected += $numRequests; // useful to detect hit ratio

            if ($numWritten % 5000 === 0) {
                $this->log(
                    'written files so far: ' . $numWritten . ' detecting that many requests: ' . $numRequestsDetected
                );
            }

            if ($numWritten <= 10) {
                $this->log('Found user agent ' . $agent . ' count: ' . $numRequests);
            }

            // PHP turns purely numeric array keys into integers, so cast back to
            // make sure the cache always receives the user agent as a string.
            CachedEntry::writeToCache((string) $agent);

            // sleep 2ms to let CPU do something else
            // this will make things about 10m slower for 200K entries but at least sudden CPU increase for instance
            // can be prevented when there are only few CPUs available
            // note: roughly per minute we write around 5K entries
            usleep(2000);
        }

        return [$numWritten, $numRequestsDetected];
    }

    /**
     * Reports how many cache files exist and, if there are more than
     * $numEntriesToCache, asks the cache to delete the surplus via
     * CachedEntry::deleteLeastAccessedFiles().
     *
     * @param int $numEntriesToCache Maximum number of cache files to keep.
     */
    private function pruneCache($numEntriesToCache): void
    {
        $output = $this->getOutput();
        $numCacheFilesExist = CachedEntry::getNumEntriesInCacheDir();
        $output->writeln($numCacheFilesExist . ' cached files exist');

        if ($numCacheFilesExist <= $numEntriesToCache) {
            return;
        }

        $numEntriesToDelete = $numCacheFilesExist - $numEntriesToCache;
        $output->writeln('Need to delete ' . $numEntriesToDelete . ' files');
        CachedEntry::deleteLeastAccessedFiles($numEntriesToDelete);
        $output->writeln('done deleting files');
    }
}