This commit is contained in:
Dennis Eichhorn 2024-01-30 21:29:21 +00:00
parent 87e3e9797d
commit f48b4e9f1c
4 changed files with 215 additions and 186 deletions

View File

@ -66,7 +66,7 @@ final class ApiController extends Controller
* @var string[] * @var string[]
* @since 1.0.0 * @since 1.0.0
*/ */
public const IMG_RENDERABLE = ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'svg']; public const IMG_RENDERABLE = ['png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp', 'svg'];
/** /**
* Api method to create resource * Api method to create resource
@ -439,10 +439,10 @@ final class ApiController extends Controller
// Check downloaded resources // Check downloaded resources
$totalCount = \count($toCheck); $totalCount = \count($toCheck);
$maxLoops = (int) \min(60 * 10, $totalCount * 10 / 4); // At most run 600 times or 2.5 times the resource count $maxLoops = 60 * 10; // At most wait 600 times per individual resource
$startTime = \time(); $startTime = \time();
$minTime = $startTime + ((int) \max(10 * $totalCount, 60)); // At least run 10 seconds per element or 5 minutes $minTime = $startTime + ((int) \max(15 * $totalCount, 60 * 15)); // At least run 15 seconds per element or 15 minutes in total
$maxTime = $startTime + ((int) \min(60 * $totalCount, 60 * 60 * 3)); // At most run 60 seconds per element or 3 hours $maxTime = $startTime + ((int) \min(60 * $totalCount, 60 * 60 * 3)); // At most run 60 seconds per element or 3 hours in total
while (!empty($toCheck)) { while (!empty($toCheck)) {
$time = \time(); $time = \time();
@ -484,6 +484,7 @@ final class ApiController extends Controller
if (!\is_dir($path)) { if (!\is_dir($path)) {
// Either the download takes too long or the download failed! // Either the download takes too long or the download failed!
// Let's go to the next element and re-check later on. // Let's go to the next element and re-check later on.
// However, an element will only get checked a finite amount of times (limited by checks AND/OR total time)
continue; continue;
} }
@ -567,9 +568,7 @@ final class ApiController extends Controller
if ($extension === 'htm') { if ($extension === 'htm') {
try { try {
\var_dump(\exec('whoami')); // Tool: software used is wkhtmltopdf
\var_dump(\exec('wkhtmltoimage ' . \escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg')));
echo 'wkhtmltoimage ' . \escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg') . "\n";
SystemUtils::runProc( SystemUtils::runProc(
OperatingSystem::getSystem() === SystemType::WIN ? 'wkhtmltoimage.exe' : 'wkhtmltoimage', OperatingSystem::getSystem() === SystemType::WIN ? 'wkhtmltoimage.exe' : 'wkhtmltoimage',
\escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg'), \escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg'),
@ -631,53 +630,58 @@ final class ApiController extends Controller
// Different file hash -> content inspection required // Different file hash -> content inspection required
if ($hasDifferentHash) { if ($hasDifferentHash) {
if (\in_array($extension, self::TEXT_RENDERABLE)) { if (\in_array($extension, self::TEXT_RENDERABLE)) {
$contentOld = \Modules\Media\Controller\ApiController::loadFileContent($oldPath, $extension); $contentOld = \Modules\Media\Controller\ApiController::loadFileContent($oldPath, $extension, 'txt', ['path' => $resource->xpath]);
$contentNew = \Modules\Media\Controller\ApiController::loadFileContent($newPath, $extension); $contentNew = \Modules\Media\Controller\ApiController::loadFileContent($newPath, $extension, 'txt', ['path' => $resource->xpath]);
$contentOld = \preg_replace('/(\ {2,}|\t)/', ' ', $contentOld);
$contentOld = \preg_replace('/(\s{2,})/', "\n", $contentOld);
$contentNew = \preg_replace('/(\ {2,}|\t)/', ' ', $contentNew);
$contentNew = \preg_replace('/(\s{2,})/', "\n", $contentNew);
// Calculate difference index // Calculate difference index
$difference = \levenshtein($contentOld, $contentNew); $difference = \levenshtein($contentOld, $contentNew);
// Handle xpath $diffPath = \dirname($newPath) . '/_' . \basename($newPath);
if ($difference > 0
&& $extension === 'htm'
&& $resource->path !== ''
) {
$xmlOld = new \DOMDocument();
$xmlNew = new \DOMDocument();
$xmlOld->loadHtml($contentOld); \file_put_contents(
$xmlNew->loadHtml($contentNew); $diffPath,
\phpOMS\Utils\StringUtils::createDiffMarkup(
$contentOld,
$contentNew,
' '
)
);
$xpathOld = new \DOMXpath($xmlOld); // @todo allow $resource->path handling for html paths
$xpathNew = new \DOMXpath($xmlNew);
$elementsOld = $xpathOld->query($resource->path);
$elementsNew = $xpathNew->query($resource->path);
$subcontentOld = '';
if ($elementsOld !== false) {
foreach ($elementsOld as $node) {
foreach ($node->childNodes as $child) {
$subcontentOld .= $xmlOld->saveXML($child);
}
}
}
$subcontentNew = '';
if ($elementsNew !== false) {
foreach ($elementsNew as $node) {
foreach ($node->childNodes as $child) {
$subcontentNew .= $xmlNew->saveXML($child);
}
}
}
// Calculate difference index
$difference = \levenshtein($subcontentOld, $subcontentNew);
}
} elseif (\in_array($extension, self::IMG_RENDERABLE)) { } elseif (\in_array($extension, self::IMG_RENDERABLE)) {
$diffPath = \dirname($newPath) . '/_' . \basename($newPath);
// Tool: software used is imagemagick
SystemUtils::runProc(
OperatingSystem::getSystem() === SystemType::WIN ? 'compare.exe' : 'compare',
'-compose src ' . $oldPath . ' ' . $newPath . ' ' . $diffPath
);
// @todo allow $resource->path handling for x1/y1 -> x2/y2 coordinates
// Difference index is always 0/1. Comparing pixels is too slow and doesn't add much value // Difference index is always 0/1. Comparing pixels is too slow and doesn't add much value
$difference = 1; //ImageUtils::difference($oldPath, $newPath, $path . '/_' . \basename($newPath), 0); // too slow $difference = 1;
} elseif ($extension === 'pdf') {
$diffPath = \dirname($newPath) . '/_' . \basename($newPath, '.pdf') . '.htm';
\file_put_contents(
$diffPath,
\phpOMS\Utils\StringUtils::createDiffMarkup(
\Modules\Media\Controller\ApiController::loadFileContent($oldPath, $extension),
\Modules\Media\Controller\ApiController::loadFileContent($newPath, $extension),
' '
)
);
// @todo allow $resource->path handling for page/headline searches
$difference = 1;
} else { } else {
// All other files always have a difference index of 0/1 // All other files always have a difference index of 0/1
$difference = 1; $difference = 1;
@ -708,6 +712,7 @@ final class ApiController extends Controller
// If is htm/html create image // If is htm/html create image
if ($extension === 'htm') { if ($extension === 'htm') {
try { try {
// Tool: software used is wkhtmltopdf
SystemUtils::runProc( SystemUtils::runProc(
OperatingSystem::getSystem() === SystemType::WIN ? 'wkhtmltoimage.exe' : 'wkhtmltoimage', OperatingSystem::getSystem() === SystemType::WIN ? 'wkhtmltoimage.exe' : 'wkhtmltoimage',
\escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg'), \escapeshellarg($resource->uri) . ' ' . \escapeshellarg($basePath . '/' . $resource->id . '/' . $check['timestamp'] . '/index.jpg'),
@ -729,8 +734,6 @@ final class ApiController extends Controller
$resource->checkedAt = $report->createdAt; $resource->checkedAt = $report->createdAt;
$this->updateModel($request->header->account, $old, $resource, ResourceMapper::class, 'resource', $request->getOrigin()); $this->updateModel($request->header->account, $old, $resource, ResourceMapper::class, 'resource', $request->getOrigin());
// Directory::delete($basePath . '/temp/' . $resource->id);
// @todo delete older history depending on plan // @todo delete older history depending on plan
unset($toCheck[$index]); unset($toCheck[$index]);
@ -740,10 +743,11 @@ final class ApiController extends Controller
} }
// Kill running processes in x seconds that shouldn't be running any longer // Kill running processes in x seconds that shouldn't be running any longer
$time = \time();
if (OperatingSystem::getSystem() === SystemType::LINUX) { if (OperatingSystem::getSystem() === SystemType::LINUX) {
SystemUtils::runProc('sleep', \max(0, $time - $minTime) . ' && pkill -9 -f wkhtmltoimage', true); SystemUtils::runProc('sleep', \max(0, $minTime - $time) . ' && pkill -9 -f wkhtmltoimage', true);
} else { } else {
SystemUtils::runProc('timeout', '/t ' . \max(0, $time - $minTime) . ' > NUL && taskkill /F /IM wkhtmltoimage.exe', true); SystemUtils::runProc('timeout', '/t ' . \max(0, $minTime - $time) . ' > NUL && taskkill /F /IM wkhtmltoimage.exe', true);
} }
Directory::delete($basePath . '/temp'); Directory::delete($basePath . '/temp');

View File

@ -24,6 +24,7 @@ return ['OnlineResourceWatcher' => [
'BillingSettings' => 'Billing Settings', 'BillingSettings' => 'Billing Settings',
'Bills' => 'Bills', 'Bills' => 'Bills',
'By' => 'By', 'By' => 'By',
'Difference' => 'Unterschiede',
'Cancel' => 'Cancel', 'Cancel' => 'Cancel',
'Checked' => 'Checked', 'Checked' => 'Checked',
'City' => 'City', 'City' => 'City',

View File

@ -24,6 +24,7 @@ return ['OnlineResourceWatcher' => [
'BillingSettings' => 'Billing Settings', 'BillingSettings' => 'Billing Settings',
'Bills' => 'Bills', 'Bills' => 'Bills',
'By' => 'By', 'By' => 'By',
'Difference' => 'Difference',
'Cancel' => 'Cancel', 'Cancel' => 'Cancel',
'Checked' => 'Checked', 'Checked' => 'Checked',
'City' => 'City', 'City' => 'City',

View File

@ -1,10 +1,22 @@
<?php declare(strict_types=1); <?php
declare(strict_types=1);
use Modules\OnlineResourceWatcher\Models\ReportStatus; use Modules\OnlineResourceWatcher\Models\ReportStatus;
use phpOMS\Uri\UriFactory; use phpOMS\Uri\UriFactory;
?> ?>
<div class="row row-simple"> <div class="tabview tab-2">
<div class="box">
<ul class="tab-links">
<li><label for="c-tab-1"><?= $this->getHtml('Comparison'); ?></label>
<li><label for="c-tab-2"><?= $this->getHtml('Difference'); ?></label>
</ul>
</div>
<div class="tab-content">
<input type="radio" id="c-tab-1" name="tabular-2"<?= $this->request->uri->fragment === 'c-tab-1' ? ' checked' : ''; ?>>
<div class="tab">
<div class="row row-simple">
<?php <?php
$old = null; $old = null;
$new = null; $new = null;
@ -36,7 +48,7 @@ use phpOMS\Uri\UriFactory;
$files = \scandir($newBasePath); $files = \scandir($newBasePath);
if ($files !== false) { if ($files !== false) {
foreach ($files as $file) { foreach ($files as $file) {
if ($file === '.' || $file === '..') { if ($file === '.' || $file === '..' || \str_starts_with($file, '_')) {
continue; continue;
} }
@ -72,7 +84,7 @@ use phpOMS\Uri\UriFactory;
$files = \scandir($oldBasePath); $files = \scandir($oldBasePath);
if ($files !== false) { if ($files !== false) {
foreach ($files as $file) { foreach ($files as $file) {
if ($file === '.' || $file === '..') { if ($file === '.' || $file === '..' || \str_starts_with($file, '_')) {
continue; continue;
} }
@ -96,26 +108,18 @@ use phpOMS\Uri\UriFactory;
} }
} }
} }
$base = __DIR__ . '/../../../../';
$newDiffPath = '';
if ($type === 'pdf') {
$newDiffPath = \dirname($newWebPath) . '/_' . \basename($newWebPath, '.pdf') . '.htm';
} else {
$newDiffPath = \dirname($newWebPath) . '/_' . \basename($newWebPath);
}
?> ?>
<?php if ($type === 'img') : ?> <?php if ($type === 'pdf' && $old !== null) : ?>
<div class="col-xs-12 col-simple">
<div class="portlet col-simple">
<div class="portlet-body col-simple">
<?php
if ($old !== null) : ?>
<div class="image-comparison">
<div>
<img src="<?= UriFactory::build($oldWebPath); ?>" alt="<?= $this->printHtml($resource->title); ?>">
</div>
<img src="<?= UriFactory::build($newWebPath); ?>" alt="<?= $this->printHtml($resource->title); ?>">
</div>
<?php endif; ?>
</div>
</div>
</div>
<?php elseif ($type === 'pdf' && $old !== null) : ?>
<div class="col-xs-6 col-simple"> <div class="col-xs-6 col-simple">
<div class="portlet col-simple"> <div class="portlet col-simple">
<div class="portlet-body col-simple"> <div class="portlet-body col-simple">
@ -148,5 +152,24 @@ use phpOMS\Uri\UriFactory;
</div> </div>
</div> </div>
<?php endif; ?> <?php endif; ?>
<?php endif; ?> <?php endif; ?>
</div>
</div>
<input type="radio" id="c-tab-2" name="tabular-2"<?= $this->request->uri->fragment === 'c-tab-2' ? ' checked' : ''; ?>>
<div class="tab">
<div class="row row-simple">
<div class="col-xs-6 col-simple">
<div class="portlet col-simple">
<div class="portlet-body col-simple">
<?php if (\is_file($base . $newDiffPath)) : ?>
<iframe class="col-simple" id="iRenderNew" sandbox="allow-scripts" src="<?= $newDiffPath; ?>" loading="lazy" allowfullscreen></iframe>
<?php endif; ?>
</div>
</div>
</div>
</div>
</div>
</div>
</div> </div>