mirror of
https://github.com/Karaka-Management/phpOMS.git
synced 2026-02-12 14:58:42 +00:00
improving external prog. calls
This commit is contained in:
parent
9745efa7d5
commit
e9618de67d
|
|
@ -28,6 +28,22 @@ use phpOMS\Utils\StringUtils;
|
||||||
*/
|
*/
|
||||||
class PdfParser
|
class PdfParser
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* PDFToText path.
|
||||||
|
*
|
||||||
|
* @var string
|
||||||
|
* @since 1.0.0
|
||||||
|
*/
|
||||||
|
public static $pdftotext = '/usr/bin/pdftotext';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDFToPPM path.
|
||||||
|
*
|
||||||
|
* @var string
|
||||||
|
* @since 1.0.0
|
||||||
|
*/
|
||||||
|
public static $pdftoppm = '/usr/bin/pdftoppm';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pdf to text
|
* Pdf to text
|
||||||
*
|
*
|
||||||
|
|
@ -37,7 +53,7 @@ class PdfParser
|
||||||
*
|
*
|
||||||
* @since 1.0.0
|
* @since 1.0.0
|
||||||
*/
|
*/
|
||||||
public static function pdf2text(string $path) : string
|
public static function pdf2text(string $path, string $optimizer = '') : string
|
||||||
{
|
{
|
||||||
$text = '';
|
$text = '';
|
||||||
$tmpDir = \sys_get_temp_dir();
|
$tmpDir = \sys_get_temp_dir();
|
||||||
|
|
@ -47,11 +63,13 @@ class PdfParser
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
SystemUtils::runProc(
|
if (\is_file(self::$pdftotext)) {
|
||||||
'/usr/bin/pdftotext', '-layout '
|
SystemUtils::runProc(
|
||||||
. \escapeshellarg($path) . ' '
|
self::$pdftotext, '-layout '
|
||||||
. \escapeshellarg($out)
|
. \escapeshellarg($path) . ' '
|
||||||
);
|
. \escapeshellarg($out)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$text = \file_get_contents($out);
|
$text = \file_get_contents($out);
|
||||||
\unlink($out);
|
\unlink($out);
|
||||||
|
|
@ -66,12 +84,14 @@ class PdfParser
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
SystemUtils::runProc(
|
if (\is_file(self::$pdftoppm)) {
|
||||||
'/usr/bin/pdftoppm',
|
SystemUtils::runProc(
|
||||||
'-jpeg -r 300 '
|
self::$pdftoppm,
|
||||||
. \escapeshellarg($path) . ' '
|
'-jpeg -r 300 '
|
||||||
. \escapeshellarg($out)
|
. \escapeshellarg($path) . ' '
|
||||||
);
|
. \escapeshellarg($out)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$files = \glob($out . '*');
|
$files = \glob($out . '*');
|
||||||
if ($files === false) {
|
if ($files === false) {
|
||||||
|
|
@ -91,11 +111,13 @@ class PdfParser
|
||||||
Skew::autoRotate($file, $file, 10);
|
Skew::autoRotate($file, $file, 10);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
SystemUtils::runProc(
|
if (!empty($optimizer) && \is_file($optimizer)) {
|
||||||
__DIR__ . '/../../../cOMS/Tools/InvoicePreprocessing/App',
|
SystemUtils::runProc(
|
||||||
\escapeshellarg($file) . ' '
|
$optimizer,
|
||||||
|
\escapeshellarg($file) . ' '
|
||||||
. \escapeshellarg($file)
|
. \escapeshellarg($file)
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$ocr = new TesseractOcr();
|
$ocr = new TesseractOcr();
|
||||||
$text = $ocr->parseImage($file);
|
$text = $ocr->parseImage($file);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user