255) { return $text; } $out = \tempnam($tmpDir, 'oms_pdf_'); if ($out === false) { return ''; } if (\is_file(self::$pdftoppm)) { try { SystemUtils::runProc( self::$pdftoppm, '-jpeg -r 300 ' . \escapeshellarg($path) . ' ' . \escapeshellarg($out) ); } catch (\Throwable $_) { \unlink($out); return ''; } } $files = \glob($out . '*'); if ($files === false) { \unlink($out); return $text === false ? '' : $text; } foreach ($files as $file) { if (!StringUtils::endsWith($file, '.jpg') && !StringUtils::endsWith($file, '.png') && !StringUtils::endsWith($file, '.gif') ) { continue; } /* Too slow Thresholding::integralThresholding($file, $file); Skew::autoRotate($file, $file, 10); */ if (!empty($optimizer) && \is_file($optimizer)) { try { SystemUtils::runProc( $optimizer, \escapeshellarg($file) . ' ' . \escapeshellarg($file) ); } catch (\Throwable $_) { \unlink($file); continue; } } $ocr = new TesseractOcr(); $text = $ocr->parseImage($file); \unlink($file); } \unlink($out); return $text; } }