diff --git a/.github/workflows/phpunit-integration.yml b/.github/workflows/phpunit-integration.yml
index 70b75d7d..27e96cf4 100644
--- a/.github/workflows/phpunit-integration.yml
+++ b/.github/workflows/phpunit-integration.yml
@@ -133,7 +133,7 @@ jobs:
if: failure()
uses: actions/upload-artifact@v4
with:
- name: logs
+ name: logs-integrationtests-${{ matrix.backend }}
path: data/*.log
diff --git a/.github/workflows/phpunit.yml b/.github/workflows/phpunit.yml
index 429374e2..aa84358a 100644
--- a/.github/workflows/phpunit.yml
+++ b/.github/workflows/phpunit.yml
@@ -15,8 +15,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
- # do not stop on another job's failure
- fail-fast: false
+ fail-fast: true
matrix:
php-versions: ['8.3']
databases: ['sqlite']
diff --git a/README.md b/README.md
index 11c1b233..56e7aa6a 100644
--- a/README.md
+++ b/README.md
@@ -217,6 +217,8 @@ For processing PDF files, the external command line tool [`OCRmyPDF`](https://gi
### Images
For processing single images (currently `jpg` and `png` are supported), `ocrmypdf` converts the image to a PDF. The converted PDF file will then be OCR processed and saved as a new file with the original filename and the extension `.pdf` (for example `myImage.jpg` will be saved to `myImage.jpg.pdf`). The original image fill will remain untouched.
+Note about PNG alpha channel: The OCR processing will remove (flatten) the alpha/transparency channel from PNG images before conversion. Transparent areas are filled with a white background to make the OCR process possible with `ocrmypdf`. This behavior applies automatically to PNG images processed by the app. See [this issue](https://github.com/R0Wi-DEV/workflow_ocr/issues/295) for details.
+
## Troubleshooting
### Generic troubleshooting guide
diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php
index 680fc613..9647a724 100644
--- a/lib/AppInfo/Application.php
+++ b/lib/AppInfo/Application.php
@@ -56,7 +56,9 @@
use OCA\WorkflowOcr\Wrapper\IAppApiWrapper;
use OCA\WorkflowOcr\Wrapper\ICommand;
use OCA\WorkflowOcr\Wrapper\IFilesystem;
+use OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions;
use OCA\WorkflowOcr\Wrapper\IViewFactory;
+use OCA\WorkflowOcr\Wrapper\PhpNativeFunctions;
use OCA\WorkflowOcr\Wrapper\ViewFactory;
use OCP\AppFramework\App;
use OCP\AppFramework\Bootstrap\IBootContext;
@@ -93,6 +95,7 @@ public function register(IRegistrationContext $context): void {
$context->registerServiceAlias(INotificationService::class, NotificationService::class);
$context->registerServiceAlias(IApiClient::class, ApiClient::class);
$context->registerServiceAlias(ICommandLineUtils::class, CommandLineUtils::class);
+ $context->registerServiceAlias(IPhpNativeFunctions::class, PhpNativeFunctions::class);
$context->registerServiceAlias(IAppApiWrapper::class, AppApiWrapper::class);
// BUG #43
diff --git a/lib/OcrProcessors/IOcrProcessor.php b/lib/OcrProcessors/IOcrProcessor.php
index bca2209d..a5f3a0c4 100644
--- a/lib/OcrProcessors/IOcrProcessor.php
+++ b/lib/OcrProcessors/IOcrProcessor.php
@@ -23,7 +23,9 @@
namespace OCA\WorkflowOcr\OcrProcessors;
+use OCA\WorkflowOcr\Exception\OcrAlreadyDoneException;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCP\Files\File;
@@ -35,6 +37,8 @@ interface IOcrProcessor {
* @param WorkflowSettings $settings The settings to be used for this specific workflow
* @param GlobalSettings $globalSettings The global settings configured for all OCR workflows on this system
* @return OcrProcessorResult
+ * @throws OcrAlreadyDoneException
+ * @throws OcrResultEmptyException
* @throws OcrNotPossibleException
*/
public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings) : OcrProcessorResult;
diff --git a/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php b/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php
index d7b29680..2c3f81e7 100644
--- a/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php
+++ b/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php
@@ -23,34 +23,35 @@
namespace OCA\WorkflowOcr\OcrProcessors\Local;
-use OCA\WorkflowOcr\Exception\OcrAlreadyDoneException;
-use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
-use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Helper\ISidecarFileAccessor;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils;
-use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor;
-use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
+use OCA\WorkflowOcr\OcrProcessors\OcrProcessorBase;
use OCA\WorkflowOcr\Wrapper\ICommand;
-use OCP\Files\File;
+use OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions;
use Psr\Log\LoggerInterface;
-abstract class OcrMyPdfBasedProcessor implements IOcrProcessor {
+abstract class OcrMyPdfBasedProcessor extends OcrProcessorBase {
public function __construct(
private ICommand $command,
- private LoggerInterface $logger,
+ protected LoggerInterface $logger,
private ISidecarFileAccessor $sidecarFileAccessor,
private ICommandLineUtils $commandLineUtils,
+ IPhpNativeFunctions $phpNativeFunctions,
) {
+ parent::__construct($logger, $phpNativeFunctions);
}
- public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings): OcrProcessorResult {
+ protected function doOcrProcessing($fileResource, string $fileName, WorkflowSettings $settings, GlobalSettings $globalSettings): array {
$additionalCommandlineArgs = $this->getAdditionalCommandlineArgs($settings, $globalSettings);
$sidecarFile = $this->sidecarFileAccessor->getOrCreateSidecarFile();
$commandStr = 'ocrmypdf ' . $this->commandLineUtils->getCommandlineArgs($settings, $globalSettings, $sidecarFile, $additionalCommandlineArgs) . ' - - || exit $? ; cat';
- $inputFileContent = $file->getContent();
+ $inputFileContent = $this->phpNative->streamGetContents($fileResource);
+ if ($inputFileContent === false) {
+ return [false, null, null, -1, 'Failed to read file content from stream'];
+ }
$this->command
->setCommand($commandStr)
@@ -64,11 +65,7 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $
$exitCode = $this->command->getExitCode();
if (!$success) {
- # Gracefully handle OCR_MODE_SKIP_FILE (ExitCode.already_done_ocr)
- if ($exitCode === 6) {
- throw new OcrAlreadyDoneException('File ' . $file->getPath() . ' appears to contain text so it may not need OCR. Message: ' . $errorOutput . ' ' . $stdErr);
- }
- throw new OcrNotPossibleException('OCRmyPDF exited abnormally with exit-code ' . $exitCode . ' for file ' . $file->getPath() . '. Message: ' . $errorOutput . ' ' . $stdErr);
+ return [false, null, null, $exitCode, $errorOutput . ' ' . $stdErr];
}
if ($stdErr !== '' || $errorOutput !== '') {
@@ -80,20 +77,9 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $
}
$ocrFileContent = $this->command->getOutput();
-
- if (!$ocrFileContent) {
- throw new OcrResultEmptyException('OCRmyPDF did not produce any output for file ' . $file->getPath());
- }
-
$recognizedText = $this->sidecarFileAccessor->getSidecarFileContent();
- if (!$recognizedText) {
- $this->logger->info('Temporary sidecar file at \'{path}\' was empty', ['path' => $sidecarFile]);
- }
-
- $this->logger->debug('OCR processing was successful');
-
- return new OcrProcessorResult($ocrFileContent, 'pdf', $recognizedText);
+ return [true, $ocrFileContent, $recognizedText, $exitCode, null];
}
/**
diff --git a/lib/OcrProcessors/OcrProcessorBase.php b/lib/OcrProcessors/OcrProcessorBase.php
new file mode 100644
index 00000000..2f47dff3
--- /dev/null
+++ b/lib/OcrProcessors/OcrProcessorBase.php
@@ -0,0 +1,135 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+namespace OCA\WorkflowOcr\OcrProcessors;
+
+use OCA\WorkflowOcr\Exception\OcrAlreadyDoneException;
+use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
+use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
+use OCA\WorkflowOcr\Model\GlobalSettings;
+use OCA\WorkflowOcr\Model\WorkflowSettings;
+use OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions;
+use OCP\Files\File;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Base class for all OCR processors.
+ *
+ * Implements the steps shared by every processor: opening (and, for PNG images,
+ * pre-processing) the input file, delegating the actual OCR run to
+ * doOcrProcessing(), translating a failed run into the matching exception and
+ * closing the input stream afterwards.
+ */
+abstract class OcrProcessorBase implements IOcrProcessor {
+	/** Exit code for which a failed run is reported as OcrAlreadyDoneException instead of OcrNotPossibleException. */
+	private const EXIT_CODE_ALREADY_DONE_OCR = 6;
+	/** Mime type of the files which are run through the alpha channel removal. */
+	private const MIME_TYPE_PNG = 'image/png';
+
+	public function __construct(
+		protected LoggerInterface $logger,
+		protected IPhpNativeFunctions $phpNative,
+	) {
+	}
+
+	/**
+	 * Runs OCR for the given file.
+	 *
+	 * @param File $file The file to be processed
+	 * @param WorkflowSettings $settings The settings of the specific workflow
+	 * @param GlobalSettings $globalSettings The global OCR settings
+	 * @return OcrProcessorResult
+	 * @throws OcrAlreadyDoneException If the processor failed with the "already done" exit code
+	 * @throws OcrResultEmptyException If the processor succeeded but returned no file content
+	 * @throws OcrNotPossibleException If the file could not be opened or the processor failed
+	 */
+	public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings): OcrProcessorResult {
+		$fileName = $file->getName();
+		$fileResource = $this->doFilePreprocessing($file);
+		// A failed open must not be handed to the processor as if it was a stream
+		if ($fileResource === false) {
+			throw new OcrNotPossibleException('Could not open file ' . $fileName . ' for reading');
+		}
+		try {
+			[$success, $fileContent, $recognizedText, $exitCode, $errorMessage] = $this->doOcrProcessing($fileResource, $fileName, $settings, $globalSettings);
+			if (!$success) {
+				$this->throwException($errorMessage, $exitCode);
+			}
+			if (!$recognizedText) {
+				$this->logger->info('Recognized text was empty');
+			}
+			if (!$fileContent) {
+				throw new OcrResultEmptyException('OCRmyPDF did not produce any output for file ' . $fileName);
+			}
+			// The tuple allows a null text while OcrProcessorResult requires a string
+			return new OcrProcessorResult($fileContent, $recognizedText ?? '');
+		} finally {
+			if (is_resource($fileResource)) {
+				fclose($fileResource);
+			}
+		}
+	}
+
+	/**
+	 * Perform the actual OCR processing. Implementation is specific to the OCR processor. Might be local or remote.
+	 * Should return [$success, $fileContent, $recognizedText, $exitCode, $errorMessage]
+	 * @param resource $fileResource
+	 * @param string $fileName
+	 * @param WorkflowSettings $settings
+	 * @param GlobalSettings $globalSettings
+	 * @return array{bool, string|null, string|null, int, string|null} [$success, $fileContent, $recognizedText, $exitCode, $errorMessage]
+	 */
+	abstract protected function doOcrProcessing($fileResource, string $fileName, WorkflowSettings $settings, GlobalSettings $globalSettings): array;
+
+	/**
+	 * Opens the file for reading. PNG images are additionally run through the
+	 * alpha channel removal, all other files are passed on unchanged.
+	 *
+	 * @return resource|false
+	 */
+	private function doFilePreprocessing(File $file) {
+		if ($file->getMimeType() === self::MIME_TYPE_PNG) {
+			return $this->removeAlphaChannelFromImage($file);
+		}
+		return $file->fopen('rb');
+	}
+
+	/**
+	 * Returns a stream containing the PNG without alpha channel. If the image has
+	 * no alpha channel, a stream with the unmodified file content is returned.
+	 * The handle opened for the inspection is closed here unless it is the one
+	 * which is returned to the caller.
+	 *
+	 * @return resource|false
+	 * @throws \RuntimeException If the temporary stream could not be created or written
+	 */
+	private function removeAlphaChannelFromImage(File $file) {
+		$imageResource = null;
+		$image = null;
+		$returnsImageResource = false;
+		try {
+			$this->logger->debug('Checking if PNG has alpha channel');
+
+			$imageResource = $file->fopen('rb');
+			if ($imageResource === false) {
+				return false;
+			}
+			$image = new \Imagick();
+			$image->readImageFile($imageResource, $file->getName());
+
+			if (!$image->getImageAlphaChannel()) {
+				$this->logger->debug('PNG does not have alpha channel, no need to remove it');
+				// Imagick has already read from this handle, so it has to be positioned
+				// at the start again before the OCR backend reads the content from it
+				if ($this->rewindIfSeekable($imageResource)) {
+					$returnsImageResource = true;
+					return $imageResource;
+				}
+				// Not seekable: hand out a fresh handle, the consumed one is closed below
+				return $file->fopen('rb');
+			}
+
+			$this->logger->debug('PNG has alpha channel, removing it');
+			$image->setImageAlphaChannel(\Imagick::ALPHACHANNEL_REMOVE);
+			// NOTE(review): mergeImageLayers() returns a new Imagick object which is not used,
+			// so the blob below is taken from $image as left by setImageAlphaChannel() - confirm this is intended
+			$image->mergeImageLayers(\Imagick::LAYERMETHOD_FLATTEN);
+			$imageBlob = $image->getImageBlob();
+			$stream = $this->phpNative->fopen('php://temp', 'r+');
+			if ($stream === false) {
+				throw new \RuntimeException('Failed to create temporary stream for alpha channel removal');
+			}
+			if (fwrite($stream, $imageBlob) === false || !rewind($stream)) {
+				fclose($stream);
+				throw new \RuntimeException('Failed to write image to temporary stream for alpha channel removal');
+			}
+			return $stream;
+		} finally {
+			// Close the original handle on every path (including exceptions) where it is not returned
+			if (!$returnsImageResource && is_resource($imageResource)) {
+				fclose($imageResource);
+			}
+			if ($image !== null) {
+				$image->clear();
+				$image->destroy();
+			}
+		}
+	}
+
+	/**
+	 * Moves the stream position back to the start if the stream reports itself as seekable.
+	 *
+	 * @param resource $stream
+	 * @return bool true if the stream was rewound, false otherwise
+	 */
+	private function rewindIfSeekable($stream): bool {
+		$metaData = stream_get_meta_data($stream);
+		return !empty($metaData['seekable']) && rewind($stream);
+	}
+
+	/**
+	 * Throws an appropriate exception based on the error message and exit code.
+	 *
+	 * @param string|null $errorMessage Message reported by the processor, null is treated as an empty message
+	 * @param int|null $exitCode Exit code reported by the processor
+	 * @throws OcrAlreadyDoneException If the exit code equals EXIT_CODE_ALREADY_DONE_OCR
+	 * @throws OcrNotPossibleException For every other exit code
+	 */
+	private function throwException($errorMessage, $exitCode) {
+		$message = (string)$errorMessage;
+		if ($exitCode === self::EXIT_CODE_ALREADY_DONE_OCR) {
+			throw new OcrAlreadyDoneException('File appears to contain text so it may not need OCR. Message: ' . $message);
+		}
+		throw new OcrNotPossibleException($message);
+	}
+}
diff --git a/lib/OcrProcessors/OcrProcessorFactory.php b/lib/OcrProcessors/OcrProcessorFactory.php
index ce939a0c..55751215 100644
--- a/lib/OcrProcessors/OcrProcessorFactory.php
+++ b/lib/OcrProcessors/OcrProcessorFactory.php
@@ -30,6 +30,7 @@
use OCA\WorkflowOcr\OcrProcessors\Remote\WorkflowOcrRemoteProcessor;
use OCA\WorkflowOcr\Service\IOcrBackendInfoService;
use OCA\WorkflowOcr\Wrapper\ICommand;
+use OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions;
use OCP\AppFramework\Bootstrap\IRegistrationContext;
use Psr\Container\ContainerInterface;
use Psr\Log\LoggerInterface;
@@ -72,13 +73,15 @@ public static function registerOcrProcessors(IRegistrationContext $context) : vo
$c->get(ICommand::class),
$c->get(LoggerInterface::class),
$c->get(ISidecarFileAccessor::class),
- $c->get(ICommandLineUtils::class)), false);
+ $c->get(ICommandLineUtils::class),
+ $c->get(IPhpNativeFunctions::class)), false);
$context->registerService(ImageOcrProcessor::class, fn (ContainerInterface $c)
=> new ImageOcrProcessor(
$c->get(ICommand::class),
$c->get(LoggerInterface::class),
$c->get(ISidecarFileAccessor::class),
- $c->get(ICommandLineUtils::class)), false);
+ $c->get(ICommandLineUtils::class),
+ $c->get(IPhpNativeFunctions::class)), false);
}
/** @inheritdoc */
diff --git a/lib/OcrProcessors/OcrProcessorResult.php b/lib/OcrProcessors/OcrProcessorResult.php
index e1d6b07a..0af7cf93 100644
--- a/lib/OcrProcessors/OcrProcessorResult.php
+++ b/lib/OcrProcessors/OcrProcessorResult.php
@@ -30,14 +30,11 @@ class OcrProcessorResult {
/** @var string */
private $fileContent;
/** @var string */
- private $fileExtension;
- /** @var string */
private $recognizedText;
- public function __construct(string $fileContent, string $fileExtension, string $recognizedText) {
+ public function __construct(string $fileContent, string $recognizedText) {
$this->fileContent = $fileContent;
- $this->fileExtension = $fileExtension;
$this->recognizedText = $recognizedText;
}
@@ -45,10 +42,6 @@ public function getFileContent(): string {
return $this->fileContent;
}
- public function getFileExtension(): string {
- return $this->fileExtension;
- }
-
public function getRecognizedText(): string {
return $this->recognizedText;
}
diff --git a/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php b/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php
index c86c6d1d..d66b483f 100644
--- a/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php
+++ b/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php
@@ -23,34 +23,29 @@
namespace OCA\WorkflowOcr\OcrProcessors\Remote;
-use OCA\WorkflowOcr\Exception\OcrAlreadyDoneException;
-use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils;
-use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor;
-use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
+use OCA\WorkflowOcr\OcrProcessors\OcrProcessorBase;
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient;
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ErrorResult;
-use OCP\Files\File;
use Psr\Log\LoggerInterface;
/**
* OCR Processor which utilizes the Workflow OCR Backend remote service to perform OCR.
*/
-class WorkflowOcrRemoteProcessor implements IOcrProcessor {
+class WorkflowOcrRemoteProcessor extends OcrProcessorBase {
public function __construct(
private IApiClient $apiClient,
private ICommandLineUtils $commandLineUtils,
- private LoggerInterface $logger,
+ protected LoggerInterface $logger,
+ \OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions $phpNative,
) {
-
+ parent::__construct($logger, $phpNative);
}
- public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings): OcrProcessorResult {
+ protected function doOcrProcessing($fileResource, string $fileName, WorkflowSettings $settings, GlobalSettings $globalSettings): array {
$ocrMyPdfParameters = $this->commandLineUtils->getCommandlineArgs($settings, $globalSettings);
- $fileResource = $file->fopen('rb');
- $fileName = $file->getName();
$this->logger->debug('Sending OCR request to remote backend');
$apiResult = $this->apiClient->processOcr($fileResource, $fileName, $ocrMyPdfParameters);
@@ -60,17 +55,9 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $
$resultMessage = $apiResult->getMessage();
$exitCode = $apiResult->getOcrMyPdfExitCode();
- # Gracefully handle OCR_MODE_SKIP_FILE (ExitCode.already_done_ocr)
- if ($exitCode === 6) {
- throw new OcrAlreadyDoneException('File ' . $file->getPath() . ' appears to contain text so it may not need OCR. Message: ' . $resultMessage);
- }
- throw new OcrNotPossibleException($resultMessage);
+ return [false, null, null, $exitCode, $resultMessage];
}
- return new OcrProcessorResult(
- base64_decode($apiResult->getFileContent()),
- pathinfo($apiResult->getFilename(), PATHINFO_EXTENSION),
- $apiResult->getRecognizedText()
- );
+ return [true, base64_decode($apiResult->getFileContent()), $apiResult->getRecognizedText(), 0, null];
}
}
diff --git a/lib/Service/OcrService.php b/lib/Service/OcrService.php
index e7777e10..e92a39c6 100644
--- a/lib/Service/OcrService.php
+++ b/lib/Service/OcrService.php
@@ -52,6 +52,7 @@
class OcrService implements IOcrService {
private const FILE_VERSION_LABEL_KEY = 'label';
private const FILE_VERSION_LABEL_VALUE = 'Before OCR';
+ private const PDF_FILE_EXTENSION = 'pdf';
/** @var IOcrProcessorFactory */
private $ocrProcessorFactory;
@@ -328,7 +329,6 @@ private function doPostProcessing(Node $file, string $uid, WorkflowSettings $set
$fileId = $file->getId();
$fileContent = $result->getFileContent();
$originalFileExtension = $file->getExtension();
- $newFileExtension = $result->getFileExtension();
// Only create a new file version if the file OCR result was not empty #130
if ($result->getRecognizedText() !== '') {
@@ -337,7 +337,7 @@ private function doPostProcessing(Node $file, string $uid, WorkflowSettings $set
$this->setFileVersionsLabel($file, $uid, self::FILE_VERSION_LABEL_VALUE);
}
- $newFilePath = $this->determineNewFilePath($file, $originalFileExtension, $newFileExtension);
+ $newFilePath = $this->determineNewFilePath($file, $originalFileExtension);
$this->createNewFileVersion($newFilePath, $fileContent, $fileMtime);
}
@@ -354,19 +354,18 @@ private function doPostProcessing(Node $file, string $uid, WorkflowSettings $set
*
* @param Node $file The original file node for which the OCR processing has been succeeded.
* @param string $originalFileExtension The original file extension.
- * @param string $newFileExtension The new file extension to be applied.
* @return string The new file path with the updated extension.
*/
- private function determineNewFilePath(Node $file, string $originalFileExtension, string $newFileExtension): string {
+ private function determineNewFilePath(Node $file, string $originalFileExtension): string {
$filePath = $file->getPath();
- if ($originalFileExtension !== $newFileExtension) {
+ if ($originalFileExtension !== self::PDF_FILE_EXTENSION) {
// If the extension changed, will create a new file with the new extension
- return $filePath . '.' . $newFileExtension;
+ return "$filePath." . self::PDF_FILE_EXTENSION;
}
if (!$file->isUpdateable()) {
// Add suffix '_OCR' if original file cannot be updated
$fileInfo = pathinfo($filePath);
- return $fileInfo['dirname'] . '/' . $fileInfo['filename'] . '_OCR.' . $newFileExtension;
+ return $fileInfo['dirname'] . '/' . $fileInfo['filename'] . '_OCR.' . self::PDF_FILE_EXTENSION;
}
// By returning the original file path, we will create a new file version of the original file
return $filePath;
diff --git a/lib/Wrapper/IPhpNativeFunctions.php b/lib/Wrapper/IPhpNativeFunctions.php
new file mode 100644
index 00000000..14219a50
--- /dev/null
+++ b/lib/Wrapper/IPhpNativeFunctions.php
@@ -0,0 +1,26 @@
+assertInstanceOf(TextRecognizedEvent::class, $textRecognizedEvent, 'Expected TextRecognizedEvent instance');
$this->assertEquals('[OCR skipped on page(s) 1]', trim($textRecognizedEvent->getRecognizedText()), 'Expected recognized text');
}
+
+	/**
+	 * Uploads a PNG with alpha channel, runs the OCR background job and expects
+	 * exactly one TextRecognizedEvent carrying the text of the image, without
+	 * any request being sent to the OCR Backend Service.
+	 */
+	public function testWorkflowOcrLocalBackendPngWithAlphaChannel(): void {
+		$this->addOperation('image/png');
+		$this->uploadTestFile('png-with-alpha-channel.png');
+		$this->runOcrBackgroundJob();
+
+		$this->assertEmpty($this->apiClient->getRequests(), 'Expected no OCR Backend Service requests');
+		$this->assertCount(1, $this->capturedEvents, 'Expected 1 TextRecognizedEvent');
+		$textRecognizedEvent = $this->capturedEvents[0];
+		$this->assertInstanceOf(TextRecognizedEvent::class, $textRecognizedEvent, 'Expected TextRecognizedEvent instance');
+		$this->assertEquals('PNG with alpha channel', trim($textRecognizedEvent->getRecognizedText()), 'Expected recognized text');
+	}
+
+	/**
+	 * Uploads a PNG without alpha channel, runs the OCR background job and expects
+	 * exactly one TextRecognizedEvent carrying the text of the image, without
+	 * any request being sent to the OCR Backend Service.
+	 *
+	 * NOTE(review): despite the "RegularJpg" suffix of the method name, the fixture
+	 * used here is 'png-without-alpha-channel.png' - consider renaming the test.
+	 */
+	public function testWorkflowOcrLocalBackendRegularJpg(): void {
+		$this->addOperation('image/png');
+		$this->uploadTestFile('png-without-alpha-channel.png');
+		$this->runOcrBackgroundJob();
+
+		$this->assertEmpty($this->apiClient->getRequests(), 'Expected no OCR Backend Service requests');
+		$this->assertCount(1, $this->capturedEvents, 'Expected 1 TextRecognizedEvent');
+		$textRecognizedEvent = $this->capturedEvents[0];
+		$this->assertInstanceOf(TextRecognizedEvent::class, $textRecognizedEvent, 'Expected TextRecognizedEvent instance');
+		$this->assertEquals('PNG without alpha channel', trim($textRecognizedEvent->getRecognizedText()), 'Expected recognized text');
+	}
}
diff --git a/tests/Integration/testdata/png-with-alpha-channel.png b/tests/Integration/testdata/png-with-alpha-channel.png
new file mode 100644
index 00000000..194d8910
Binary files /dev/null and b/tests/Integration/testdata/png-with-alpha-channel.png differ
diff --git a/tests/Integration/testdata/png-without-alpha-channel.png b/tests/Integration/testdata/png-without-alpha-channel.png
new file mode 100644
index 00000000..9ca984c6
Binary files /dev/null and b/tests/Integration/testdata/png-without-alpha-channel.png differ
diff --git a/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php b/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php
index 46a9a4c6..df34c3c7 100644
--- a/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php
+++ b/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php
@@ -35,6 +35,8 @@
use Test\TestCase;
class ImageOcrProcessorTest extends TestCase {
+ /** @var \OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions|MockObject */
+ private $phpNative;
public function testOcrFileSetsImageDpi() {
/** @var ICommand|MockObject $command */
$command = $this->createMock(ICommand::class);
@@ -49,11 +51,22 @@ public function testOcrFileSetsImageDpi() {
$commandLineUtils->method('getCommandlineArgs')
->willReturnCallback(fn ($settings, $globalSettings, $sidecarFile, $additionalCommandlineArgs) => implode(' ', $additionalCommandlineArgs));
- $processor = new ImageOcrProcessor($command, $logger, $sidecarFileAccessor, $commandLineUtils);
+ $phpNative = $this->createMock(\OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions::class);
+ $phpNative->method('fopen')->willReturnCallback(fn ($file, $mode) => fopen($file, $mode));
+ $phpNative->method('streamGetContents')->willReturnCallback(fn ($h) => stream_get_contents($h));
+ $processor = new ImageOcrProcessor($command, $logger, $sidecarFileAccessor, $commandLineUtils, $phpNative);
+
+ $file->method('fopen')
+ ->willReturnCallback(function ($mode) {
+ $stream = fopen('php://temp', 'r+');
+ fwrite($stream, 'content');
+ rewind($stream);
+ return $stream;
+ });
$file->expects($this->once())
- ->method('getContent')
- ->willReturn('content');
+ ->method('getName')
+ ->willReturn('test.pdf');
$command->expects($this->once())
->method('setCommand')
->with($this->stringContains(' --image-dpi 300 '))
diff --git a/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php b/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php
index bdade24a..c7cff0df 100644
--- a/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php
+++ b/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php
@@ -64,6 +64,8 @@ class PdfOcrProcessorTest extends TestCase {
private $defaultSettings;
/** @var GlobalSettings */
private $defaultGlobalSettings;
+ /** @var \OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions|MockObject */
+ private $phpNative;
protected function setUp(): void {
parent::setUp();
@@ -82,6 +84,15 @@ protected function setUp(): void {
->willReturn(self::FILE_CONTENT_BEFORE);
$this->fileBefore->method('getMimeType')
->willReturnCallback(fn () => $this->fileBeforeMimeType);
+ $this->fileBefore->method('getName')
+ ->willReturn('someFileName.pdf');
+ $this->fileBefore->method('fopen')
+ ->willReturnCallback(function ($mode) {
+ $stream = fopen('php://temp', 'r+');
+ fwrite($stream, self::FILE_CONTENT_BEFORE);
+ rewind($stream);
+ return $stream;
+ });
$this->fileBeforeMimeType = 'application/pdf';
$this->ocrMyPdfOutput = self::FILE_CONTENT_AFTER;
@@ -90,6 +101,14 @@ protected function setUp(): void {
;
$this->ocrBackendInfoService->method('isRemoteBackend')
->willReturn(false);
+
+ $this->phpNative = $this->createMock(\OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions::class);
+ $this->phpNative->method('fopen')->willReturnCallback(function ($file, $mode) {
+ return fopen($file, $mode);
+ });
+ $this->phpNative->method('streamGetContents')->willReturnCallback(function ($h) {
+ return stream_get_contents($h);
+ });
}
public function testCallsCommandInterface() {
@@ -103,7 +122,7 @@ public function testCallsCommandInterface() {
->method('execute')
->willReturn(true);
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$result = $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
$this->assertEquals(self::FILE_CONTENT_AFTER, $result->getFileContent());
@@ -123,7 +142,7 @@ public function testThrowsOcrNotPossibleException() {
$this->command->expects($this->once())
->method('getExitCode');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$thrown = false;
try {
@@ -162,7 +181,7 @@ public function testLogsWarningIfOcrMyPdfSucceedsWithWarningOutput() {
&& $paramsArray['errorOutput'] === 'getErrorOutput';
}));
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -177,19 +196,16 @@ public function testThrowsErrorIfOcrFileWasEmpty() {
->method('getStdErr')
->willReturn('stdErr');
$this->ocrMyPdfOutput = '';
- $this->fileBefore->expects($this->once())
- ->method('getPath')
- ->willReturn('/admin/files/somefile.pdf');
$thrown = false;
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
try {
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
} catch (\Throwable $t) {
$thrown = true;
$this->assertInstanceOf(OcrResultEmptyException::class, $t);
- $this->assertEquals('OCRmyPDF did not produce any output for file /admin/files/somefile.pdf', $t->getMessage());
+ $this->assertEquals('OCRmyPDF did not produce any output for file someFileName.pdf', $t->getMessage());
}
$this->assertTrue($thrown);
@@ -206,7 +222,7 @@ public function testLanguageSettingsAreSetCorrectly() {
->method('getOutput')
->willReturn('someOcrContent');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, new WorkflowSettings('{"languages": ["deu", "eng"] }'), $this->defaultGlobalSettings);
}
@@ -221,7 +237,7 @@ public function testRemoveBackgroundFlagIsSetCorrectly() {
->method('getOutput')
->willReturn('someOcrContent');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, new WorkflowSettings('{"removeBackground": true }'), $this->defaultGlobalSettings);
}
@@ -236,7 +252,7 @@ public function testProcessorCountIsNotSetIfGlobalSettingsDoesNotContainProcesso
->method('getOutput')
->willReturn('someOcrContent');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -253,7 +269,7 @@ public function testProcessorCountIsSetCorrectlyFromGobalSettings() {
$this->defaultGlobalSettings->processorCount = 42;
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -271,10 +287,10 @@ public function testLogsInfoIfSidecarFileContentWasEmpty() {
$this->logger->expects($this->once())
->method('info')
->with($this->callback(function ($message) {
- return strpos($message, 'Temporary sidecar file at') !== false && strpos($message, 'was empty') !== false;
+ return $message === 'Recognized text was empty';
}));
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -292,7 +308,7 @@ public function testDoesNotLogInfoIfSidecarFileContentWasNotEmpty() {
$this->logger->expects($this->never())
->method('info');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -313,7 +329,7 @@ public function testAppliesSidecarParameterIfSidecarFileCanBeCreated() {
->method('getOrCreateSidecarFile')
->willReturn('/tmp/sidecar.txt');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
}
@@ -335,7 +351,7 @@ public function testAppliesOcrModeParameter(int $simulatedOcrMode, string $expec
->method('getOrCreateSidecarFile')
->willReturn('/tmp/sidecar.txt');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, new WorkflowSettings('{"ocrMode": ' . $simulatedOcrMode . '}'), $this->defaultGlobalSettings);
}
@@ -361,7 +377,7 @@ public function testRemoveBackgroundIsNotAppliedIfOcrModeIsRedoOcr() {
return strpos($message, '--remove-background is incompatible with --redo-ocr') !== false;
}));
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, new WorkflowSettings('{"ocrMode": ' . WorkflowSettings::OCR_MODE_REDO_OCR . ', "removeBackground": true}'), $this->defaultGlobalSettings);
}
@@ -383,7 +399,7 @@ public function testAppliesCustomCliArgsCorrectly() {
->willReturn('/tmp/sidecar.txt');
$workflowSettings = new WorkflowSettings('{"customCliArgs": "--output-type pdf"}');
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $workflowSettings, $this->defaultGlobalSettings);
}
@@ -406,7 +422,7 @@ public function testThrowsOcrAlreadyDoneExceptionIfErrorCodeIsEquals6() {
$thrown = false;
try {
- $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils);
+ $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils, $this->phpNative);
$processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings);
} catch (\Throwable $t) {
$thrown = true;
diff --git a/tests/Unit/OcrProcessors/OcrProcessorBaseTest.php b/tests/Unit/OcrProcessors/OcrProcessorBaseTest.php
new file mode 100644
index 00000000..d09d4add
--- /dev/null
+++ b/tests/Unit/OcrProcessors/OcrProcessorBaseTest.php
@@ -0,0 +1,217 @@
+doOcrProcessing($fileResource, $fileName, $settings, $globalSettings);
+ }
+
+ /**
+  * Returns an empty array regardless of the given settings, i.e. this
+  * implementation contributes no additional command line arguments.
+  *
+  * @param mixed $settings unused
+  * @param mixed $globalSettings unused
+  * @return array always an empty array
+  */
+ protected function getAdditionalCommandlineArgs($settings, $globalSettings): array {
+ return [];
+ }
+}
+
+class OcrProcessorBaseTest extends TestCase {
+	/**
+	 * Feeds a PNG with alpha channel into ocrFile() and checks that the content
+	 * returned by the result no longer uses the RGBA color type.
+	 *
+	 * The color type is read straight from the PNG header, so both the original
+	 * and the processed bytes are validated for type and length first. Without
+	 * that guard an empty or truncated result would produce color type 0 and
+	 * let the "not RGBA" assertion pass by accident.
+	 */
+	public function testRemoveAlphaChannelFromImage_RemovesAlphaAndReturnsNewStream(): void {
+		$logger = $this->createMock(LoggerInterface::class);
+		$phpNative = $this->createMock(IPhpNativeFunctions::class);
+		// Delegate the wrapped calls to the real PHP functions
+		$phpNative->method('fopen')->willReturnCallback(fn ($file, $mode) => fopen($file, $mode));
+		$phpNative->method('streamGetContents')->willReturnCallback(fn ($h) => stream_get_contents($h));
+
+		$processor = new TestOcrProcessor($logger, $phpNative);
+
+		$originalStream = $this->createPngStream(true);
+		// Validate the fixture before it is handed to the file mock
+		if (!is_resource($originalStream)) {
+			$this->fail('createPngStream did not return a valid stream resource');
+		}
+
+		// Capture original bytes before ocrFile closes streams
+		$originalBytes = stream_get_contents($originalStream);
+		rewind($originalStream);
+
+		$file = $this->createMock(File::class);
+		$file->method('getMimeType')->willReturn('image/png');
+		$file->method('fopen')->with('rb')->willReturn($originalStream);
+		$file->method('getName')->willReturn('test.png');
+
+		$settings = $this->createMock(WorkflowSettings::class);
+		$globalSettings = $this->createMock(GlobalSettings::class);
+
+		$result = $processor->ocrFile($file, $settings, $globalSettings);
+
+		$this->assertNotNull($result);
+		$processed = $result->getFileContent();
+		$this->assertIsString($processed, 'Processed file content should be a string');
+
+		// PNG layout: 8 byte signature, 4 byte chunk length, 4 byte chunk type (IHDR),
+		// 4 byte width, 4 byte height, 1 byte bit depth => color type is byte 25.
+		// Color type 6 = RGBA (has alpha), 2 = RGB (no alpha)
+		$this->assertGreaterThanOrEqual(26, strlen($originalBytes), 'PNG too short to inspect IHDR');
+		$this->assertGreaterThanOrEqual(26, strlen($processed), 'Processed PNG too short to inspect IHDR');
+		$origColorType = ord($originalBytes[25]);
+		$procColorType = ord($processed[25]);
+
+		$this->assertSame(6, $origColorType, 'Original should have RGBA color type');
+		$this->assertNotSame(6, $procColorType, 'Processed image should not have RGBA color type');
+	}
+
+	/**
+	 * When reading the input stream fails, doOcrProcessing() has to report the
+	 * problem through its result array: index 0 is false, index 3 carries the
+	 * exit code -1 and index 4 contains the error message.
+	 */
+	public function testDoOcrProcessingReturnsErrorIfInputFileCannotBeRead(): void {
+		// Mock that "stream_get_contents" fails
+		$nativeFunctions = $this->createMock(IPhpNativeFunctions::class);
+		$nativeFunctions->method('streamGetContents')->willReturn(false);
+
+		// All other constructor dependencies are plain mocks without expectations
+		$processor = new TestableOcrMyPdfBasedProcessor(
+			$this->createMock(ICommand::class),
+			$this->createMock(LoggerInterface::class),
+			$this->createMock(ISidecarFileAccessor::class),
+			$this->createMock(ICommandLineUtils::class),
+			$nativeFunctions
+		);
+
+		$workflowSettings = $this->createMock(WorkflowSettings::class);
+		$globalSettings = $this->createMock(GlobalSettings::class);
+
+		$inputStream = fopen('php://temp', 'r');
+		try {
+			$outcome = $processor->runCallToDoOcrProcessing($inputStream, 'unreadable.pdf', $workflowSettings, $globalSettings);
+		} finally {
+			// Release the stream no matter how the call ended
+			if (is_resource($inputStream)) {
+				fclose($inputStream);
+			}
+		}
+
+		$this->assertIsArray($outcome);
+		$this->assertFalse($outcome[0], 'Expected processing to indicate failure');
+		$this->assertSame(-1, $outcome[3], 'Expected exit code -1 for read failure');
+		$this->assertStringContainsString('Failed to read file content', (string)$outcome[4]);
+	}
+
+	/**
+	 * Feeds a PNG without alpha channel into ocrFile() and checks that the
+	 * content returned by the result is byte-for-byte the original image.
+	 */
+	public function testRemoveAlphaChannelFromImage_NoAlpha_ReturnsOriginalResource(): void {
+		$logger = $this->createMock(LoggerInterface::class);
+		$phpNative = $this->createMock(IPhpNativeFunctions::class);
+		// Delegate the wrapped calls to the real PHP functions
+		$phpNative->method('fopen')->willReturnCallback(fn ($file, $mode) => fopen($file, $mode));
+		$phpNative->method('streamGetContents')->willReturnCallback(fn ($h) => stream_get_contents($h));
+
+		$processor = new TestOcrProcessor($logger, $phpNative);
+
+		$originalStream = $this->createPngStream(false);
+		// Same fixture guard as in the alpha-channel test
+		if (!is_resource($originalStream)) {
+			$this->fail('createPngStream did not return a valid stream resource');
+		}
+
+		$file = $this->createMock(File::class);
+		$file->method('getMimeType')->willReturn('image/png');
+		$file->method('fopen')->with('rb')->willReturn($originalStream);
+		$file->method('getName')->willReturn('test_no_alpha.png');
+
+		$settings = $this->createMock(WorkflowSettings::class);
+		$globalSettings = $this->createMock(GlobalSettings::class);
+
+		// Capture original bytes before ocrFile closes streams
+		rewind($originalStream);
+		$originalBytes = stream_get_contents($originalStream);
+		rewind($originalStream);
+
+		$result = $processor->ocrFile($file, $settings, $globalSettings);
+
+		$this->assertNotNull($result);
+		$processed = $result->getFileContent();
+
+		// For no-alpha images the preprocessed bytes should equal the original.
+		// assertSame compares the binary strings strictly (type and content).
+		$this->assertSame($originalBytes, $processed, 'PNG without alpha channel should be passed through unchanged');
+	}
+
+	/**
+	 * ocrFile() must throw a RuntimeException with a descriptive message when
+	 * the temporary stream needed for alpha channel removal cannot be opened.
+	 */
+	public function testThrowsRuntimeExceptionIfOpeningTempFails(): void {
+		$logger = $this->createMock(LoggerInterface::class);
+		$phpNative = $this->createMock(IPhpNativeFunctions::class);
+
+		// Simulate failure when opening the temporary stream for alpha removal
+		$phpNative->method('fopen')->with('php://temp', 'r+')->willReturn(false);
+
+		$processor = new TestOcrProcessor($logger, $phpNative);
+
+		$originalStream = $this->createPngStream(true);
+
+		$file = $this->createMock(File::class);
+		$file->method('getMimeType')->willReturn('image/png');
+		$file->method('fopen')->with('rb')->willReturn($originalStream);
+		$file->method('getName')->willReturn('test.png');
+
+		$settings = $this->createMock(WorkflowSettings::class);
+		$globalSettings = $this->createMock(GlobalSettings::class);
+
+		$this->expectException(\RuntimeException::class);
+		$this->expectExceptionMessage('Failed to create temporary stream for alpha channel removal');
+
+		try {
+			$processor->ocrFile($file, $settings, $globalSettings);
+		} finally {
+			// The expected exception ends the test body here, so the fixture
+			// stream has to be released in the finally branch (if still open).
+			if (is_resource($originalStream)) {
+				fclose($originalStream);
+			}
+		}
+	}
+
+	/**
+	 * Creates a 10x10 PNG test image and returns it as a rewound php://temp stream.
+	 *
+	 * @param bool $withAlpha true: image is filled with a fully transparent color
+	 *                        and encoded with its alpha channel; false: image is
+	 *                        filled with opaque white
+	 * @return resource read/write stream positioned at the start of the PNG data
+	 * @throws \RuntimeException if the image, its PNG encoding or the stream
+	 *                           cannot be created
+	 */
+	private function createPngStream(bool $withAlpha) {
+		$img = imagecreatetruecolor(10, 10);
+		if ($img === false) {
+			throw new \RuntimeException('Could not create GD image for PNG fixture');
+		}
+
+		if ($withAlpha) {
+			// Without this flag the alpha information is dropped when encoding
+			imagesavealpha($img, true);
+			$fillColor = imagecolorallocatealpha($img, 0, 0, 0, 127);
+		} else {
+			$fillColor = imagecolorallocate($img, 255, 255, 255);
+		}
+		imagefill($img, 0, 0, $fillColor);
+
+		// imagepng() without a target writes to the output buffer, so capture it
+		ob_start();
+		$encoded = imagepng($img);
+		$png = ob_get_clean();
+		imagedestroy($img);
+		if ($encoded === false || $png === false || $png === '') {
+			throw new \RuntimeException('Could not encode PNG fixture');
+		}
+
+		$stream = fopen('php://temp', 'r+');
+		if ($stream === false) {
+			throw new \RuntimeException('Could not open php://temp for PNG fixture');
+		}
+		fwrite($stream, $png);
+		rewind($stream);
+		return $stream;
+	}
+}
diff --git a/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php b/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php
index d478d0a2..9e69a56a 100644
--- a/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php
+++ b/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php
@@ -46,6 +46,8 @@ class WorkflowOcrRemoteProcessorTest extends TestCase {
private $workflowSettings;
private $globalSettings;
private $processor;
+ /** @var \OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions|MockObject */
+ private $phpNative;
protected function setUp(): void {
$this->apiClient = $this->createMock(IApiClient::class);
@@ -55,10 +57,13 @@ protected function setUp(): void {
$this->workflowSettings = $this->createMock(WorkflowSettings::class);
$this->globalSettings = $this->createMock(GlobalSettings::class);
+ $this->phpNative = $this->createMock(\OCA\WorkflowOcr\Wrapper\IPhpNativeFunctions::class);
+
$this->processor = new WorkflowOcrRemoteProcessor(
$this->apiClient,
$this->commandLineUtils,
- $this->logger
+ $this->logger,
+ $this->phpNative
);
}
@@ -85,7 +90,6 @@ public function testOcrFileSuccess(): void {
$this->assertInstanceOf(OcrProcessorResult::class, $result);
$this->assertEquals('file content', $result->getFileContent());
- $this->assertEquals('pdf', $result->getFileExtension());
$this->assertEquals('recognized text', $result->getRecognizedText());
}
diff --git a/tests/Unit/Service/EventServiceTest.php b/tests/Unit/Service/EventServiceTest.php
index a9925e84..9c676f8b 100644
--- a/tests/Unit/Service/EventServiceTest.php
+++ b/tests/Unit/Service/EventServiceTest.php
@@ -48,7 +48,7 @@ public function testTextRecognizedDispatchesEvent() {
/** @var File|MockObject */
$file = $this->createMock(File::class);
$recognizedText = 'recognizedText';
- $ocrResult = new OcrProcessorResult('content', 'pdf', $recognizedText);
+ $ocrResult = new OcrProcessorResult('content', $recognizedText);
$this->eventDispatcher->expects($this->once())
->method('dispatchTyped')
->with($this->callback(function (TextRecognizedEvent $event) use ($recognizedText, $file) {
diff --git a/tests/Unit/Service/OcrServiceTest.php b/tests/Unit/Service/OcrServiceTest.php
index 1b0d1d4b..6e99e7f6 100644
--- a/tests/Unit/Service/OcrServiceTest.php
+++ b/tests/Unit/Service/OcrServiceTest.php
@@ -382,7 +382,7 @@ public function testCreatesNewFileVersionAndEmitsTextRecognizedEvent(string $ori
$mimeType = 'application/pdf';
$content = 'someFileContent';
$ocrContent = 'someOcrProcessedFile';
- $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent); // Extend this cases if we add new OCR processors
+ $ocrResult = new OcrProcessorResult($ocrContent, $ocrContent); // Extend this cases if we add new OCR processors
$originalFileMock = $this->createValidFileMock($mimeType, $content, $rootFolderPath, $originalFilename);
$this->rootFolderGetFirstNodeById42ReturnValue = $originalFileMock;
@@ -472,7 +472,7 @@ public function testCallsProcessingFileAccessor() {
$content = 'someFileContent';
$ocrContent = 'someOcrProcessedFile';
$filePath = '/admin/files/somefile.pdf';
- $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent); // Extend this cases if we add new OCR processors
+ $ocrResult = new OcrProcessorResult($ocrContent, $ocrContent); // Extend this cases if we add new OCR processors
$this->rootFolderGetFirstNodeById42ReturnValue = $this->createValidFileMock($mimeType, $content);
@@ -515,7 +515,7 @@ public function testDoesNotCreateNewFileVersionIfOcrContentWasEmpty() {
$mimeType = 'application/pdf';
$content = 'someFileContent';
$ocrContent = '';
- $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent);
+ $ocrResult = new OcrProcessorResult($ocrContent, $ocrContent);
$fileId = 42;
$this->rootFolder->expects($this->never())->method('getById');
@@ -600,7 +600,7 @@ public function testRestoreOriginalFileModificationDate() {
$mimeType = 'application/pdf';
$content = 'someFileContent';
$ocrContent = 'someOcrProcessedFile';
- $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent); // Extend this cases if we add new OCR processors
+ $ocrResult = new OcrProcessorResult($ocrContent, $ocrContent); // Extend this cases if we add new OCR processors
$fileMock = $this->createValidFileMock($mimeType, $content);
$this->rootFolderGetFirstNodeById42ReturnValue = $fileMock;
@@ -682,7 +682,7 @@ public function testCreatesNewFileVersionWithSuffixIfNodeIsNotUpdateable() {
$mimeType = 'application/pdf';
$content = 'someFileContent';
$ocrContent = 'someOcrProcessedFile';
- $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent); // Extend this cases if we add new OCR processors
+ $ocrResult = new OcrProcessorResult($ocrContent, $ocrContent); // Extend this cases if we add new OCR processors
$fileMock = $this->createValidFileMock($mimeType, $content, '/admin/files', 'somefile.pdf', false);
$this->rootFolderGetFirstNodeById42ReturnValue = $fileMock;
diff --git a/tests/psalm-baseline.xml b/tests/psalm-baseline.xml
index 34c1c659..ff0eaae0 100644
--- a/tests/psalm-baseline.xml
+++ b/tests/psalm-baseline.xml
@@ -155,6 +155,7 @@
+