From 0631a0a3a3dd001c4838c6ab617db25834a4d6d3 Mon Sep 17 00:00:00 2001 From: PrinsFrank <25006490+PrinsFrank@users.noreply.github.com> Date: Tue, 23 Dec 2025 20:11:15 +0100 Subject: [PATCH] Use byte offset next in use object to start search for end of current object to allow for "endobj" markers to appear in embedded files --- .../Source/CrossReferenceSource.php | 22 +++++++++++++++++++ src/Document/Document.php | 2 +- .../UncompressedObjectParser.php | 9 ++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/Document/CrossReference/Source/CrossReferenceSource.php b/src/Document/CrossReference/Source/CrossReferenceSource.php index 929eb2a8..1e4c8d74 100644 --- a/src/Document/CrossReference/Source/CrossReferenceSource.php +++ b/src/Document/CrossReference/Source/CrossReferenceSource.php @@ -75,4 +75,26 @@ public function getFirstId(): string { return $firstId; } + + public function getByteOffsetNextInUseObject(CrossReferenceEntryInUseObject $crossReferenceEntry): ?int { + $byteOffsets = []; + foreach ($this->crossReferenceSections as $crossReferenceSection) { + foreach ($crossReferenceSection->crossReferenceSubSections as $crossReferenceSubSection) { + foreach ($crossReferenceSubSection->crossReferenceEntries as $entry) { + if ($entry instanceof CrossReferenceEntryInUseObject) { + $byteOffsets[] = $entry->byteOffsetInDecodedStream; + } + } + } + } + + sort($byteOffsets); + foreach ($byteOffsets as $byteOffset) { + if ($byteOffset > $crossReferenceEntry->byteOffsetInDecodedStream) { + return $byteOffset; + } + } + + return null; + } } diff --git a/src/Document/Document.php b/src/Document/Document.php index b511c18d..b647c7b6 100644 --- a/src/Document/Document.php +++ b/src/Document/Document.php @@ -123,7 +123,7 @@ public function getObject(int $objectNumber, ?string $expectedDecoratorFQN = nul $objectItem = $parentObject->objectItem->getCompressedObject($objectNumber, $this); } else { - $objectItem = UncompressedObjectParser::parseObject($crossReferenceEntry, $objectNumber, $this->stream); + $objectItem = UncompressedObjectParser::parseObject($crossReferenceEntry, $this->crossReferenceSource->getByteOffsetNextInUseObject($crossReferenceEntry), $objectNumber, $this->stream); } return $this->objectCache[$objectNumber] = DecoratedObjectFactory::forItem($objectItem, $this, $expectedDecoratorFQN); diff --git a/src/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php b/src/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php index 159bdf1f..0d5d3527 100644 --- a/src/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php +++ b/src/Document/Object/Item/UncompressedObject/UncompressedObjectParser.php @@ -10,8 +10,13 @@ /** @internal */ class UncompressedObjectParser { - public static function parseObject(CrossReferenceEntryInUseObject $crossReferenceEntry, int $objectNumber, Stream $stream): UncompressedObject { - $endObj = $stream->firstPos(Marker::END_OBJ, $crossReferenceEntry->byteOffsetInDecodedStream, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate end of object'); + public static function parseObject(CrossReferenceEntryInUseObject $crossReferenceEntry, ?int $byteOffsetNextInUseObject, int $objectNumber, Stream $stream): UncompressedObject { + if ($byteOffsetNextInUseObject !== null) { + $endObj = $stream->lastPos(Marker::END_OBJ, $stream->getSizeInBytes() - $byteOffsetNextInUseObject) ?? throw new ParseFailureException('Unable to locate end of object'); + } else { + $endObj = $stream->firstPos(Marker::END_OBJ, $crossReferenceEntry->byteOffsetInDecodedStream, $stream->getSizeInBytes()) ?? throw new ParseFailureException('Unable to locate end of object'); + } + $startObj = $stream->firstPos(Marker::OBJ, $crossReferenceEntry->byteOffsetInDecodedStream, $endObj) ?? throw new ParseFailureException('Unable to locate start of object'); $objHeader = $stream->read($crossReferenceEntry->byteOffsetInDecodedStream, $startObj + Marker::OBJ->length() - $crossReferenceEntry->byteOffsetInDecodedStream); $objHeaderParts = explode(WhitespaceCharacter::SPACE->value, str_replace([WhitespaceCharacter::LINE_FEED->value], ' ', trim($objHeader)));