From 1cd384228a795b23a9feeeaca72e02efed4fc4ee Mon Sep 17 00:00:00 2001 From: gal kahana Date: Sat, 18 Feb 2023 21:31:56 +0200 Subject: [PATCH] fix: harsher hex string parsing to avoid more dire parsing errors (#195) running fuzztests on my mac was failing erraticaly for one of the files. also recreating a decompressed version didnt work. traced the first problem to a too permissive hex string parsing which caused overreads. and the second problem to a too restrictive zip decoding issue where end of stream error canceled the whole stream reading. which is a shame cause it could parse something worthwhile till then, like in this case. --- CMakeLists.txt | 2 +- PDFWriter/InputFlateDecodeStream.cpp | 10 ++++------ PDFWriter/PDFParserTokenizer.cpp | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b155c21..73044646 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.15) # Soname # MAJOR is incremented when symbols are removed or changed in an incompatible way # MINOR is incremented when new symbols are added -project(PDFHummus VERSION 4.5.1) +project(PDFHummus VERSION 4.5.2) option(USE_BUNDLED "Whether to use bundled libraries" TRUE) diff --git a/PDFWriter/InputFlateDecodeStream.cpp b/PDFWriter/InputFlateDecodeStream.cpp index 7759011b..827561f3 100644 --- a/PDFWriter/InputFlateDecodeStream.cpp +++ b/PDFWriter/InputFlateDecodeStream.cpp @@ -106,8 +106,8 @@ IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::Read(IOBasicTypes::Byte IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::DecodeBufferAndRead(const IOBasicTypes::Byte* inBuffer,IOBasicTypes::LongBufferSizeType inSize) { - if(0 == inSize) - return 0; // inflate kinda touchy about getting 0 lengths + if(0 == inSize || mEndOfCompressionEoncountered) + return 0; // inflate kinda touchy about getting 0 lengths, also stop if already decided to finish int inflateResult = Z_OK; @@ -168,10 +168,8 @@ IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::DecodeBufferAndRead(con // should be that at the last buffer we'll get here a nice Z_STREAM_END mEndOfCompressionEoncountered = (Z_STREAM_END == inflateResult) || isError(inflateResult); - if(Z_OK == inflateResult || Z_STREAM_END == inflateResult) - return inSize - mZLibState->avail_out; - else - return 0; + // allows returning what's read even if finished with error. a forgiving approach for end of stream errors + return inSize - mZLibState->avail_out; } bool InputFlateDecodeStream::NotEnded() diff --git a/PDFWriter/PDFParserTokenizer.cpp b/PDFWriter/PDFParserTokenizer.cpp index 0bb75a11..44f516dc 100644 --- a/PDFWriter/PDFParserTokenizer.cpp +++ b/PDFWriter/PDFParserTokenizer.cpp @@ -21,6 +21,7 @@ #include "PDFParserTokenizer.h" #include "IByteReader.h" #include "OutputStringBufferStream.h" +#include "Trace.h" using namespace PDFHummus; using namespace IOBasicTypes; @@ -212,8 +213,20 @@ BoolAndString PDFParserTokenizer::GetNextToken() break; } - if(!IsPDFWhiteSpace(buffer)) - tokenBuffer.Write(&buffer,1); + if(IsPDFWhiteSpace(buffer)) + continue; + + // verify that getting a hex char or ending char, so that we are sure there's no orcish mischief + if(!(buffer>='0' && buffer<='9') && + !(buffer>='A' && buffer<='F') && + !(buffer>='a' && buffer<='f') && + buffer != '>') { + TRACE_LOG1("PDFParserTokenizer::GetNextToken, encountered non ascii char in hex string. probably a corruption. byte value = %d", buffer); + result.first = false; + break; + } + + tokenBuffer.Write(&buffer,1); } } result.second = tokenBuffer.ToString();