Skip to content

Commit

Permalink
fix: harsher hex string parsing to avoid more dire parsing errors (#195)
Browse files Browse the repository at this point in the history
running fuzztests on my mac was failing erraticaly for one of the files.

also recreating a decompressed version didnt work.

traced the first problem to a too permissive hex string parsing which caused overreads.

and the second problem to a too restrictive zip decoding issue where end of stream error canceled the whole stream reading. which is a shame cause it could parse something worthwhile till then, like in this case.
  • Loading branch information
galkahana authored Feb 18, 2023
1 parent c0de725 commit 1cd3842
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.15)
# Soname
# MAJOR is incremented when symbols are removed or changed in an incompatible way
# MINOR is incremented when new symbols are added
project(PDFHummus VERSION 4.5.1)
project(PDFHummus VERSION 4.5.2)


option(USE_BUNDLED "Whether to use bundled libraries" TRUE)
Expand Down
10 changes: 4 additions & 6 deletions PDFWriter/InputFlateDecodeStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::Read(IOBasicTypes::Byte

IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::DecodeBufferAndRead(const IOBasicTypes::Byte* inBuffer,IOBasicTypes::LongBufferSizeType inSize)
{
if(0 == inSize)
return 0; // inflate kinda touchy about getting 0 lengths
if(0 == inSize || mEndOfCompressionEoncountered)
return 0; // inflate kinda touchy about getting 0 lengths, also stop if already decided to finish

int inflateResult = Z_OK;

Expand Down Expand Up @@ -168,10 +168,8 @@ IOBasicTypes::LongBufferSizeType InputFlateDecodeStream::DecodeBufferAndRead(con

// should be that at the last buffer we'll get here a nice Z_STREAM_END
mEndOfCompressionEoncountered = (Z_STREAM_END == inflateResult) || isError(inflateResult);
if(Z_OK == inflateResult || Z_STREAM_END == inflateResult)
return inSize - mZLibState->avail_out;
else
return 0;
// allows returning what's read even if finished with error. a forgiving approach for end of stream errors
return inSize - mZLibState->avail_out;
}

bool InputFlateDecodeStream::NotEnded()
Expand Down
17 changes: 15 additions & 2 deletions PDFWriter/PDFParserTokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "PDFParserTokenizer.h"
#include "IByteReader.h"
#include "OutputStringBufferStream.h"
#include "Trace.h"

using namespace PDFHummus;
using namespace IOBasicTypes;
Expand Down Expand Up @@ -212,8 +213,20 @@ BoolAndString PDFParserTokenizer::GetNextToken()
break;
}

if(!IsPDFWhiteSpace(buffer))
tokenBuffer.Write(&buffer,1);
if(IsPDFWhiteSpace(buffer))
continue;

// verify that getting a hex char or ending char, so that we are sure there's no orcish mischief
if(!(buffer>='0' && buffer<='9') &&
!(buffer>='A' && buffer<='F') &&
!(buffer>='a' && buffer<='f') &&
buffer != '>') {
TRACE_LOG1("PDFParserTokenizer::GetNextToken, encountered non ascii char in hex string. probably a corruption. byte value = %d", buffer);
result.first = false;
break;
}

tokenBuffer.Write(&buffer,1);
}
}
result.second = tokenBuffer.ToString();
Expand Down

0 comments on commit 1cd3842

Please sign in to comment.