Monday, April 26, 2010

How to compare two streams byte by byte

If you want to know how much two files differ without actually viewing the differences — just the percentage of bytes that differ — here's a utility function that does exactly that. It uses Min and Max, so add the Math unit to your uses clause.
// Compares two streams byte by byte and returns the percentage (0..100) of
// bytes that differ, relative to the size of the larger stream.
//
// Both streams are rewound to position 0 before comparing. Bytes at the same
// offset are compared over the common length of the two streams; every byte
// by which one stream is longer than the other has no counterpart and is
// therefore counted as a difference. Two empty streams are identical, so the
// result is 0.
//
// Raises EReadError (from TStream.ReadBuffer) if a stream cannot deliver the
// number of bytes announced by its Size property.
function CompareStreams(Stream1, Stream2: TStream): Extended;
type
  TCompareBuffer = array[0..8191] of Byte;

const
  // store the buffer's size
  szCompareBuffer = SizeOf(TCompareBuffer);

var
  // buffer variables, one for each stream
  Buffer1: TCompareBuffer;
  Buffer2: TCompareBuffer;
  // total sizes of the two streams
  Size1: Int64;
  Size2: Int64;
  // number of bytes of the common prefix that are still to be compared
  Remaining: Int64;
  // number of bytes to read from each stream in the current iteration
  ChunkSize: Integer;
  // number of different bytes found so far
  DifferenceCount: Int64;
  // loop variable
  Index: Integer;
begin
  // always compare from the beginning of both streams
  Stream1.Position := 0;
  Stream2.Position := 0;
  Size1 := Stream1.Size;
  Size2 := Stream2.Size;
  // two empty streams are identical; returning early also avoids the
  // division by zero the percentage formula would otherwise perform
  if (Size1 = 0) and (Size2 = 0) then begin
    Result := 0;
    Exit;
  end;
  // bytes that exist in only one of the streams are differences by definition
  DifferenceCount := Abs(Size1 - Size2);
  // only the common prefix can be compared offset by offset
  Remaining := Min(Size1, Size2);
  while Remaining > 0 do begin
    if Remaining > szCompareBuffer then
      ChunkSize := szCompareBuffer
    else
      ChunkSize := Integer(Remaining);
    // ReadBuffer either delivers exactly ChunkSize bytes or raises, so both
    // buffers always hold data from the same offsets
    Stream1.ReadBuffer(Buffer1, ChunkSize);
    Stream2.ReadBuffer(Buffer2, ChunkSize);
    // check differences byte by byte
    for Index := 0 to ChunkSize - 1 do
      if Buffer1[Index] <> Buffer2[Index] then
        Inc(DifferenceCount);
    Dec(Remaining, ChunkSize);
  end; // while Remaining > 0 do begin
  // return the differences as a percentage of the larger stream's size
  Result := (DifferenceCount * 100) / Max(Size1, Size2);
end;
For a quick-and-dirty test, create a new VCL application, drop an open dialog on the form, set the dialog's options to allow multi-select (ofAllowMultiSelect), drop a button on the form, double-click the button and paste this code:
var
  Stream1: TFileStream;
  Stream2: TFileStream;
  Differences: Extended;
begin
  // Body of the button's OnClick handler: lets the user pick exactly two
  // files, compares them with CompareStreams and shows the percentage of
  // differing bytes (0.0000 is shown when two files were not selected).
  Differences := 0.0000;
  // Local object references are not zero-initialised. Set them to nil so the
  // finally block is safe even if one of the constructors below raises.
  Stream1 := nil;
  Stream2 := nil;
  if OpenDialog1.Execute and (OpenDialog1.Files.Count = 2) then
    try
      // fmShareDenyWrite: other processes may keep reading the files while
      // we compare them, but may not modify them
      Stream1 := TFileStream.Create(OpenDialog1.Files[0], fmOpenRead or fmShareDenyWrite);
      Stream2 := TFileStream.Create(OpenDialog1.Files[1], fmOpenRead or fmShareDenyWrite);
      Differences := CompareStreams(Stream1, Stream2);
    finally
      // FreeAndNil is a no-op on nil references
      FreeAndNil(Stream1);
      FreeAndNil(Stream2);
    end;
  ShowMessageFmt('%.4f', [Differences]);
end;
Now click the button, select two files and wait for the compare to finish.
WARNING: do not open big files (e.g. movies, or anything around or over 700 MB) unless you are prepared to wait a few minutes for the comparison to complete.

1 comment:

  1. Brilliant,just what I needed. I learnt something too. Thankyou.

    ReplyDelete

Blogroll(General programming and Delphi feeds)