Monday, April 26, 2010

How to compare two streams byte by byte

If you wish to check for differences between two files but you don't want to "see" them, just to know the percent of differences, here's a utility function that does just that.
function CompareStreams(Stream1, Stream2: TStream): Extended;
type
  TCompareBuffer = array[0..8191] of Byte;

const
  // store the buffer's size
  szCompareBuffer = SizeOf(TCompareBuffer);

var
  // buffer variables, one for each stream
  Buffer1: TCompareBuffer;
  Buffer2: TCompareBuffer;
  // variables that will store the actual read bytes from streams
  ReadBytes1: Integer;
  ReadBytes2: Integer;
  // declare a variable that will store the number of different bytes in streams
  DifferenceCount: Int64;
  // loop variable
  Index: Integer;
  // max difference check loop's per buffer
  MaxCount: Integer;
begin
  // set stream position to 0
  Stream1.Position := 0;
  Stream2.Position := 0;
  // initialize difference count
  DifferenceCount := 0;
  // start a loop
  while True do begin
    // read from both streams
    ReadBytes1 := Stream1.Read(Buffer1, szCompareBuffer);
    ReadBytes2 := Stream2.Read(Buffer2, szCompareBuffer);
    // set the max count to the smaller value of read bytes
    MaxCount := Min(ReadBytes1, ReadBytes2);
    // check differences byte by byte
    for Index := 0 to MaxCount -1 do
      if Buffer1[Index] <> Buffer2[Index] then
        // difference found! increment DifferenceCount variable
        Inc(DifferenceCount);
    // if the number of read bytes from Stream1 is different than the
    // number of read bytes from Stream or we haven't read any bytes from
    // a stream, then break the loop, we're done comparing
    if (ReadBytes1 <> ReadBytes2) or (ReadBytes1 = 0) or (ReadBytes2 = 0) then
      Break;
  end; // while True do begin
  // return the number of differences 
  Result := (DifferenceCount * 100) / Max(Stream1.Size, Stream2.Size);
end;
For a quick and dirty testing, create a new VCL application, drop a open dialog, set the open dialog's options to allow multi select(ofAllowMultiSelect), drop a button on the form, double-click the button and paste this code:
var
  Stream1: TFileStream;
  Stream2: TFileStream;
  Differences: Extended;
begin
  Differences := 0.0000;
  if OpenDialog1.Execute and (OpenDialog1.Files.Count = 2) then
    try
      Stream1 := TFileStream.Create(OpenDialog1.Files[0], fmOpenRead);
      Stream2 := TFileStream.Create(OpenDialog1.Files[1], fmOpenRead);
      Differences := CompareStreams(Stream1, Stream2);
    finally
      FreeAndNil(Stream1);
      FreeAndNil(Stream2);
    end;
  ShowMessageFmt('%.4f', [Differences]);
end;
Now click the button, select two files and wait for the compare to finish.
WARNING: do not open big files i.e. movies or somewhere around/over 700 Mb or you'll spend a few mins waiting for the process to complete.

Tuesday, April 20, 2010

TurboHashedStringList at version 1.2

Thanks to SportsGuy, TurboHashedStringList is updated to version 1.2.
There are two new properties added to the class:
- Objects
- ObjectsByName

here are their definition:
property Objects[Index: Integer]: TObject
      read GetObject write PutObject;
    property ObjectsByName[Name: String; bCaseSensitive: Boolean]: TObject
      read GetObjectByName write PutObjectByName;
and implementation if you wish to update manually:
function TurboHashedStringList.GetObject(Index: Integer): TObject;
begin
  if (Index < 0) or (Index >= Self.Count) then
    Result := NIL;
  Result := PStringRec(FList[Index])^.ObjectRef;
end; // function TurboHashedStringList.GetObject(Index: Integer): TObject;

procedure TurboHashedStringList.PutObject(Index: Integer;
  AObject: TObject);
begin
  if (Index < 0) or (Index >= Self.Count) then
    Exit;
  PStringRec(FList[Index])^.ObjectRef := AObject;
end; // procedure TurboHashedStringList.PutObject(Index: Integer;

function TurboHashedStringList.GetObjectByName(Name: String;
  bCaseSensitive: Boolean): TObject;
var
  Index: Integer;
begin
  Index := IndexOfName(Name, bCaseSensitive);
  if Index >= 0 then
    Result := Objects[Index]
  else
    Result := NIL;
end; // function TurboHashedStringList.GetObjectByName(Name: String;

procedure TurboHashedStringList.PutObjectByName(Name: String;
  bCaseSensitive: Boolean; const Value: TObject);
var
  Index: Integer;
begin
  Index := IndexOfName(Name, bCaseSensitive);
  if Index >= 0 then
    PStringRec(FList[Index])^.ObjectRef := Value;
end; // procedure TurboHashedStringList.PutObjectByName(Name: String;
Alternatively you can download the class as a tiny zip file by clicking on this sentance.

Monday, April 12, 2010

Boyer-Moore Horspool in Delphi 2010

Today I found myself in need of a string search algorithm that must meet the following requirements:
1. fast as possible
2. work on ANSI and Unicode version of Delphi
3. must be able to search both case sensitive and insensitive
4. must be able to search from a given position in a string
5. it's code must be as easier to read as possible
Of course, the first thing I did(like anyone would do) was to search the Internet using Google Almighty for a Boyer-Moore implementation(preferably in Delphi) that would meet at least half of the requirements listed above, guess what? I did not found one that would NOT give a error.
Next step: „back to the drawing board¯ -- „If you want something done right, you must do it yourself¯ or something like that.
Here's with what I came up:
function FindString(const Value, Pattern: string;
  const CaseSensitive: Boolean = True;
  const StartPos: Integer = 1): integer;
var
  Index: Integer;
  jIndex: Integer;
  kIndex: Integer;
  LLenPattern: Integer;
  LLenValue: Integer;
  LSkipTable: array[Char] of Integer;
  LFound: Boolean;
  LChar: Char;

    function __SameChar: Boolean;
    begin
      if CaseSensitive then
        Result := (Value[Index] = Pattern[jIndex])
      else
        Result := (CompareText(Value[Index], Pattern[jIndex]) = 0);
    end; // function __SameChar: Boolean;

begin
  LFound := False;
  Result := 0;
  LLenPattern := Length(Pattern);
  if LLenPattern = 0 then begin
    Result := 1;
    LFound := True;
  end; // if LLenPattern = 0 then begin
  for LChar := Low(Char) to High(Char) do
    LSkipTable[LChar] := LLenPattern;
  if CaseSensitive then begin
    for kIndex := 1 to LLenPattern -1 do
      LSkipTable[Pattern[kIndex]] := LLenPattern -kIndex;
  end else begin
    for kIndex := 1 to LLenPattern -1 do
      LSkipTable[Windows.CharLower(@Pattern[kIndex])^] := LLenPattern -kIndex;
  end; // if CaseSensitive then begin
  kIndex := LLenPattern + (StartPos -1);
  LLenValue := Length(Value);
  while (NOT LFound) and (kIndex <= LLenValue) do begin
    Index := kIndex;
    jIndex := LLenPattern;
    while (jIndex >= 1) do begin
      if __SameChar then begin
        jIndex := jIndex -1;
        Index := Index -1;
      end else
        jIndex := -1;
      if jIndex = 0 then begin
        Result := Index +1;
        LFound := True;
      end; // if jIndex = 0 then begin
      kIndex := kIndex + LSkipTable[Value[kIndex]];
    end; // while (jIndex >= 1) do begin
  end; // while (NOT LFound) and (kIndex <= LLenValue) do begin
end;
If you find any problems(or improvements) in the algorithm please let me know.

Blogroll(General programming and Delphi feeds)