How to search for a pattern in a file (Views: 32)
Problem/Question/Abstract: I need to locate a pattern in a file (both text and binary) - just like the Pos function does with the strings. Preferably, it should deal with FileStream. Straightforward solution first seemed kind of expensive - that is to just plainly go through the stream comparing patterns on every step. Answer: Solve 1: You can do it that way but it is much faster to load chunks of data into a sizeable buffer and do the search in the buffer. Here is an example: function ScanFile(const filename: string; const forString: string; caseSensitive: Boolean): LongInt; { returns position of string in file or -1, if not found } const BufferSize = $8001; { 32K + 1 bytes } var pBuf, pEnd, pScan, pPos: Pchar; filesize: LongInt; bytesRemaining: LongInt; bytesToRead: Word; F: file; SearchFor: Pchar; oldMode: Word; begin Result := -1; { assume failure } if (Length(forString) = 0) or (Length(filename) = 0) then Exit; SearchFor := nil; pBuf := nil; { open file as binary, 1 byte recordsize } AssignFile(F, filename); oldMode := FileMode; FileMode := 0; { read-only access } Reset(F, 1); FileMode := oldMode; try { allocate memory for buffer and pchar search string } SearchFor := StrAlloc(Length(forString) + 1); StrPCopy(SearchFor, forString); if not caseSensitive then { convert to upper case } AnsiUpper(SearchFor); GetMem(pBuf, BufferSize); filesize := System.Filesize(F); bytesRemaining := filesize; pPos := nil; while bytesRemaining > 0 do begin { calc how many bytes to read this round } if bytesRemaining >= BufferSize then bytesToRead := Pred(BufferSize) else bytesToRead := bytesRemaining; { read a buffer full and zero-terminate the buffer } BlockRead(F, pBuf^, bytesToRead, bytesToRead); pEnd := @pBuf[bytesToRead]; pEnd^ := #0; { scan the buffer. Problem: buffer may contain #0 chars! So we treat it as a concatenation of zero-terminated strings. } pScan := pBuf; while pScan < pEnd do begin if not caseSensitive then { convert to upper case } AnsiUpper(pScan); pPos := StrPos(pScan, SearchFor); { search for substring } if pPos <> nil then begin { Found it! } Result := FileSize - bytesRemaining + LongInt(pPos) - LongInt(pBuf); Break; end; pScan := StrEnd(pScan); Inc(pScan); end; if pPos <> nil then Break; bytesRemaining := bytesRemaining - bytesToRead; if bytesRemaining > 0 then begin { no luck in this buffers load. We need to handle the case of the search string spanning two chunks of file now. We simply go back a bit in the file and read from there, thus inspecting some characters twice } Seek(F, FilePos(F) - Length(forString)); bytesRemaining := bytesRemaining + Length(forString); end; end; finally CloseFile(F); if SearchFor <> nil then StrDispose(SearchFor); if pBuf <> nil then FreeMem(pBuf, BufferSize); end; end; Solve 2: procedure TForm1.Button1Click(Sender: TObject); var s: string; hFile: THandle; hFileMapObj: THandle; pSharedBuf: Pointer; Time0: Integer; p: PChar; begin if not OpenDialog1.Execute then Exit; s := InputBox('Find', 'Match', ''); Time0 := GetTickCount; hfile := 0; hFileMapObj := 0; pSharedBuf := nil; try hFile := FileOpen(OpenDialog1.FileName, fmOpenRead); Win32Check(hFileMapObj <> INVALID_HANDLE_VALUE); hFileMapObj := CreateFileMapping(hFile, nil, PAGE_READONLY, 0, 0, nil); Win32Check(hFileMapObj <> 0); pSharedBuf := MapViewOfFile(hFileMapObj, FILE_MAP_READ, 0, 0, 0); Win32Check(pSharedBuf <> nil); P := StrPos(PChar(pSharedBuf), PChar(s)); finally if pSharedBuf <> nil then UnMapViewOfFile(pSharedBuf); if hFileMapObj <> 0 then CloseHandle(hFileMapObj); if hFile <> 0 then CloseHandle(hFile); end; if P = nil then Caption := Format('Not found, ticks=%d', [GetTickCount - Time0]) else Caption := Format('Found it at pos %d, ticks=%d', [Integer(P - PChar(pSharedBuf)), GetTickCount - Time0]); end; |