"""Scan a raw $MFT dump for ".sql" strings and log each hit with context.

The file is searched twice: once for the single-byte (ASCII) form of ".sql"
and once for its UTF-16LE form. Every hit is printed and written to a report
file together with the record number it falls in, its byte offset, and a
sanitized text preview of the surrounding bytes.
"""
import re
import time

# Paths
mft_path = r"C:\Temp\projects\fileCollector\Collection\MFT_C"
output_path = r"C:\Temp\projects\fileCollector\sqlScripts.txt"

# MFT record size
MFT_RECORD_SIZE = 1024  # bytes per $MFT record

# Context window
CONTEXT_CHARS = 100

# Raw bytes decoded on each side of a hit. Deliberately much larger than
# CONTEXT_CHARS because undecodable bytes are dropped while decoding, so more
# than CONTEXT_CHARS bytes may be needed to yield CONTEXT_CHARS characters.
# Must stay even so UTF-16 code-unit alignment is preserved.
_WINDOW_BYTES = 4096

# Literal ".sql", case-insensitive, in single-byte and UTF-16LE form.
_ASCII_DOT_SQL = re.compile(rb"\.sql", re.IGNORECASE)
_UTF16_DOT_SQL = re.compile(rb"\x2e\x00[sS]\x00[qQ]\x00[lL]\x00")

# Anything outside printable 7-bit ASCII (0x20-0x7E).
_NON_PRINTABLE = re.compile(r"[^\x20-\x7E]")


def clean_text(s: str) -> str:
    """Sanitize text for clean one-line output.

    Newlines, carriage returns and tabs become spaces, every remaining
    character outside printable ASCII (0x20-0x7E) is removed, and the result
    is stripped of leading/trailing whitespace.
    """
    s = s.replace("\n", " ").replace("\r", " ").replace("\t", " ")
    s = _NON_PRINTABLE.sub("", s)  # strip non-printable
    return s.strip()


def slice_around_hit(decoded: str, hit_index: int, hit_len: int, ctx: int) -> str:
    """Return the hit plus up to ``ctx`` characters on each side of it.

    Args:
        decoded: Text containing the hit.
        hit_index: Index in ``decoded`` where the hit starts.
        hit_len: Length of the hit in characters.
        ctx: Maximum number of context characters to keep on each side.

    Returns:
        The slice of ``decoded`` covering the hit and its context, clamped to
        the bounds of ``decoded`` (so fewer than ``ctx`` characters are
        returned on a side that is too short).
    """
    start = max(0, hit_index - ctx)
    end = min(len(decoded), hit_index + hit_len + ctx)
    return decoded[start:end]


def _preview_for_hit(data: bytes, hit_start: int, hit_end: int,
                     encoding: str, unit: int) -> str:
    """Build the sanitized preview text for the hit at ``data[hit_start:hit_end]``.

    The bytes before, inside and after the hit are decoded separately, so the
    hit's position in the decoded text is known exactly. Searching the decoded
    window for ".sql" again would find the first occurrence in the window,
    which is not necessarily the hit being reported.

    ``unit`` is the code-unit width of ``encoding`` in bytes (1 for ASCII,
    2 for UTF-16LE).
    """
    before_start = max(0, hit_start - _WINDOW_BYTES)
    # Align the window to the hit's own code-unit boundary rather than to an
    # absolute even offset: a UTF-16 string inside the dump may start at an odd
    # byte offset, and a one-byte shift would decode to garbage.
    before_start += (hit_start - before_start) % unit

    before = data[before_start:hit_start].decode(encoding, errors="ignore")
    hit = data[hit_start:hit_end].decode(encoding, errors="ignore")
    after = data[hit_end:hit_end + _WINDOW_BYTES].decode(encoding, errors="ignore")

    snippet = slice_around_hit(before + hit + after, len(before), len(hit),
                               CONTEXT_CHARS)
    return clean_text(snippet)


def _report_hits(out, data: bytes, pattern, encoding: str, unit: int) -> int:
    """Print and write one report line per match of ``pattern``; return the count."""
    count = 0
    for m in pattern.finditer(data):
        count += 1
        snippet = _preview_for_hit(data, m.start(), m.end(), encoding, unit)
        record_num = m.start() // MFT_RECORD_SIZE
        line = (
            f"Record {record_num} | Offset {m.start()} | Match: .sql | "
            f"Preview: ...{snippet}..."
        )
        print(line)
        out.write(line + "\n")
    return count


def search_mft_for_dot_sql(mft_path, output_path):
    """Search an $MFT dump for ".sql" and write a hit report.

    Args:
        mft_path: Path of the raw $MFT file to scan (opened in binary mode).
        output_path: Path of the UTF-8 text report to create or overwrite.

    The report starts with a three-line ``#`` header (file size, record size,
    record count), followed by one line per ASCII hit, then one line per
    UTF-16LE hit, and ends with a summary block. Progress, hit lines and the
    summary are also printed to stdout.

    Raises:
        OSError: If ``mft_path`` cannot be read or ``output_path`` cannot be
            written.
    """
    start = time.perf_counter()

    print(f"[INFO] Opening MFT file: {mft_path}")
    # NOTE: the whole dump is loaded into memory; this needs roughly as much
    # RAM as the $MFT file is large.
    with open(mft_path, "rb") as f:
        data = f.read()

    file_size = len(data)
    print(f"[INFO] File size: {file_size} bytes")

    # Calculate total records in the MFT file
    total_records = file_size // MFT_RECORD_SIZE
    print(f"[INFO] Total records in $MFT file: {total_records}")

    with open(output_path, "w", encoding="utf-8") as out:
        # Header
        header = (
            f"# File size: {file_size} bytes\n"
            f"# MFT record size: {MFT_RECORD_SIZE} bytes\n"
            f"# Total records in $MFT file: {total_records}\n"
        )
        out.write(header)
        print("[INFO] Header written to log")

        # ASCII search first, then UTF-16LE search
        hits_found = _report_hits(out, data, _ASCII_DOT_SQL, "ascii", 1)
        hits_found += _report_hits(out, data, _UTF16_DOT_SQL, "utf-16-le", 2)

        elapsed = time.perf_counter() - start
        summary = (
            f"\n--- Script completed in {elapsed:.2f} seconds ---\n"
            f"Summary Calculation:\n"
            f"  File size: {file_size} bytes\n"
            f"  MFT record size: {MFT_RECORD_SIZE} bytes\n"
            f"  Total records in $MFT file: {total_records}\n"
            f"  Total .sql hits found: {hits_found}\n"
        )
        print(summary)
        out.write(summary + "\n")

    if hits_found == 0:
        print("[INFO] No .sql hits found in this MFT file.")


if __name__ == "__main__":
    search_mft_for_dot_sql(mft_path, output_path)