Eu tenho um hashes.txt
arquivo que armazena strings e seus valores de hash SHA-256 compactados. Cada linha do arquivo é formatada da seguinte forma:
<compressed_hash>:<original_string>
O compressed_hash
é criado pegando o 6º, 13º, 20º e 27º caracteres do hash SHA-256 completo. Por exemplo, a string alon
quando hash: 5a24f03a01d5b10cab6124f3c0e7086994ac9c869fc8e76e1463458f829fc864
seria armazenada como:
0db3:alon
Eu tenho um search.py
script que funciona assim
Por exemplo, se as entradas do usuário 5a24f03a01d5b10cab6124f3c0e7086994ac9c869fc8e76e1463458f829fc864
no search.py
script pesquisarem seu formato abreviado, 0db3
em hashes.txt
. Se várias correspondências forem encontradas, como:
0db3:alon
0db3:apple
O script refaz as correspondências ( alon
, apple
) para obter o hash SHA-256 completo e, se houver uma correspondência (por exemplo, alon
quando o hash completo corresponde à entrada do usuário ( 5a24f03a01d5b10cab6124f3c0e7086994ac9c869fc8e76e1463458f829fc864
), o script imprime a string ( alon
)
O problema desse script é que a busca geralmente leva em torno de 1 hora, e a minha hashes.txt
tem 54GB. Aqui está o search.py
:
import hashlib
import mmap
def compress_hash(hash_value):
return hash_value[6] + hash_value[13] + hash_value[20] + hash_value[27]
def search_compressed_hash(hash_input, compressed_file):
compressed_input = compress_hash(hash_input)
potential_matches = []
with open(compressed_file, "r+b") as file:
# Memory-map the file, size 0 means the whole file
mmapped_file = mmap.mmap(file.fileno(), 0)
# Read through the memory-mapped file line by line
for line in iter(mmapped_file.readline, b""):
line = line.decode().strip()
parts = line.split(":", 1) # Split only on the first colon
if len(parts) == 2: # Ensure there are exactly two parts
compressed_hash, string = parts
if compressed_hash == compressed_input:
potential_matches.append(string)
mmapped_file.close()
return potential_matches
def verify_full_hash(potential_matches, hash_input):
for string in potential_matches:
if hashlib.sha256(string.encode()).hexdigest() == hash_input:
return string
return None
if __name__ == "__main__":
while True:
hash_input = input("Enter the hash (or type 'exit' to quit): ")
if hash_input.lower() == 'exit':
break
potential_matches = search_compressed_hash(hash_input, "hashes.txt")
found_string = verify_full_hash(potential_matches, hash_input)
if found_string:
print(f"Corresponding string: {found_string}")
else:
print("String not found for the given hash.")
E, se ajudar, aqui está o hash.py
script que realmente gera as strings e hashes e os colocahashes.txt
import hashlib
import sys
import time
`# Set the interval for saving progress (in seconds)
SAVE_INTERVAL = 60 # Save progress every minute
BUFFER_SIZE = 1000000 # Number of hashes to buffer before writing to file
def generate_hash(string):
return hashlib.sha256(string.encode()).hexdigest()
def compress_hash(hash_value):
return hash_value[6] + hash_value[13] + hash_value[20] + hash_value[27]
def write_hashes_to_file(start_length):
buffer = [] # Buffer to store generated hashes
last_save_time = time.time() # Store the last save time
for generated_string in generate_strings_and_hashes(start_length):
full_hash = generate_hash(generated_string)
compressed_hash = compress_hash(full_hash)
buffer.append((compressed_hash, generated_string))
if len(buffer) >= BUFFER_SIZE:
save_buffer_to_file(buffer)
buffer = [] # Clear the buffer after writing to file
# Check if it's time to save progress
if time.time() - last_save_time >= SAVE_INTERVAL:
print("Saving progress...")
save_buffer_to_file(buffer) # Save any remaining hashes in buffer
buffer = [] # Clear buffer after saving
last_save_time = time.time()
# Save any remaining hashes in buffer
if buffer:
save_buffer_to_file(buffer)
def save_buffer_to_file(buffer):
with open("hashes.txt", "a") as file_hashes:
file_hashes.writelines(f"{compressed_hash}:{generated_string}\n" for compressed_hash, generated_string in buffer)
def generate_strings_and_hashes(start_length):
for length in range(start_length, sys.maxsize): # Use sys.maxsize to simulate infinity
current_string = [' '] * length # Initialize with spaces
while True:
yield ''.join(current_string)
if current_string == ['z'] * length: # Stop when all characters reach 'z'
break
current_string = increment_string(current_string)
def increment_string(string_list):
index = len(string_list) - 1
while index >= 0:
if string_list[index] == 'z':
string_list[index] = ' '
index -= 1
else:
string_list[index] = chr(ord(string_list[index]) + 1)
break
if index < 0:
string_list.insert(0, ' ')
return string_list
def load_progress():
# You may not need this function anymore
return 1 # Just return a default value
if __name__ == "__main__":
write_hashes_to_file(load_progress())`
Meu sistema operacional é o Windows 10.