2026-rff_mp/KolbasovPD/docs/data/1-st_exercise/main.py

395 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import random
import csv
import sys
# Raise the interpreter's recursion limit well above the default; any deeply
# recursive call would otherwise fail with RecursionError.
sys.setrecursionlimit(100000)
def ll_insert(head, name, phone):
    """Insert or update a record in a singly linked list of dict nodes.

    Walks the list from ``head``. If a node with the same ``name`` exists its
    ``phone`` is overwritten; otherwise a new node is appended to the tail.

    Returns the head of the list (the new node when the list was empty).
    """
    fresh = {'name': name, 'phone': phone, 'next': None}
    if head is None:
        return fresh

    node = head
    while True:
        if node['name'] == name:
            # Existing entry: update in place, the fresh node is discarded.
            node['phone'] = phone
            return head
        if not node['next']:
            # Reached the tail without a match: append.
            node['next'] = fresh
            return head
        node = node['next']
def ll_find(head, name):
    """Return the phone stored under ``name`` in the linked list, or None.

    Performs a linear scan starting at ``head``.
    """
    node = head
    while node:
        if node['name'] != name:
            node = node['next']
            continue
        return node['phone']
    return None
def ll_delete(head, name):
    """Remove the first node named ``name`` from the linked list.

    Returns the (possibly new) head of the list. The list is returned
    unchanged when no node matches.
    """
    if head is None:
        return None
    if head['name'] == name:
        # Dropping the head: its successor becomes the new head.
        return head['next']

    before, node = head, head['next']
    while node:
        if node['name'] == name:
            # Unlink the matching node by bypassing it.
            before['next'] = node['next']
            break
        before, node = node, node['next']
    return head
def ll_list_all(head):
    """Return all ``(name, phone)`` pairs of the linked list, sorted by name."""
    pairs = []
    node = head
    while node:
        pairs.append((node['name'], node['phone']))
        node = node['next']
    # sorted() is stable, exactly like the in-place list.sort().
    return sorted(pairs, key=lambda pair: pair[0])
def hash_function(name, table_size):
    """Map ``name`` to a bucket index in ``range(table_size)``.

    Uses a polynomial rolling hash (base 31) reduced modulo ``table_size``
    after every character. Unlike a plain sum of code points, the result
    depends on character order, so keys that share the same characters
    (anagrams, or numbered names whose digits have equal sums) are spread
    over different buckets instead of piling up in a handful of them.

    Args:
        name: The key to hash; any string, including the empty string.
        table_size: Number of buckets; must be a positive integer.

    Returns:
        An integer ``i`` with ``0 <= i < table_size``. The empty string
        hashes to 0.

    Raises:
        ValueError: If ``table_size`` is not positive.
    """
    if table_size <= 0:
        raise ValueError(f"table_size must be positive, got {table_size!r}")
    index = 0
    for char in name:
        # Reduce at every step so the intermediate value stays small.
        index = (index * 31 + ord(char)) % table_size
    return index
def ht_create(size=1000):
    """Create an empty hash table: a list of ``size`` buckets, each None."""
    return [None for _ in range(size)]
def ht_insert(buckets, name, phone):
    """Insert or update ``name`` -> ``phone`` in the hash table, in place.

    The bucket is chosen with ``hash_function``; collisions are chained
    through ``ll_insert``, whose returned head is stored back in the bucket.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Look ``name`` up in the hash table.

    Returns whatever ``ll_find`` returns for the bucket selected by
    ``hash_function``.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove ``name`` from the hash table, in place.

    The bucket selected by ``hash_function`` is replaced with the chain head
    returned by ``ll_delete``.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every ``(name, phone)`` pair in the hash table, sorted by name.

    Each bucket is treated as the head of a linked chain of dict nodes
    (or None when empty).
    """
    pairs = []
    for chain in buckets:
        node = chain
        while node:
            pairs.append((node['name'], node['phone']))
            node = node['next']
    # Buckets are unordered, so sort once at the end (stable, by name).
    return sorted(pairs, key=lambda pair: pair[0])
def bst_insert_iterative(root, name, phone):
    """Insert or update a record in an unbalanced BST without recursion.

    Nodes are dicts with ``name``, ``phone``, ``left`` and ``right`` keys,
    ordered by ``name``. If ``name`` is already present only its ``phone``
    is replaced.

    Returns the root of the tree (the new node when the tree was empty).
    """
    leaf = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return leaf

    node = root
    while True:
        if name < node['name']:
            side = 'left'
        elif name > node['name']:
            side = 'right'
        else:
            # Same key: update the stored phone and stop.
            node['phone'] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = leaf
            return root
        node = child
def bst_find_iterative(root, name):
    """Return the phone stored under ``name`` in the BST, or None if absent.

    Descends from ``root`` iteratively, going left for smaller names and
    right otherwise.
    """
    node = root
    while node:
        key = node['name']
        if name == key:
            return node['phone']
        node = node['left'] if name < key else node['right']
    return None
def bst_find_min(node):
    """Return the leftmost node of the subtree rooted at ``node``.

    A falsy ``node`` (e.g. None) is returned unchanged.
    """
    if not node:
        return node
    while True:
        smaller = node['left']
        if not smaller:
            return node
        node = smaller
def bst_delete_iterative(root, name):
    """Delete the node keyed ``name`` from the BST without recursion.

    The search for the node and the search for its in-order successor are
    both plain loops, so the call depth stays constant even for a degenerate
    (list-shaped) tree built from sorted input.

    Args:
        root: Root node of the tree (a dict with ``name``, ``phone``,
            ``left`` and ``right`` keys) or None for an empty tree.
        name: Key of the record to remove.

    Returns:
        The root of the tree after deletion. This is a different node only
        when the old root had at most one child and was itself removed;
        it is None when the tree becomes (or already was) empty. The tree is
        returned unchanged when ``name`` is not present.
    """
    # Locate the target node while remembering its parent.
    parent = None
    node = root
    while node is not None and node['name'] != name:
        parent = node
        node = node['left'] if name < node['name'] else node['right']

    if node is None:
        # Empty tree or key not found: nothing to do.
        return root

    if node['left'] is not None and node['right'] is not None:
        # Two children: copy the in-order successor (leftmost node of the
        # right subtree) into this node, then unlink the successor.
        succ_parent = node
        successor = node['right']
        while successor['left'] is not None:
            succ_parent = successor
            successor = successor['left']
        node['name'] = successor['name']
        node['phone'] = successor['phone']
        # Identity, not equality: we need to know whether the successor is
        # the direct right child of the node being overwritten.
        if succ_parent is node:
            succ_parent['right'] = successor['right']
        else:
            succ_parent['left'] = successor['right']
        return root

    # Zero or one child: splice the (possibly None) child into the parent.
    child = node['right'] if node['left'] is None else node['left']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
def bst_list_all(root):
    """Return all ``(name, phone)`` pairs of the BST in ascending name order.

    Implemented as an iterative in-order traversal with an explicit stack.
    """
    ordered = []
    pending = []
    node = root
    while True:
        if node:
            # Keep descending left, remembering the path.
            pending.append(node)
            node = node['left']
        elif pending:
            # Left side exhausted: visit the node, then go right.
            node = pending.pop()
            ordered.append((node['name'], node['phone']))
            node = node['right']
        else:
            break
    return ordered
def generate_test_data(N=10000):
    """Generate ``N`` synthetic phone-book records in two orderings.

    Names are ``User_00000`` .. ``User_<N-1>`` (zero-padded to five digits);
    each phone is ``+7-999-`` followed by a random seven-digit number.

    Returns:
        A tuple ``(records_shuffled, records_sorted)`` holding the same
        ``(name, phone)`` pairs in random order and sorted by name.
    """
    names = []
    for i in range(N):
        names.append(f"User_{i:05d}")
    # Draw all phone numbers after the names, one randint() call per record.
    phones = []
    for _ in range(N):
        phones.append(f"+7-999-{random.randint(1000000, 9999999)}")

    records = [(name, phone) for name, phone in zip(names, phones)]
    shuffled = list(records)
    random.shuffle(shuffled)
    by_name = sorted(records, key=lambda record: record[0])
    return shuffled, by_name
def measure_insertion(structure_type, records, ht_size=1000):
    """Build a data structure from ``records`` and time the insertions.

    Only the insertion loop is timed; creating the empty structure is not.

    Args:
        structure_type: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        records: Iterable of ``(name, phone)`` pairs to insert in order.
        ht_size: Bucket count, used only for ``"HashTable"``.

    Returns:
        A tuple ``(structure, seconds)``: the populated structure (list head,
        bucket list or tree root) and the elapsed wall-clock time.

    Raises:
        ValueError: If ``structure_type`` is not one of the supported names.
            Previously this case fell through and returned None, which only
            surfaced later as an unpacking error in the caller.
    """
    if structure_type == "LinkedList":
        head = None
        start = time.perf_counter()
        for name, phone in records:
            head = ll_insert(head, name, phone)
        end = time.perf_counter()
        return head, (end - start)

    if structure_type == "HashTable":
        buckets = ht_create(ht_size)
        start = time.perf_counter()
        for name, phone in records:
            ht_insert(buckets, name, phone)
        end = time.perf_counter()
        return buckets, (end - start)

    if structure_type == "BST":
        root = None
        start = time.perf_counter()
        for name, phone in records:
            root = bst_insert_iterative(root, name, phone)
        end = time.perf_counter()
        return root, (end - start)

    raise ValueError(f"Unknown structure type: {structure_type!r}")
def measure_search(data_structure, structure_type, existing_names, non_existing_names):
    """Time lookups of present and absent names in one data structure.

    The lookup function is selected once, before the clock starts, so the
    measured interval contains only the lookups themselves and not a chain
    of string comparisons per name.

    Args:
        data_structure: Structure matching ``structure_type`` (list head,
            bucket list or tree root).
        structure_type: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        existing_names: Names to look up first.
        non_existing_names: Names to look up afterwards.

    Returns:
        Total elapsed wall-clock time in seconds for all lookups.

    Raises:
        ValueError: If ``structure_type`` is not one of the supported names
            (previously such a call silently timed two empty loops).
    """
    if structure_type == "LinkedList":
        find = ll_find
    elif structure_type == "HashTable":
        find = ht_find
    elif structure_type == "BST":
        find = bst_find_iterative
    else:
        raise ValueError(f"Unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    for name in existing_names:
        find(data_structure, name)
    for name in non_existing_names:
        find(data_structure, name)
    end = time.perf_counter()
    return end - start
def measure_deletion(data_structure, structure_type, names_to_delete):
    """Delete ``names_to_delete`` from a data structure and time it.

    The structure kind is validated and dispatched outside the per-name
    loop so the timed interval is dominated by the deletions themselves.

    Args:
        data_structure: Structure matching ``structure_type`` (list head,
            bucket list or tree root).
        structure_type: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        names_to_delete: Names to remove, in order.

    Returns:
        A tuple ``(structure, seconds)``. For the linked list and the BST
        the structure is the head/root returned by the last deletion; the
        hash table is modified in place and returned as passed in.

    Raises:
        ValueError: If ``structure_type`` is not one of the supported names
            (previously such a call silently deleted nothing).
    """
    if structure_type not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"Unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    if structure_type == "HashTable":
        # Buckets are mutated in place; there is no new head to keep.
        for name in names_to_delete:
            ht_delete(data_structure, name)
    else:
        delete = ll_delete if structure_type == "LinkedList" else bst_delete_iterative
        for name in names_to_delete:
            data_structure = delete(data_structure, name)
    end = time.perf_counter()
    return data_structure, (end - start)
def run_experiment(N=5000, repeats=5):
    """Benchmark insertion, search and deletion for every structure and order.

    Generates ``N`` records once, samples up to 100 existing names to search
    for (plus 10 names that are absent) and up to 50 names to delete. Then,
    for each structure ("LinkedList", "HashTable", "BST") and each input
    order (shuffled, sorted), rebuilds the structure ``repeats`` times and
    records the three timings. Progress and per-combination results are
    printed as the experiment runs.

    Returns:
        A list of dicts, one per (structure, mode) pair, with the keys
        ``structure``, ``mode`` and, for each of insertion/search/deletion,
        ``<op>_avg`` (mean seconds) and ``<op>_all`` (list of all samples).
    """
    print(f"Генерация тестовых данных (N={N})...")
    records_shuffled, records_sorted = generate_test_data(N)

    # Sampling order matters for reproducibility under a fixed random seed.
    existing_names = [pair[0] for pair in random.sample(records_shuffled, min(100, N))]
    non_existing_names = [f"None_{i}" for i in range(10)]
    delete_names = [pair[0] for pair in random.sample(records_shuffled, min(50, N))]

    results = []
    for struct in ("LinkedList", "HashTable", "BST"):
        for mode in ("случайный", "отсортированный"):
            if mode == "случайный":
                records = records_shuffled
            else:
                records = records_sorted
            print(f"\nТестирование: {struct}, режим: {mode}")

            insertion_times, search_times, deletion_times = [], [], []
            for rep in range(repeats):
                print(f" Повторение {rep+1}/{repeats}...")
                # Each repetition starts from a freshly built structure.
                built, insert_time = measure_insertion(struct, records)
                insertion_times.append(insert_time)
                search_times.append(
                    measure_search(built, struct, existing_names, non_existing_names)
                )
                built, delete_time = measure_deletion(built, struct, delete_names)
                deletion_times.append(delete_time)

            avg_insert = sum(insertion_times) / repeats
            avg_search = sum(search_times) / repeats
            avg_delete = sum(deletion_times) / repeats
            results.append({
                "structure": struct,
                "mode": mode,
                "insertion_avg": avg_insert,
                "insertion_all": insertion_times,
                "search_avg": avg_search,
                "search_all": search_times,
                "deletion_avg": avg_delete,
                "deletion_all": deletion_times,
            })
            print(f" Вставка: {avg_insert:.6f} сек (замеры: {[f'{t:.6f}' for t in insertion_times]})")
            print(f" Поиск: {avg_search:.6f} сек (замеры: {[f'{t:.6f}' for t in search_times]})")
            print(f" Удаление: {avg_delete:.6f} сек (замеры: {[f'{t:.6f}' for t in deletion_times]})")
    return results
def save_results_to_csv(results, filename="results.csv"):
    """Write the benchmark results to a UTF-8 CSV file, one row per sample.

    For every result dict and every operation (insertion, search, deletion)
    the individual measurements are written with a 1-based repetition
    number, followed by one row carrying the average. A confirmation line
    is printed once the file has been written.

    Args:
        results: List of result dicts as produced by ``run_experiment``.
        filename: Path of the CSV file to create or overwrite.
    """
    # (row label, key of the samples list, key of the average)
    operations = (
        ("вставка", "insertion_all", "insertion_avg"),
        ("поиск", "search_all", "search_avg"),
        ("удаление", "deletion_all", "deletion_avg"),
    )
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Структура", "Режим", "Операция", "Повторение", "Время (сек)"])
        for res in results:
            prefix = [res["structure"], res["mode"]]
            for label, samples_key, average_key in operations:
                for number, elapsed in enumerate(res[samples_key], start=1):
                    writer.writerow(prefix + [label, number, elapsed])
                writer.writerow(prefix + [label, "СРЕДНЕЕ", res[average_key]])
    print(f"\nРезультаты сохранены в {filename}")
def print_summary_table(results):
    """Print a table of average timings and the BST degradation ratio.

    The first section lists, for every result, the average insertion, search
    and deletion time. The second section compares BST insertion on sorted
    versus shuffled input; it requires both BST results to be present in
    ``results``.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("СВОДНАЯ ТАБЛИЦА РЕЗУЛЬТАТОВ (среднее время в секундах)")
    print(rule)
    print(f"{'Структура':<15} {'Режим':<12} {'Вставка':<12} {'Поиск (110)':<12} {'Удаление (50)':<12}")
    print("-" * 80)
    for res in results:
        row = "{:<15} {:<12} {:<12.6f} {:<12.6f} {:<12.6f}".format(
            res['structure'],
            res['mode'],
            res['insertion_avg'],
            res['search_avg'],
            res['deletion_avg'],
        )
        print(row)

    print("\n" + rule)
    print("АНАЛИЗ ДЕГРАДАЦИИ BST")
    print(rule)
    # First BST entry for each input order.
    bst_random = next(
        r for r in results
        if r['structure'] == "BST" and r['mode'] == "случайный"
    )
    bst_sorted = next(
        r for r in results
        if r['structure'] == "BST" and r['mode'] == "отсортированный"
    )
    degradation = bst_sorted['insertion_avg'] / bst_random['insertion_avg']
    print(f"BST: отсортированные данные в {degradation:.1f} раз медленнее случайных")
    print("Причина: вырождение дерева в линейный связный список (O(n) вместо O(log n))")
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist the raw timings, then
    # print the summary table, the written conclusions and a short extra
    # comparison.
    banner = "=" * 80

    print(banner)
    print("ЭКСПЕРИМЕНТАЛЬНОЕ СРАВНЕНИЕ СТРУКТУР ДАННЫХ ДЛЯ ТЕЛЕФОННОГО СПРАВОЧНИКА")
    print(banner)

    results = run_experiment(N=5000, repeats=5)
    save_results_to_csv(results)
    print_summary_table(results)

    print("\n" + banner)
    print("ВЫВОДЫ И РЕКОМЕНДАЦИИ")
    print(banner)
    print("""
1. Хеш-таблица:
Лучшая производительность для операций поиска и вставки (O(1) в среднем)
Не чувствительна к порядку входных данных
Требует память под массив бакетов
Не поддерживает естественный порядок (нужна сортировка)
Идеально для справочников с частым поиском
2. Двоичное дерево поиска:
Естественная сортировка (in-order обход)
Хорошая производительность на случайных данных (O(log n))
Сильная деградация на отсортированных данных (O(n))
Рекурсивные операции требуют больше памяти
Хорошо для задач, где нужен отсортированный вывод
3. Связный список:
Простота реализации
Медленный поиск и удаление (O(n))
Неэффективен для больших объёмов данных
Применим только для очень маленьких справочников
РЕКОМЕНДАЦИИ ДЛЯ РЕАЛЬНЫХ ЗАДАЧ:
Частый поиск, редкие вставки -> ХЕШ-ТАБЛИЦА
Нужен отсортированный вывод -> ДЕРЕВО (с балансировкой)
Очень маленький справочник (<100 записей) -> СПИСОК
В реальных БД -> хеш-таблица + B-деревья
""")

    print("\n" + banner)
    print("ДОПОЛНИТЕЛЬНЫЙ АНАЛИЗ")
    print(banner)
    # Search time on shuffled input, one line per structure.
    for struct in ("LinkedList", "HashTable", "BST"):
        res_random = next(
            r for r in results
            if r['structure'] == struct and r['mode'] == "случайный"
        )
        print(f"{struct:12} поиск 110 записей: {res_random['search_avg']:.6f} сек")

    # Linked-list insertion: sorted input relative to shuffled input.
    ll_random = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "случайный"
    )
    ll_sorted = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "отсортированный"
    )
    ll_ratio = ll_sorted['insertion_avg'] / ll_random['insertion_avg']
    print(f"\nСвязный список: деградация {ll_ratio:.2f}х")