# Listing metadata from the code viewer: 395 lines, 14 KiB, Python.
import time
|
||
import random
|
||
import csv
|
||
import sys
|
||
# Raise the interpreter's recursion limit far above the CPython default.
# NOTE(review): presumably needed for recursive tree routines working on a
# degenerate (list-shaped) BST built from sorted input — confirm before removing.
sys.setrecursionlimit(100000)
|
||
|
||
def ll_insert(head, name, phone):
    """Insert or update a record in a singly linked list of dict nodes.

    Each node is a dict with the keys 'name', 'phone' and 'next'.  If a node
    with the same name already exists its phone is overwritten; otherwise a
    new node is appended at the tail.

    Args:
        head: First node of the list, or None for an empty list.
        name: Key of the record.
        phone: Value stored for the key.

    Returns:
        The head of the list (the new node when the list was empty).
    """
    fresh = {'name': name, 'phone': phone, 'next': None}
    if head is None:
        return fresh

    tail = None
    node = head
    while node:
        if node['name'] == name:
            # Existing key: update in place, the fresh node is discarded.
            node['phone'] = phone
            return head
        tail, node = node, node['next']

    # Walked off the end without a match: link the new node after the tail.
    tail['next'] = fresh
    return head
|
||
|
||
def ll_find(head, name):
    """Return the phone stored under *name* in a linked list, or None.

    Args:
        head: First node of the list (dict with 'name', 'phone', 'next'),
            or None for an empty list.
        name: Key to look for.

    Returns:
        The 'phone' value of the first node whose 'name' equals *name*,
        or None when no node matches.
    """
    node = head
    while node:
        if node['name'] != name:
            node = node['next']
            continue
        return node['phone']
    return None
|
||
|
||
def ll_delete(head, name):
    """Unlink the first node whose 'name' equals *name*.

    Args:
        head: First node of the list, or None for an empty list.
        name: Key of the record to remove.

    Returns:
        The head of the list after removal.  This is the second node when
        the head itself was removed, None for an empty list, and the
        unchanged head when no node matched.
    """
    if head is None:
        return None
    if head['name'] == name:
        return head['next']

    before = head
    after = head['next']
    while after:
        if after['name'] == name:
            # Bypass the matching node; only the first match is removed.
            before['next'] = after['next']
            break
        before, after = after, after['next']
    return head
|
||
|
||
def ll_list_all(head):
    """Return every record of a linked list as (name, phone) tuples.

    Args:
        head: First node of the list, or None for an empty list.

    Returns:
        A list of (name, phone) tuples ordered by name.  The sort is stable,
        so records with equal names keep their list order.
    """
    pairs = []
    node = head
    while node:
        pairs.append((node['name'], node['phone']))
        node = node['next']
    return sorted(pairs, key=lambda pair: pair[0])
|
||
|
||
def hash_function(name, table_size):
    """Map *name* to a bucket index in ``range(table_size)``.

    Uses an order-sensitive polynomial hash (multiplier 31).  A plain sum of
    character codes gives the same value for every permutation of the same
    characters, and for keys that differ only in a few digits it can take
    only a few dozen distinct values, which piles most records into a
    handful of buckets.

    The result is deterministic across runs (unlike the built-in ``hash``
    for strings, which is randomised per process by default).

    Args:
        name: String key.
        table_size: Number of buckets.

    Returns:
        An integer bucket index; 0 for an empty name.

    Raises:
        ZeroDivisionError: If *table_size* is 0.
    """
    digest = 0
    for ch in name:
        # Keep the accumulator within 32 bits so long keys stay cheap.
        digest = (digest * 31 + ord(ch)) & 0xFFFFFFFF
    return digest % table_size
|
||
|
||
def ht_create(size=1000):
    """Create an empty hash table.

    Args:
        size: Number of buckets.

    Returns:
        A list of *size* entries, each None (an empty chain).
    """
    return [None for _ in range(size)]
|
||
|
||
def ht_insert(buckets, name, phone):
    """Insert or update a record in the hash table, in place.

    The bucket is chosen with ``hash_function`` and the record is stored in
    that bucket's chain through ``ll_insert``.

    Args:
        buckets: List of chain heads (None for an empty bucket).
        name: Key of the record.
        phone: Value stored for the key.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
|
||
|
||
def ht_find(buckets, name):
    """Look up *name* in the hash table.

    Args:
        buckets: List of chain heads (None for an empty bucket).
        name: Key to look for.

    Returns:
        Whatever ``ll_find`` returns for the bucket selected by
        ``hash_function``.
    """
    chain = buckets[hash_function(name, len(buckets))]
    return ll_find(chain, name)
|
||
|
||
def ht_delete(buckets, name):
    """Remove the record stored under *name* from the hash table, in place.

    The bucket selected by ``hash_function`` is replaced with the chain head
    returned by ``ll_delete``.

    Args:
        buckets: List of chain heads (None for an empty bucket).
        name: Key of the record to remove.
    """
    slot = hash_function(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
|
||
|
||
def ht_list_all(buckets):
    """Return every record of the hash table as (name, phone) tuples.

    Args:
        buckets: List of chain heads; each chain is a linked list of dict
            nodes with 'name', 'phone' and 'next'.

    Returns:
        A list of (name, phone) tuples ordered by name.
    """
    collected = []
    for chain in buckets:
        node = chain
        while node:
            collected.append((node['name'], node['phone']))
            node = node['next']
    # Buckets are visited in index order, so an explicit sort is required.
    return sorted(collected, key=lambda pair: pair[0])
|
||
|
||
def bst_insert_iterative(root, name, phone):
    """Insert or update a record in an unbalanced binary search tree.

    Nodes are dicts with the keys 'name', 'phone', 'left' and 'right', and
    the tree is ordered by 'name'.  No rebalancing is performed.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Key of the record.
        phone: Value stored for the key.

    Returns:
        The root of the tree (the new node when the tree was empty).
    """
    leaf = {'name': name, 'phone': phone, 'left': None, 'right': None}
    if root is None:
        return leaf

    node = root
    while True:
        if name < node['name']:
            side = 'left'
        elif name > node['name']:
            side = 'right'
        else:
            # Key already present: overwrite the phone and stop.
            node['phone'] = phone
            return root

        if node[side] is None:
            node[side] = leaf
            return root
        node = node[side]
|
||
|
||
def bst_find_iterative(root, name):
    """Return the phone stored under *name* in the BST, or None.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Key to look for.

    Returns:
        The 'phone' value of the matching node, or None when the key is
        absent.
    """
    node = root
    while node:
        key = node['name']
        if name == key:
            return node['phone']
        node = node['left'] if name < key else node['right']
    return None
|
||
|
||
def bst_find_min(node):
    """Return the leftmost node of the subtree rooted at *node*.

    Args:
        node: Root of a subtree, or None.

    Returns:
        The node reached by following 'left' links until there is none;
        *node* itself is returned unchanged when it is falsy (e.g. None).
    """
    if not node:
        return node
    while node['left']:
        node = node['left']
    return node
|
||
|
||
def bst_delete_iterative(root, name):
    """Remove the record stored under *name* from the BST without recursion.

    The search for the node is a plain loop, so the call works on trees of
    any depth (including the list-shaped tree produced by inserting keys in
    sorted order) and does not depend on the interpreter's recursion limit.

    A node with two children is not unlinked; it takes over the name and
    phone of its in-order successor (the leftmost node of its right
    subtree), and the successor node is unlinked instead.

    Args:
        root: Root node of the tree, or None for an empty tree.
        name: Key of the record to remove.

    Returns:
        The root of the tree after removal: None for an empty tree, the
        unchanged root when the key is absent, or the surviving child when
        the root had at most one child and was removed.
    """
    # Locate the node to delete while remembering its parent.
    parent = None
    node = root
    while node is not None:
        if name < node['name']:
            parent, node = node, node['left']
        elif name > node['name']:
            parent, node = node, node['right']
        else:
            break
    if node is None:
        return root

    if node['left'] is not None and node['right'] is not None:
        # Two children: copy the in-order successor up, then unlink it.
        succ_parent = node
        successor = node['right']
        while successor['left'] is not None:
            succ_parent = successor
            successor = successor['left']

        node['name'] = successor['name']
        node['phone'] = successor['phone']

        # Identity check: the successor is either the direct right child of
        # the node or the left child of some deeper node.
        if succ_parent is node:
            succ_parent['right'] = successor['right']
        else:
            succ_parent['left'] = successor['right']
        return root

    # Zero or one child: splice the child (possibly None) into the parent.
    child = node['right'] if node['left'] is None else node['left']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
|
||
|
||
def bst_list_all(root):
    """Return every record of the BST as (name, phone) tuples, in key order.

    Performs an in-order traversal with an explicit stack instead of
    recursion.

    Args:
        root: Root node of the tree, or None for an empty tree.

    Returns:
        A list of (name, phone) tuples in in-order (ascending name) order.
    """
    ordered = []
    pending = []
    node = root
    while True:
        # Descend as far left as possible, remembering the path.
        while node:
            pending.append(node)
            node = node['left']
        if not pending:
            return ordered
        node = pending.pop()
        ordered.append((node['name'], node['phone']))
        node = node['right']
|
||
|
||
def generate_test_data(N=10000):
    """Build N synthetic phone-book records in two orderings.

    Names are ``User_00000``, ``User_00001``, ... and each phone is
    ``+7-999-`` followed by a random 7-digit number.

    Args:
        N: Number of records to generate.

    Returns:
        A tuple ``(records_shuffled, records_sorted)`` holding the same
        (name, phone) tuples: the first list in random order, the second
        sorted by name.
    """
    records = [
        (f"User_{i:05d}", f"+7-999-{random.randint(1000000, 9999999)}")
        for i in range(N)
    ]

    records_shuffled = list(records)
    random.shuffle(records_shuffled)

    return records_shuffled, sorted(records, key=lambda rec: rec[0])
|
||
|
||
def measure_insertion(structure_type, records, ht_size=1000):
    """Build a structure from *records* and time the insertions.

    Only the insertion loop is timed; creating the empty hash table is not
    part of the measurement.

    Args:
        structure_type: One of "LinkedList", "HashTable" or "BST".
        records: Iterable of (name, phone) pairs to insert, in order.
        ht_size: Number of buckets, used only for "HashTable".

    Returns:
        A tuple ``(structure, seconds)``: the populated structure (list
        head, bucket list or tree root) and the elapsed wall-clock time
        measured with ``time.perf_counter``.

    Raises:
        ValueError: If *structure_type* is not one of the supported names
            (previously the function fell through and returned None, which
            callers then failed to unpack).
    """
    if structure_type not in ("LinkedList", "HashTable", "BST"):
        raise ValueError(f"unknown structure type: {structure_type!r}")

    if structure_type == "HashTable":
        buckets = ht_create(ht_size)
        start = time.perf_counter()
        for name, phone in records:
            ht_insert(buckets, name, phone)
        elapsed = time.perf_counter() - start
        return buckets, elapsed

    # Linked list and BST share the same "insert returns the new top" shape.
    insert = ll_insert if structure_type == "LinkedList" else bst_insert_iterative
    top = None
    start = time.perf_counter()
    for name, phone in records:
        top = insert(top, name, phone)
    elapsed = time.perf_counter() - start
    return top, elapsed
|
||
|
||
def measure_search(data_structure, structure_type, existing_names, non_existing_names):
    """Time a batch of lookups against one structure.

    The lookup function is selected once, before the clock starts, so the
    measurement covers the lookups themselves and not the per-item string
    comparisons used for dispatch.

    Args:
        data_structure: List head, bucket list or tree root, matching
            *structure_type*.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        existing_names: Keys to look up first.
        non_existing_names: Keys to look up afterwards.

    Returns:
        Elapsed wall-clock seconds for all lookups, measured with
        ``time.perf_counter``.  Lookup results are discarded.

    Raises:
        ValueError: If *structure_type* is not one of the supported names
            (previously an empty loop was timed and returned as a result).
    """
    if structure_type == "LinkedList":
        find = ll_find
    elif structure_type == "HashTable":
        find = ht_find
    elif structure_type == "BST":
        find = bst_find_iterative
    else:
        raise ValueError(f"unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    for name in existing_names:
        find(data_structure, name)
    for name in non_existing_names:
        find(data_structure, name)
    return time.perf_counter() - start
|
||
|
||
def measure_deletion(data_structure, structure_type, names_to_delete):
    """Time the removal of a batch of keys from one structure.

    The removal routine is selected once, before the clock starts, so the
    measurement does not include per-item dispatch.

    Args:
        data_structure: List head, bucket list or tree root, matching
            *structure_type*.
        structure_type: One of "LinkedList", "HashTable" or "BST".
        names_to_delete: Keys to remove, in order.

    Returns:
        A tuple ``(structure, seconds)``.  For "LinkedList" and "BST" the
        structure is the head/root returned by the last removal; for
        "HashTable" it is the same bucket list, modified in place.

    Raises:
        ValueError: If *structure_type* is not one of the supported names
            (previously an empty loop was timed and returned as a result).
    """
    if structure_type == "LinkedList":
        remove = ll_delete
    elif structure_type == "BST":
        remove = bst_delete_iterative
    elif structure_type == "HashTable":
        remove = None  # ht_delete mutates the bucket list and returns nothing
    else:
        raise ValueError(f"unknown structure type: {structure_type!r}")

    start = time.perf_counter()
    if remove is None:
        for name in names_to_delete:
            ht_delete(data_structure, name)
    else:
        for name in names_to_delete:
            data_structure = remove(data_structure, name)
    elapsed = time.perf_counter() - start

    return data_structure, elapsed
|
||
|
||
def run_experiment(N=5000, repeats=5):
    """Benchmark insertion, search and deletion for every structure.

    For each structure ("LinkedList", "HashTable", "BST") and each input
    order (shuffled, sorted) the structure is rebuilt *repeats* times; each
    time the insertion, a batch of lookups and a batch of deletions are
    timed.  Progress and per-combination averages are printed.

    Args:
        N: Number of generated records.
        repeats: Number of repetitions per structure/order combination.

    Returns:
        A list of dicts, one per combination, with the keys "structure",
        "mode", and for each of insertion/search/deletion an "<op>_avg"
        float and an "<op>_all" list of the individual timings.
    """
    print(f"Генерация тестовых данных (N={N})...")
    records_shuffled, records_sorted = generate_test_data(N)

    # The same key samples are reused for every structure and repetition.
    existing_names = [rec[0] for rec in random.sample(records_shuffled, min(100, N))]
    non_existing_names = [f"None_{i}" for i in range(10)]
    delete_names = [rec[0] for rec in random.sample(records_shuffled, min(50, N))]

    datasets = (
        ("случайный", records_shuffled),
        ("отсортированный", records_sorted),
    )

    results = []
    for struct in ("LinkedList", "HashTable", "BST"):
        for mode, records in datasets:
            print(f"\nТестирование: {struct}, режим: {mode}")

            insertion_times, search_times, deletion_times = [], [], []

            for rep in range(repeats):
                print(f" Повторение {rep+1}/{repeats}...")

                data_structure, elapsed = measure_insertion(struct, records)
                insertion_times.append(elapsed)

                search_times.append(
                    measure_search(data_structure, struct, existing_names, non_existing_names)
                )

                data_structure, elapsed = measure_deletion(data_structure, struct, delete_names)
                deletion_times.append(elapsed)

            avg_insert = sum(insertion_times) / repeats
            avg_search = sum(search_times) / repeats
            avg_delete = sum(deletion_times) / repeats

            results.append({
                "structure": struct,
                "mode": mode,
                "insertion_avg": avg_insert,
                "insertion_all": insertion_times,
                "search_avg": avg_search,
                "search_all": search_times,
                "deletion_avg": avg_delete,
                "deletion_all": deletion_times,
            })

            print(f" Вставка: {avg_insert:.6f} сек (замеры: {[f'{t:.6f}' for t in insertion_times]})")
            print(f" Поиск: {avg_search:.6f} сек (замеры: {[f'{t:.6f}' for t in search_times]})")
            print(f" Удаление: {avg_delete:.6f} сек (замеры: {[f'{t:.6f}' for t in deletion_times]})")

    return results
|
||
|
||
def save_results_to_csv(results, filename="results.csv"):
    """Write all individual timings and their averages to a CSV file.

    For every result and every operation (insertion, search, deletion) one
    row per repetition is written, numbered from 1, followed by one row
    with the average.  A confirmation line is printed afterwards.

    Args:
        results: List of result dicts with the keys "structure", "mode",
            "<op>_all" and "<op>_avg" for op in insertion/search/deletion.
        filename: Path of the CSV file to (over)write, UTF-8 encoded.
    """
    # (label written to the file, key prefix inside the result dict)
    operations = (
        ("вставка", "insertion"),
        ("поиск", "search"),
        ("удаление", "deletion"),
    )

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Структура", "Режим", "Операция", "Повторение", "Время (сек)"])

        for res in results:
            prefix = [res["structure"], res["mode"]]
            for label, key in operations:
                for number, seconds in enumerate(res[f"{key}_all"], start=1):
                    writer.writerow(prefix + [label, number, seconds])
                writer.writerow(prefix + [label, "СРЕДНЕЕ", res[f"{key}_avg"]])

    print(f"\nРезультаты сохранены в {filename}")
|
||
|
||
def print_summary_table(results):
    """Print the averaged timings as a table, then the BST degradation ratio.

    The table has one line per result.  The column captions mention 110
    lookups and 50 deletions; these are fixed text and are not derived from
    *results*.

    The degradation section compares the average BST insertion time on
    sorted input with the one on shuffled input.  It is printed only when
    both BST rows are present and the shuffled-input average is non-zero;
    otherwise it is skipped instead of raising StopIteration or
    ZeroDivisionError.

    Args:
        results: List of result dicts with the keys "structure", "mode",
            "insertion_avg", "search_avg" and "deletion_avg".
    """
    rule = "=" * 80

    print("\n" + rule)
    print("СВОДНАЯ ТАБЛИЦА РЕЗУЛЬТАТОВ (среднее время в секундах)")
    print(rule)
    print(f"{'Структура':<15} {'Режим':<12} {'Вставка':<12} {'Поиск (110)':<12} {'Удаление (50)':<12}")
    print("-" * 80)

    for res in results:
        print(f"{res['structure']:<15} {res['mode']:<12} {res['insertion_avg']:<12.6f} "
              f"{res['search_avg']:<12.6f} {res['deletion_avg']:<12.6f}")

    # First matching row wins, as before; None when the row is missing.
    bst_random = next(
        (r for r in results if r['structure'] == "BST" and r['mode'] == "случайный"),
        None,
    )
    bst_sorted = next(
        (r for r in results if r['structure'] == "BST" and r['mode'] == "отсортированный"),
        None,
    )
    if bst_random is None or bst_sorted is None or not bst_random['insertion_avg']:
        # No meaningful ratio can be computed without both rows and a
        # non-zero baseline.
        return

    print("\n" + rule)
    print("АНАЛИЗ ДЕГРАДАЦИИ BST")
    print(rule)

    degradation = bst_sorted['insertion_avg'] / bst_random['insertion_avg']
    print(f"BST: отсортированные данные в {degradation:.1f} раз медленнее случайных")
    print("Причина: вырождение дерева в линейный связный список (O(n) вместо O(log n))")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: run the benchmark, persist the raw timings, then
    # print the summary table, the fixed conclusions text and two extra
    # comparisons taken from the shuffled-input results.
    banner = "=" * 80

    print(banner)
    print("ЭКСПЕРИМЕНТАЛЬНОЕ СРАВНЕНИЕ СТРУКТУР ДАННЫХ ДЛЯ ТЕЛЕФОННОГО СПРАВОЧНИКА")
    print(banner)

    results = run_experiment(N=5000, repeats=5)

    save_results_to_csv(results)

    print_summary_table(results)

    print("\n" + banner)
    print("ВЫВОДЫ И РЕКОМЕНДАЦИИ")
    print(banner)
    print("""
1. Хеш-таблица:
Лучшая производительность для операций поиска и вставки (O(1) в среднем)
Не чувствительна к порядку входных данных
Требует память под массив бакетов
Не поддерживает естественный порядок (нужна сортировка)
Идеально для справочников с частым поиском

2. Двоичное дерево поиска:
Естественная сортировка (in-order обход)
Хорошая производительность на случайных данных (O(log n))
Сильная деградация на отсортированных данных (O(n))
Рекурсивные операции требуют больше памяти
Хорошо для задач, где нужен отсортированный вывод

3. Связный список:
Простота реализации
Медленный поиск и удаление (O(n))
Неэффективен для больших объёмов данных
Применим только для очень маленьких справочников

РЕКОМЕНДАЦИИ ДЛЯ РЕАЛЬНЫХ ЗАДАЧ:
Частый поиск, редкие вставки -> ХЕШ-ТАБЛИЦА
Нужен отсортированный вывод -> ДЕРЕВО (с балансировкой)
Очень маленький справочник (<100 записей) -> СПИСОК
В реальных БД -> хеш-таблица + B-деревья
""")

    print("\n" + banner)
    print("ДОПОЛНИТЕЛЬНЫЙ АНАЛИЗ")
    print(banner)

    # Search time of each structure on shuffled input.
    for struct in ("LinkedList", "HashTable", "BST"):
        res_random = next(
            r for r in results
            if r['structure'] == struct and r['mode'] == "случайный"
        )
        print(f"{struct:12} поиск 110 записей: {res_random['search_avg']:.6f} сек")

    # Ratio of sorted-input to shuffled-input insertion time for the list.
    ll_random = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "случайный"
    )
    ll_sorted = next(
        r for r in results
        if r['structure'] == "LinkedList" and r['mode'] == "отсортированный"
    )
    print(f"\nСвязный список: деградация {ll_sorted['insertion_avg'] / ll_random['insertion_avg']:.2f}х")