221 lines
9.3 KiB
Python
221 lines
9.3 KiB
Python
import time
|
||
import random
|
||
import csv
|
||
from pathlib import Path
|
||
import matplotlib.pyplot as plt
|
||
import sys
|
||
|
||
# Raise the recursion limit so deep trees can be processed
# (this matters most for sorted input data).
sys.setrecursionlimit(15000)

# Output locations, composed with pathlib.
# NOTE(review): ROOT_DIR is a machine-specific absolute path — confirm it is
# intended before running this script on another machine.
ROOT_DIR = Path(r"C:\Users\andre\2026-rff_mp\smirnovad\lab1")
DOCS_DIR = ROOT_DIR.joinpath("docs")
DATA_DIR = DOCS_DIR.joinpath("data")

# Create the output directory tree (including missing parents) up front.
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||
|
||
# --- 1. СВЯЗНЫЙ СПИСОК (LinkedList) ---
|
||
def ll_insert(first_node, name, phone):
    """Prepend a new record to the list and return the new head node (O(1)).

    Nodes are plain dicts with the keys ``name``, ``phone`` and ``next``;
    ``first_node`` is the current head, or ``None`` for an empty list.
    """
    new_head = dict(name=name, phone=phone, next=first_node)
    return new_head
|
||
|
||
def ll_find(first_node, name):
    """Scan the list from the head and return the phone of the first match.

    Returns ``None`` when no node carries ``name`` (or the list is empty).
    """
    cursor = first_node
    while True:
        if not cursor:
            # Ran off the end of the list without finding the name.
            return None
        if cursor['name'] == name:
            return cursor['phone']
        cursor = cursor['next']
|
||
|
||
def ll_delete(first_node, name):
    """Unlink the first node whose name equals ``name``.

    Returns the head of the resulting list: ``None`` for an empty list, the
    second node when the head itself is removed, otherwise the original head
    (also when the name is not present).
    """
    if not first_node:
        return None
    if first_node['name'] == name:
        return first_node['next']

    before = first_node
    candidate = before['next']
    while candidate:
        if candidate['name'] == name:
            # Bypass the matching node; only the first match is removed.
            before['next'] = candidate['next']
            break
        before, candidate = candidate, candidate['next']
    return first_node
|
||
|
||
def ll_list_all(first_node):
    """Return every ``(name, phone)`` pair of the list in ascending order."""

    def walk(node):
        # Yield the records in storage order, following the ``next`` links.
        while node:
            yield node['name'], node['phone']
            node = node['next']

    entries = list(walk(first_node))
    entries.sort()
    return entries
|
||
|
||
# --- 2. ХЕШ-ТАБЛИЦА (Hash Table) ---
|
||
def ht_insert(hash_table, name, phone):
    """Add a record to the bucket selected by ``hash(name)``.

    ``hash_table`` is a list of bucket heads (linked-list nodes or ``None``);
    the new record is prepended to its bucket's chain via ``ll_insert``.
    """
    bucket_index = hash(name) % len(hash_table)
    chain_head = hash_table[bucket_index]
    hash_table[bucket_index] = ll_insert(chain_head, name, phone)
|
||
|
||
def ht_find(hash_table, name):
    """Look ``name`` up in its bucket and return the phone, or ``None``."""
    bucket_index = hash(name) % len(hash_table)
    chain_head = hash_table[bucket_index]
    return ll_find(chain_head, name)
|
||
|
||
def ht_delete(hash_table, name):
    """Remove ``name`` from its bucket chain (no-op when it is absent).

    The bucket slot is rewritten with the head returned by ``ll_delete``.
    """
    bucket_index = hash(name) % len(hash_table)
    chain_head = hash_table[bucket_index]
    hash_table[bucket_index] = ll_delete(chain_head, name)
|
||
|
||
def ht_list_all(hash_table):
    """Collect the records of every bucket and return them sorted."""

    def walk_buckets():
        # Visit each bucket in table order and follow its chain to the end.
        for chain_head in hash_table:
            cursor = chain_head
            while cursor:
                yield cursor['name'], cursor['phone']
                cursor = cursor['next']

    collected = list(walk_buckets())
    collected.sort()
    return collected
|
||
|
||
# --- 3. ДВОИЧНОЕ ДЕРЕВО ПОИСКА (BST) ---
|
||
def bst_insert(root, name, phone):
    """Insert a record into the BST, or update the phone of an existing name.

    Nodes are dicts with the keys ``name``, ``phone``, ``left`` and ``right``.
    The descent is iterative, so the call works on arbitrarily deep
    (degenerate) trees without depending on the interpreter recursion limit.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key to insert.
        phone: Value stored under ``name``.

    Returns:
        The root of the tree: a new node when the tree was empty, otherwise
        the ``root`` that was passed in.
    """
    if not root:
        return {'name': name, 'phone': phone, 'left': None, 'right': None}

    node = root
    while True:
        if name < node['name']:
            side = 'left'
        elif name > node['name']:
            side = 'right'
        else:
            # Key already present: overwrite the stored phone in place.
            node['phone'] = phone
            return root

        child = node[side]
        if not child:
            # Reached an empty slot: attach the new leaf here.
            node[side] = {'name': name, 'phone': phone, 'left': None, 'right': None}
            return root
        node = child
|
||
|
||
def bst_find(root, name):
    """Return the phone stored under ``name`` in the BST, or ``None``.

    The search walks down from ``root`` iteratively, so it does not depend on
    the interpreter recursion limit even when the tree has degenerated into a
    long chain.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key to look up.
    """
    node = root
    while node:
        if node['name'] == name:
            return node['phone']
        # Smaller keys live in the left subtree, everything else on the right.
        node = node['left'] if name < node['name'] else node['right']
    return None
|
||
|
||
def bst_delete(root, name):
|
||
"""Удаление узла в BST"""
|
||
if not root:
|
||
return None
|
||
if name < root['name']:
|
||
root['left'] = bst_delete(root['left'], name)
|
||
elif name > root['name']:
|
||
root['right'] = bst_delete(root['right'], name)
|
||
else:
|
||
if not root['left']: return root['right']
|
||
if not root['right']: return root['left']
|
||
# Поиск минимального в правом поддереве
|
||
min_node = root['right']
|
||
while min_node['left']:
|
||
min_node = min_node['left']
|
||
root['name'], root['phone'] = min_node['name'], min_node['phone']
|
||
root['right'] = bst_delete(root['right'], min_node['name'])
|
||
return root
|
||
|
||
# --- ЭКСПЕРИМЕНТАЛЬНАЯ ЧАСТЬ ---
|
||
# log_entries: one row per benchmark iteration (later written to the CSV).
# stats_summary: one dict of averaged timings per (structure, mode) pair.
log_entries, stats_summary = [], []
|
||
|
||
def run_test(structure_name, data_mode, dataset, runs=5):
    """Benchmark insert, find and delete for one structure on one dataset.

    Each run rebuilds the structure from scratch, then times:
      1. inserting every record of ``dataset``;
      2. looking up to 100 existing names plus 10 missing ones;
      3. deleting up to 50 existing names.

    One row per run is appended to the module-level ``log_entries`` and the
    averaged timings are appended to ``stats_summary``.

    Args:
        structure_name: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        data_mode: Label of the input ordering (stored with the results).
        dataset: Sequence of ``(name, phone)`` tuples.
        runs: Number of repetitions to average over (default 5).

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    print(f"Тестирование: {structure_name} | Режим: {data_mode}")
    t_ins, t_find, t_del = [], [], []

    # Never ask random.sample for more items than the dataset holds.
    find_count = min(100, len(dataset))
    del_count = min(50, len(dataset))

    for run_idx in range(runs):
        # Fresh storage: a 1024-bucket table for the hash table, else empty.
        storage = [None] * 1024 if structure_name == "HashTable" else None

        # 1. Insertion timing.
        start = time.perf_counter()
        for n, p in dataset:
            if structure_name == "LinkedList":
                storage = ll_insert(storage, n, p)
            elif structure_name == "HashTable":
                ht_insert(storage, n, p)
            elif structure_name == "BST":
                storage = bst_insert(storage, n, p)
        t_ins.append(time.perf_counter() - start)

        # 2. Lookup timing (existing names followed by 10 missing ones).
        test_names = [x[0] for x in random.sample(dataset, find_count)]
        test_names += [f"Missing_{j}" for j in range(10)]
        start = time.perf_counter()
        for name_to_find in test_names:
            if structure_name == "LinkedList":
                ll_find(storage, name_to_find)
            elif structure_name == "HashTable":
                ht_find(storage, name_to_find)
            elif structure_name == "BST":
                bst_find(storage, name_to_find)
        t_find.append(time.perf_counter() - start)

        # 3. Deletion timing.
        test_dels = [x[0] for x in random.sample(dataset, del_count)]
        start = time.perf_counter()
        for name_to_del in test_dels:
            if structure_name == "LinkedList":
                storage = ll_delete(storage, name_to_del)
            elif structure_name == "HashTable":
                ht_delete(storage, name_to_del)
            elif structure_name == "BST":
                # bst_delete returns the new root; dropping it would lose the
                # removal of a root that has fewer than two children.
                storage = bst_delete(storage, name_to_del)
        t_del.append(time.perf_counter() - start)

        log_entries.append([
            structure_name, data_mode, f"Run_{run_idx+1}",
            t_ins[-1], t_find[-1], t_del[-1],
        ])

    # Average over all runs.
    avg_i, avg_f, avg_d = sum(t_ins) / runs, sum(t_find) / runs, sum(t_del) / runs
    stats_summary.append({
        "type": structure_name, "mode": data_mode,
        "ins": avg_i, "find": avg_f, "del": avg_d,
    })
|
||
|
||
# Build the dataset: N_COUNT records with zero-padded names and random phones.
N_COUNT = 10000
raw_data = [
    (f"User_{i:05d}", f"{random.randint(100, 999)}-{random.randint(10, 99)}")
    for i in range(N_COUNT)
]
data_shuffled = random.sample(raw_data, len(raw_data))
data_sorted = sorted(raw_data)

# Benchmark every structure, first on the shuffled and then on the sorted input.
input_modes = (("random", data_shuffled), ("sorted", data_sorted))
structure_kinds = ("LinkedList", "HashTable", "BST")
for mode_label, data_src in input_modes:
    for s_kind in structure_kinds:
        run_test(s_kind, mode_label, data_src)
|
||
|
||
# Export the per-iteration timings: a header row followed by all log entries.
csv_path = DATA_DIR / "performance_stats.csv"
csv_header = ["Structure", "Input_Mode", "Iteration", "Insert_Sec", "Find_Sec", "Delete_Sec"]
with csv_path.open("w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows([csv_header, *log_entries])
|
||
|
||
# Построение графиков
|
||
def generate_visuals():
    """Draw one grouped bar chart per structure: random vs. sorted input.

    Reads the averaged timings from the module-level ``stats_summary`` and
    saves the figure as ``impact_analysis.png`` inside ``DATA_DIR``. The
    figure is closed after saving so pyplot does not keep it alive.

    Raises:
        StopIteration: If ``stats_summary`` has no entry for one of the
            structure/mode combinations.
    """
    ops = ["Вставка", "Поиск", "Удаление"]
    structs = ["LinkedList", "HashTable", "BST"]
    random_color = "#3498db"
    sorted_color = "#e74c3c"

    # Chart of the impact of input ordering.
    fig, axes = plt.subplots(1, 3, figsize=(16, 5))
    fig.suptitle("Анализ влияния упорядоченности данных", fontsize=14)

    pos = [0, 1, 2]
    for idx, s_name in enumerate(structs):
        r_vals = next(s for s in stats_summary if s['type'] == s_name and s['mode'] == "random")
        s_vals = next(s for s in stats_summary if s['type'] == s_name and s['mode'] == "sorted")

        ax = axes[idx]
        # Two bars per operation, shifted left/right around the tick position.
        ax.bar([p - 0.2 for p in pos], [r_vals['ins'], r_vals['find'], r_vals['del']],
               0.4, label='Random', color=random_color)
        ax.bar([p + 0.2 for p in pos], [s_vals['ins'], s_vals['find'], s_vals['del']],
               0.4, label='Sorted', color=sorted_color)
        ax.set_title(s_name)
        ax.set_xticks(pos)
        ax.set_xticklabels(ops)
        ax.legend()

    fig.tight_layout()
    fig.savefig(DATA_DIR / "impact_analysis.png")
    # Release the figure; pyplot otherwise keeps it registered.
    plt.close(fig)
|
||
|
||
generate_visuals()

# Write the Markdown report: heading, summary table, then the conclusions.
report_head = [
    "# Лабораторная работа №1: Структуры данных\n\n",
    f"Выполнен замер производительности на выборке N={N_COUNT}.\n",
    "## Сводная таблица (Средние значения)\n",
    "| Тип | Режим | Вставка | Поиск | Удаление |\n| :--- | :--- | :--- | :--- | :--- |\n",
]
report_conclusions = [
    "\n## Основные выводы\n",
    "1. **BST** крайне чувствителен к порядку: на отсортированных данных скорость падает из-за превращения дерева в список.\n",
    "2. **HashTable** — самая стабильная структура, время операций почти не зависит от входной последовательности.\n",
    "3. **LinkedList** показывает худшее время на операциях поиска из-за необходимости полного перебора.",
]
with open(DOCS_DIR / "LAB_REPORT.md", "w", encoding="utf-8") as f:
    f.writelines(report_head)
    # One table row per averaged result, timings with five decimal places.
    f.writelines(
        f"| {s['type']} | {s['mode']} | {s['ins']:.5f} | {s['find']:.5f} | {s['del']:.5f} |\n"
        for s in stats_summary
    )
    f.writelines(report_conclusions)

print(f"Все файлы успешно сохранены в {DOCS_DIR}")