2026-rff_mp/smirnovad/lab11/docs/data/lab1.py
2026-05-14 22:49:35 +03:00

249 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import random
import csv
import os
import sys
import matplotlib.pyplot as plt
# Raise the interpreter's recursion limit above its default.
sys.setrecursionlimit(20000)

# Output locations for the CSV, plots and Markdown report.
# NOTE(review): BASE_PATH is an absolute, machine-specific Windows path.
BASE_PATH = r"C:\Users\andre\2026-rff_mp\smirnovad\lab1"
DOCS_PATH = os.path.join(BASE_PATH, "docs")
DATA_PATH = os.path.join(DOCS_PATH, "data")

# Create any output directory that does not exist yet.
for p in (DOCS_PATH, DATA_PATH):
    if os.path.exists(p):
        continue
    os.makedirs(p)
def ll_insert(head, name, phone):
    """Prepend a record to a singly linked list and return the new head.

    Nodes are plain dicts with the keys ``'name'``, ``'phone'`` and ``'next'``.
    No duplicate check is made: the new node is always placed in front of
    ``head`` (which may be ``None`` for an empty list).
    """
    node = dict(name=name, phone=phone)
    node['next'] = head
    return node
def ll_find(head, name):
    """Return the phone stored for ``name`` in the linked list, or ``None``.

    The list is scanned from ``head``; the first node whose ``'name'``
    equals ``name`` wins.
    """
    node = head
    while node:
        if node['name'] != name:
            node = node['next']
            continue
        return node['phone']
    return None
def ll_delete(head, name):
    """Unlink the first node named ``name`` and return the list's head.

    Returns ``None`` for an empty list, the second node when the head itself
    is removed, and the unchanged ``head`` otherwise (including when ``name``
    is not present).
    """
    if not head:
        return None
    if head['name'] == name:
        return head['next']

    previous, node = head, head['next']
    while node:
        if node['name'] == name:
            # Bypass the matching node; only the first match is removed.
            previous['next'] = node['next']
            break
        previous, node = node, node['next']
    return head
def ll_list_all(head):
    """Return every ``(name, phone)`` pair of the linked list, sorted."""

    def _walk(node):
        # Yield the records in list order, following the 'next' links.
        while node:
            yield node['name'], node['phone']
            node = node['next']

    return sorted(_walk(head))
def ht_insert(buckets, name, phone):
    """Add a record to a chained hash table, mutating ``buckets`` in place.

    ``buckets`` is a list whose slots each hold the head of a linked list
    (or ``None``). The slot is chosen by ``hash(name)`` modulo the table
    size and the record is prepended to that slot's chain.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
def ht_find(buckets, name):
    """Return the phone stored for ``name`` in the hash table, or ``None``.

    Only the chain in the slot selected by ``hash(name)`` is searched.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    return ll_find(chain, name)
def ht_delete(buckets, name):
    """Remove ``name`` from the hash table, mutating ``buckets`` in place.

    The chain in the slot selected by ``hash(name)`` is replaced by the
    result of deleting ``name`` from it.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
def ht_list_all(buckets):
    """Return every ``(name, phone)`` pair stored in the table, sorted."""

    def _chain(node):
        # Yield the records of one bucket's linked list.
        while node:
            yield node['name'], node['phone']
            node = node['next']

    return sorted(record for head in buckets for record in _chain(head))
def bst_insert(root, name, phone):
    """Insert or update a record in an unbalanced BST and return its root.

    Nodes are dicts with the keys ``'name'``, ``'phone'``, ``'left'`` and
    ``'right'``. If ``name`` is already present its phone is overwritten;
    otherwise a new leaf is attached.

    The descent is iterative, so a degenerate (list-like) tree produced by
    sorted input cannot exhaust the interpreter's recursion limit.
    """
    if not root:
        return {'name': name, 'phone': phone, 'left': None, 'right': None}

    node = root
    while True:
        if name < node['name']:
            side = 'left'
        elif name > node['name']:
            side = 'right'
        else:
            # Key already present: update in place.
            node['phone'] = phone
            return root

        if not node[side]:
            node[side] = {'name': name, 'phone': phone, 'left': None, 'right': None}
            return root
        node = node[side]
def bst_find(root, name):
    """Return the phone stored for ``name`` in the BST, or ``None``.

    The search walks down iteratively, so its depth is not bounded by the
    interpreter's recursion limit even when the tree has degenerated into
    a chain.
    """
    node = root
    while node:
        if node['name'] == name:
            return node['phone']
        node = node['left'] if name < node['name'] else node['right']
    return None
def bst_delete(root, name):
    """Remove ``name`` from the BST and return the (possibly new) root.

    A node with at most one child is replaced by that child. A node with two
    children takes over the name and phone of its in-order successor (the
    leftmost node of its right subtree), and the successor node is spliced
    out instead. If ``name`` is absent the tree is returned unchanged.

    The implementation is iterative, so deleting from a degenerate
    (list-like) tree cannot exhaust the interpreter's recursion limit.
    """
    # Locate the node to delete, remembering its parent.
    parent, node = None, root
    while node and node['name'] != name:
        parent = node
        node = node['left'] if name < node['name'] else node['right']
    if not node:
        return root

    if node['left'] and node['right']:
        # Two children: copy the successor's data here, then remove the
        # successor node, which by construction has no left child.
        succ_parent, succ = node, node['right']
        while succ['left']:
            succ_parent, succ = succ, succ['left']
        node['name'], node['phone'] = succ['name'], succ['phone']
        parent, node = succ_parent, succ

    # From here on `node` has at most one child; splice it out.
    child = node['left'] if node['left'] else node['right']
    if parent is None:
        return child
    if parent['left'] is node:
        parent['left'] = child
    else:
        parent['right'] = child
    return root
def bst_list_all(root):
    """Return every ``(name, phone)`` pair of the BST in ascending key order.

    Uses an in-order traversal driven by an explicit stack rather than
    recursion, so a degenerate (list-like) tree cannot exhaust the
    interpreter's recursion limit.
    """
    records = []
    pending = []  # ancestors whose own record and right subtree are still due
    node = root
    while pending or node:
        # Descend as far left as possible, stacking the path.
        while node:
            pending.append(node)
            node = node['left']
        node = pending.pop()
        records.append((node['name'], node['phone']))
        node = node['right']
    return records
# Accumulators filled by run_experiment():
#   all_results_csv    - one row per timed run plus one "AVERAGE" row per
#                        (structure, mode) pair, later written to results.csv
#   summary_for_report - one dict of averaged timings per (structure, mode) pair
all_results_csv, summary_for_report = [], []
def run_experiment(struct_type, mode, data, runs=5):
    """Time insert / find / delete on one data structure and record the results.

    For each of ``runs`` repetitions a fresh container is built from ``data``,
    then up to 100 existing names plus 10 absent names are looked up, then up
    to 50 existing names are deleted. Each phase is timed with
    ``time.perf_counter``.

    Per-run rows and a final ``"AVERAGE"`` row are appended to the module-level
    ``all_results_csv``; the averages are also appended to
    ``summary_for_report``.

    Args:
        struct_type: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        mode: Label describing the ordering of ``data``; stored verbatim.
        data: Sequence of ``(name, phone)`` pairs.
        runs: Number of repetitions to average over (default 5).

    Raises:
        ValueError: If ``struct_type`` is not one of the supported names or
            ``runs`` is less than 1.
    """
    if struct_type not in ("LinkedList", "HashTable", "BST"):
        # An unknown name would otherwise time three empty loops silently.
        raise ValueError(f"unknown structure type: {struct_type!r}")
    if runs < 1:
        raise ValueError("runs must be at least 1")

    print(f"Processing: {struct_type} ({mode})")
    ins_times, find_times, del_times = [], [], []

    # Clamp the sample sizes so datasets smaller than 100 / 50 records work.
    n_lookups = min(100, len(data))
    n_deletes = min(50, len(data))

    for i in range(runs):
        # The hash table is a fixed list of 1000 buckets; the other two
        # structures start out as an empty (None) head / root.
        container = [None] * 1000 if struct_type == "HashTable" else None

        start = time.perf_counter()
        for n, p in data:
            if struct_type == "LinkedList":
                container = ll_insert(container, n, p)
            elif struct_type == "HashTable":
                ht_insert(container, n, p)
            elif struct_type == "BST":
                container = bst_insert(container, n, p)
        ins_times.append(time.perf_counter() - start)

        # Existing names plus ten names that are guaranteed misses.
        search_list = [d[0] for d in random.sample(data, n_lookups)]
        search_list += [f"None_{j}" for j in range(10)]
        start = time.perf_counter()
        for s_name in search_list:
            if struct_type == "LinkedList":
                ll_find(container, s_name)
            elif struct_type == "HashTable":
                ht_find(container, s_name)
            elif struct_type == "BST":
                bst_find(container, s_name)
        find_times.append(time.perf_counter() - start)

        del_list = [d[0] for d in random.sample(data, n_deletes)]
        start = time.perf_counter()
        for d_name in del_list:
            if struct_type == "LinkedList":
                container = ll_delete(container, d_name)
            elif struct_type == "HashTable":
                ht_delete(container, d_name)
            elif struct_type == "BST":
                container = bst_delete(container, d_name)
        del_times.append(time.perf_counter() - start)

        all_results_csv.append(
            [struct_type, mode, f"Run {i+1}", ins_times[-1], find_times[-1], del_times[-1]]
        )

    avg_ins = sum(ins_times) / runs
    avg_find = sum(find_times) / runs
    avg_del = sum(del_times) / runs
    all_results_csv.append([struct_type, mode, "AVERAGE", avg_ins, avg_find, avg_del])
    summary_for_report.append(
        {"name": struct_type, "mode": mode, "ins": avg_ins, "find": avg_find, "del": avg_del}
    )
# Number of synthetic phone-book records used in every experiment.
N = 10000

# Names are zero-padded so that lexicographic order matches numeric order;
# the phone suffix is random.
records_raw = [
    (f"User_{i:05d}", f"8-900-{random.randint(100, 999)}")
    for i in range(N)
]
records_shuffled = list(records_raw)
random.shuffle(records_shuffled)
records_sorted = sorted(records_raw)

# Benchmark every structure on both orderings of the same records.
for m_name, d_set in (
    ("случайный", records_shuffled),
    ("сортированный", records_sorted),
):
    for s_type in ("LinkedList", "HashTable", "BST"):
        run_experiment(s_type, m_name, d_set)

# Dump the per-run and averaged timings as CSV.
results_path = os.path.join(DATA_PATH, "results.csv")
header_row = ["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"]
with open(results_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows([header_row] + all_results_csv)
def create_plots():
    """Render the two benchmark charts into ``DATA_PATH`` as PNG files.

    Reads the averaged timings from the module-level ``summary_for_report``
    and writes:

    * ``order_impact.png`` - one panel per structure comparing the
      "случайный" and "сортированный" runs for each operation;
    * ``struct_comparison.png`` - one panel per operation with a bar for
      every (structure, mode) pair.

    Each figure is laid out, saved and closed through its own handle rather
    than through pyplot's implicit "current figure", so no figure stays open
    after the function returns.

    Raises:
        StopIteration: If ``summary_for_report`` lacks an entry for one of the
            three structures in either mode.
    """
    labels = ["insert", "find", "delete"]
    structs = ["LinkedList", "HashTable", "BST"]
    colors = ['#5dade2', '#e67e22', '#58d68d']
    op_keys = ['ins', 'find', 'del']
    op_names = ['insert', 'find', 'delete']

    def _summary(struct_name, mode):
        # First summary entry for this structure and mode.
        return next(
            r for r in summary_for_report
            if r['name'] == struct_name and r['mode'] == mode
        )

    # --- Figure 1: effect of input order, one panel per structure ---------
    fig1, axs = plt.subplots(1, 3, figsize=(18, 6))
    fig1.suptitle("Влияние порядка данных на время операций", fontsize=16, fontweight='bold')
    x = [0, 1, 2]
    width = 0.35
    for ax, s_name in zip(axs, structs):
        rand_data = _summary(s_name, "случайный")
        sort_data = _summary(s_name, "сортированный")
        ax.bar([p - width/2 for p in x], [rand_data[k] for k in op_keys],
               width, label='случайный', color=colors[0])
        ax.bar([p + width/2 for p in x], [sort_data[k] for k in op_keys],
               width, label='сортированный', color='#e74c3c', alpha=0.8)
        ax.set_title(s_name, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.set_ylabel("Время (с)")
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.3)
    fig1.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig1.savefig(os.path.join(DATA_PATH, "order_impact.png"))
    plt.close(fig1)

    # --- Figure 2: structures side by side, one panel per operation -------
    fig2, axs2 = plt.subplots(1, 3, figsize=(18, 6))
    fig2.suptitle(f"Сравнение структур данных (N={N})", fontsize=16, fontweight='bold')

    # Bar labels and colours are the same on every panel. Any structure other
    # than LinkedList / HashTable gets the third colour.
    color_by_struct = {"LinkedList": colors[0], "HashTable": colors[1]}
    plot_labels = [f"{r['name']}\n({r['mode'][:4]})" for r in summary_for_report]
    plot_colors = [color_by_struct.get(r['name'], colors[2]) for r in summary_for_report]

    for ax, op, op_name in zip(axs2, op_keys, op_names):
        plot_values = [r[op] for r in summary_for_report]
        bars = ax.bar(plot_labels, plot_values, color=plot_colors)
        ax.set_title(f"Операция: {op_name}", fontweight='bold')
        ax.set_ylabel("Время (с)")
        ax.tick_params(axis='x', rotation=15)
        # Print the measured value on top of each bar.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height, f'{height:.4f}',
                    ha='center', va='bottom', fontsize=8)
    fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig2.savefig(os.path.join(DATA_PATH, "struct_comparison.png"))
    plt.close(fig2)
create_plots()

# Assemble the Markdown report in memory, then write it in one go.
report_parts = [
    "# Технический отчет: Сравнительный анализ структур данных\n\n",
    "## 1. Вводные данные\n",
    f"Целью теста является оценка производительности LinkedList, HashTable и BST на массиве из {N} элементов. ",
    "Анализировались сценарии со случайным распределением и предварительной сортировкой ключей.\n\n",
    "## 2. Результаты измерений (AVG)\n",
    "| Алгоритм | Входные данные | Вставка (с) | Поиск (с) | Удаление (с) |\n",
    "| :--- | :--- | :--- | :--- | :--- |\n",
]

# One table row per (structure, mode) pair with the averaged timings.
report_parts.extend(
    f"| {row['name']} | {row['mode']} | {row['ins']:.6f} | {row['find']:.6f} | {row['del']:.6f} |\n"
    for row in summary_for_report
)

report_parts += [
    "\n## 3. Визуальный анализ\n",
    "### Сравнение по типам операций\n![Сравнение](data/struct_comparison.png)\n\n",
    "### Влияние упорядоченности на производительность\n![Влияние порядка](data/order_impact.png)\n\n",
    "## 4. Экспертные выводы\n",
    "- **Эффект вырождения BST:** На отсортированных последовательностях BST демонстрирует критический рост времени выполнения (деградация до $O(N)$). ",
    "Это связано с отсутствием балансировки, превращающим дерево в линейный список.\n",
    "- **Инвариантность HashTable:** Хеш-таблица показывает наиболее стабильные результаты. Скорость доступа не коррелирует с порядком входных данных.\n",
    "- **Линейная сложность LinkedList:** Связный список предсказуемо неэффективен при поиске, так как требует итерации по всей глубине структуры.\n",
    "- **Итоговая оценка:** Для систем с высокой интенсивностью поиска и вставки оптимальным выбором является HashTable.",
]

report_path = os.path.join(DOCS_PATH, "report.md")
with open(report_path, "w", encoding="utf-8") as f:
    f.write("".join(report_parts))

print("Готово.")