"""Benchmark three phone-book containers: linked list, hash table and BST.

Each container stores ``name -> phone`` records in plain ``dict`` nodes.
Running the file as a script times insert / find / delete on shuffled and
sorted input, then writes a CSV of raw timings, two PNG charts and a
Markdown report.  Importing the module only defines the functions and
constants; nothing is measured or written until ``main()`` is called.
"""

import csv
import os
import random
import sys
import time

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is only needed by create_plots()
    plt = None

# Retained from the original script.  The BST routines below are iterative
# and no longer depend on a raised recursion limit.
sys.setrecursionlimit(20000)

# NOTE(review): machine-specific absolute path; outputs are written below it.
BASE_PATH = r"C:\Users\andre\2026-rff_mp\smirnovad\lab1"
DOCS_PATH = os.path.join(BASE_PATH, "docs")
DATA_PATH = os.path.join(DOCS_PATH, "data")

# Number of records generated by main(); also shown in the chart title and
# in the report text.
N = 10000

# Filled by run_experiment(): raw per-run rows for the CSV file and one
# averaged summary dict per (structure, mode) pair for plots and report.
all_results_csv = []
summary_for_report = []


# --------------------------------------------------------------------------
# Singly linked list (a node is {'name', 'phone', 'next'}; empty list = None)
# --------------------------------------------------------------------------

def ll_insert(head, name, phone):
    """Prepend a record and return the new head.

    No duplicate check is made: inserting an existing name adds a second
    node in front of the old one.
    """
    return {'name': name, 'phone': phone, 'next': head}


def ll_find(head, name):
    """Return the phone of the first node named *name*, or ``None``."""
    curr = head
    while curr:
        if curr['name'] == name:
            return curr['phone']
        curr = curr['next']
    return None


def ll_delete(head, name):
    """Unlink the first node named *name* and return the (new) head."""
    if not head:
        return None
    if head['name'] == name:
        return head['next']
    curr = head
    while curr['next']:
        if curr['next']['name'] == name:
            curr['next'] = curr['next']['next']
            return head
        curr = curr['next']
    return head


def _ll_items(head):
    """Yield ``(name, phone)`` for every node, in list order."""
    curr = head
    while curr:
        yield curr['name'], curr['phone']
        curr = curr['next']


def ll_list_all(head):
    """Return all ``(name, phone)`` pairs of the list, sorted."""
    return sorted(_ll_items(head))


# --------------------------------------------------------------------------
# Hash table with separate chaining (a list of linked-list heads)
# --------------------------------------------------------------------------

def ht_insert(buckets, name, phone):
    """Insert a record into the chain selected by ``hash(name)``; in place."""
    idx = hash(name) % len(buckets)
    buckets[idx] = ll_insert(buckets[idx], name, phone)


def ht_find(buckets, name):
    """Return the phone stored for *name*, or ``None`` if it is absent."""
    idx = hash(name) % len(buckets)
    return ll_find(buckets[idx], name)


def ht_delete(buckets, name):
    """Remove the first record named *name* from its chain; in place."""
    idx = hash(name) % len(buckets)
    buckets[idx] = ll_delete(buckets[idx], name)


def ht_list_all(buckets):
    """Return all ``(name, phone)`` pairs from every bucket, sorted."""
    return sorted(item for head in buckets for item in _ll_items(head))


# --------------------------------------------------------------------------
# Unbalanced binary search tree
# (a node is {'name', 'phone', 'left', 'right'}; empty tree = None)
#
# All routines are iterative: sorted input produces a tree as deep as the
# number of records, and one Python frame per level would hit the recursion
# limit (or the C stack on older interpreters).
# --------------------------------------------------------------------------

def bst_insert(root, name, phone):
    """Insert or update *name* and return the tree root.

    If *name* already exists, its phone is overwritten.
    """
    if not root:
        return {'name': name, 'phone': phone, 'left': None, 'right': None}
    node = root
    while True:
        if name < node['name']:
            side = 'left'
        elif name > node['name']:
            side = 'right'
        else:
            node['phone'] = phone
            return root
        child = node[side]
        if not child:
            node[side] = {'name': name, 'phone': phone,
                          'left': None, 'right': None}
            return root
        node = child


def bst_find(root, name):
    """Return the phone stored for *name*, or ``None`` if it is absent."""
    node = root
    while node:
        if node['name'] == name:
            return node['phone']
        node = node['left'] if name < node['name'] else node['right']
    return None


def bst_delete(root, name):
    """Remove *name* from the tree (if present) and return the tree root."""
    parent, side, node = None, None, root
    while node:
        if name < node['name']:
            next_side = 'left'
        elif name > node['name']:
            next_side = 'right'
        else:
            break
        parent, side, node = node, next_side, node[next_side]
    if not node:
        return root  # not found: tree unchanged

    if node['left'] and node['right']:
        # Two children: copy the in-order successor (leftmost node of the
        # right subtree) into this node, then unlink the successor, which
        # by construction has no left child.
        succ_parent, succ = node, node['right']
        while succ['left']:
            succ_parent, succ = succ, succ['left']
        node['name'], node['phone'] = succ['name'], succ['phone']
        if succ_parent is node:
            node['right'] = succ['right']
        else:
            succ_parent['left'] = succ['right']
        return root

    # Zero or one child: splice the remaining subtree into the parent.
    replacement = node['right'] if not node['left'] else node['left']
    if parent is None:
        return replacement
    parent[side] = replacement
    return root


def bst_list_all(root):
    """Return all ``(name, phone)`` pairs in key order (in-order walk)."""
    res = []
    stack = []
    node = root
    while stack or node:
        while node:
            stack.append(node)
            node = node['left']
        node = stack.pop()
        res.append((node['name'], node['phone']))
        node = node['right']
    return res


# --------------------------------------------------------------------------
# Benchmark driver
# --------------------------------------------------------------------------

# struct name -> (insert, find, delete, rebinds).  ``rebinds`` is True when
# insert/delete return the new container (list head / tree root) and False
# when they mutate the container in place (the bucket array).
_STRUCT_OPS = {
    "LinkedList": (ll_insert, ll_find, ll_delete, True),
    "HashTable": (ht_insert, ht_find, ht_delete, False),
    "BST": (bst_insert, bst_find, bst_delete, True),
}


def run_experiment(struct_type, mode, data, *, runs=5, n_buckets=1000,
                   n_find=100, n_missing=10, n_delete=50):
    """Time insert / find / delete for one structure on one data set.

    Appends one row per run plus an ``"AVERAGE"`` row to ``all_results_csv``
    and one averaged summary dict to ``summary_for_report``.

    Args:
        struct_type: ``"LinkedList"``, ``"HashTable"`` or ``"BST"``.
        mode: Label of the input ordering; stored verbatim in the results.
        data: Sequence of ``(name, phone)`` tuples.
        runs: Number of repetitions to average over.
        n_buckets: Bucket count used when *struct_type* is ``"HashTable"``.
        n_find: Existing names looked up per run (capped at ``len(data)``).
        n_missing: Extra look-ups of names that are not in *data*.
        n_delete: Names deleted per run (capped at ``len(data)``).

    Raises:
        ValueError: If *struct_type* is unknown or *runs* is less than 1.
    """
    try:
        insert, find, delete, rebinds = _STRUCT_OPS[struct_type]
    except KeyError:
        raise ValueError(f"unknown structure type: {struct_type!r}") from None
    if runs < 1:
        raise ValueError("runs must be at least 1")

    print(f"Processing: {struct_type} ({mode})")
    ins_times, find_times, del_times = [], [], []
    # Cap the sample sizes so small data sets do not make random.sample fail.
    find_count = min(n_find, len(data))
    delete_count = min(n_delete, len(data))

    for run in range(runs):
        container = None if rebinds else [None] * n_buckets

        # The rebinds branch is chosen outside the timed loops so that only
        # the structure's own work is measured.
        start = time.perf_counter()
        if rebinds:
            for name, phone in data:
                container = insert(container, name, phone)
        else:
            for name, phone in data:
                insert(container, name, phone)
        ins_times.append(time.perf_counter() - start)

        search_list = [rec[0] for rec in random.sample(data, find_count)]
        search_list += [f"None_{j}" for j in range(n_missing)]
        start = time.perf_counter()
        for name in search_list:
            find(container, name)
        find_times.append(time.perf_counter() - start)

        del_list = [rec[0] for rec in random.sample(data, delete_count)]
        start = time.perf_counter()
        if rebinds:
            for name in del_list:
                container = delete(container, name)
        else:
            for name in del_list:
                delete(container, name)
        del_times.append(time.perf_counter() - start)

        all_results_csv.append([struct_type, mode, f"Run {run + 1}",
                                ins_times[-1], find_times[-1], del_times[-1]])

    avg_ins = sum(ins_times) / runs
    avg_find = sum(find_times) / runs
    avg_del = sum(del_times) / runs
    all_results_csv.append(
        [struct_type, mode, "AVERAGE", avg_ins, avg_find, avg_del])
    summary_for_report.append({"name": struct_type, "mode": mode,
                               "ins": avg_ins, "find": avg_find,
                               "del": avg_del})


def _require_matplotlib():
    """Raise a clear error when the plotting backend is unavailable."""
    if plt is None:
        raise RuntimeError("matplotlib is required to build the charts")


def create_plots():
    """Render both comparison charts from ``summary_for_report``.

    Writes ``order_impact.png`` and ``struct_comparison.png`` into
    ``DATA_PATH``.  Expects a summary entry for every structure in both
    input modes.

    Raises:
        RuntimeError: If matplotlib could not be imported.
    """
    _require_matplotlib()
    labels = ["insert", "find", "delete"]
    structs = ["LinkedList", "HashTable", "BST"]
    colors = ['#5dade2', '#e67e22', '#58d68d']

    # Figure 1: one panel per structure, shuffled vs. sorted input.
    fig1, axs = plt.subplots(1, 3, figsize=(18, 6))
    fig1.suptitle("Влияние порядка данных на время операций",
                  fontsize=16, fontweight='bold')
    x = [0, 1, 2]
    width = 0.35
    for i, s_name in enumerate(structs):
        rand_data = next(r for r in summary_for_report
                         if r['name'] == s_name and r['mode'] == "случайный")
        sort_data = next(r for r in summary_for_report
                         if r['name'] == s_name
                         and r['mode'] == "сортированный")
        axs[i].bar([p - width / 2 for p in x],
                   [rand_data['ins'], rand_data['find'], rand_data['del']],
                   width, label='случайный', color=colors[0])
        axs[i].bar([p + width / 2 for p in x],
                   [sort_data['ins'], sort_data['find'], sort_data['del']],
                   width, label='сортированный', color='#e74c3c', alpha=0.8)
        axs[i].set_title(s_name, fontweight='bold')
        axs[i].set_xticks(x)
        axs[i].set_xticklabels(labels)
        axs[i].set_ylabel("Время (с)")
        axs[i].legend()
        axs[i].grid(axis='y', linestyle='--', alpha=0.3)
    fig1.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig1.savefig(os.path.join(DATA_PATH, "order_impact.png"))
    plt.close(fig1)  # release the figure once it is on disk

    # Figure 2: one panel per operation, all structure/mode pairs.
    fig2, axs2 = plt.subplots(1, 3, figsize=(18, 6))
    fig2.suptitle(f"Сравнение структур данных (N={N})",
                  fontsize=16, fontweight='bold')
    op_keys = ['ins', 'find', 'del']
    op_names = ['insert', 'find', 'delete']
    color_by_struct = {"LinkedList": colors[0], "HashTable": colors[1]}
    for i, op in enumerate(op_keys):
        plot_labels = [f"{r['name']}\n({r['mode'][:4]})"
                       for r in summary_for_report]
        plot_values = [r[op] for r in summary_for_report]
        plot_colors = [color_by_struct.get(r['name'], colors[2])
                       for r in summary_for_report]
        bars = axs2[i].bar(plot_labels, plot_values, color=plot_colors)
        axs2[i].set_title(f"Операция: {op_names[i]}", fontweight='bold')
        axs2[i].set_ylabel("Время (с)")
        axs2[i].tick_params(axis='x', rotation=15)
        for bar in bars:
            height = bar.get_height()
            axs2[i].text(bar.get_x() + bar.get_width() / 2., height,
                         f'{height:.4f}', ha='center', va='bottom',
                         fontsize=8)
    fig2.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig2.savefig(os.path.join(DATA_PATH, "struct_comparison.png"))
    plt.close(fig2)


def _write_results_csv():
    """Write every row of ``all_results_csv`` to ``results.csv``."""
    path = os.path.join(DATA_PATH, "results.csv")
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Структура", "Режим", "Итерация",
                         "Вставка", "Поиск", "Удаление"])
        writer.writerows(all_results_csv)


def _write_report():
    """Write the Markdown report with the averaged timings to ``report.md``."""
    with open(os.path.join(DOCS_PATH, "report.md"), "w", encoding="utf-8") as f:
        f.write("# Технический отчет: Сравнительный анализ структур данных\n\n")
        f.write("## 1. Вводные данные\n")
        f.write(f"Целью теста является оценка производительности LinkedList, HashTable и BST на массиве из {N} элементов. ")
        f.write("Анализировались сценарии со случайным распределением и предварительной сортировкой ключей.\n\n")
        f.write("## 2. Результаты измерений (AVG)\n")
        f.write("| Алгоритм | Входные данные | Вставка (с) | Поиск (с) | Удаление (с) |\n")
        f.write("| :--- | :--- | :--- | :--- | :--- |\n")
        for r in summary_for_report:
            f.write(f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n")
        f.write("\n## 3. Визуальный анализ\n")
        f.write("### Сравнение по типам операций\n![Сравнение](data/struct_comparison.png)\n\n")
        f.write("### Влияние упорядоченности на производительность\n![Влияние порядка](data/order_impact.png)\n\n")
        f.write("## 4. Экспертные выводы\n")
        f.write("- **Эффект вырождения BST:** На отсортированных последовательностях BST демонстрирует критический рост времени выполнения (деградация до $O(N)$). ")
        f.write("Это связано с отсутствием балансировки, превращающим дерево в линейный список.\n")
        f.write("- **Инвариантность HashTable:** Хеш-таблица показывает наиболее стабильные результаты. Скорость доступа не коррелирует с порядком входных данных.\n")
        f.write("- **Линейная сложность LinkedList:** Связный список предсказуемо неэффективен при поиске, так как требует итерации по всей глубине структуры.\n")
        f.write("- **Итоговая оценка:** Для систем с высокой интенсивностью поиска и вставки оптимальным выбором является HashTable.")


def main():
    """Run the full benchmark and write the CSV, the charts and the report."""
    # Fail before the (slow) measurements if the charts cannot be produced.
    _require_matplotlib()
    for path in (DOCS_PATH, DATA_PATH):
        os.makedirs(path, exist_ok=True)

    records_raw = [(f"User_{i:05d}", f"8-900-{random.randint(100, 999)}")
                   for i in range(N)]
    records_shuffled = records_raw[:]
    random.shuffle(records_shuffled)
    records_sorted = sorted(records_raw)

    for m_name, d_set in [("случайный", records_shuffled),
                          ("сортированный", records_sorted)]:
        for s_type in ["LinkedList", "HashTable", "BST"]:
            run_experiment(s_type, m_name, d_set)

    _write_results_csv()
    create_plots()
    _write_report()
    print("Готово.")


if __name__ == "__main__":
    main()