2026-rff_mp/rybakovaa/lab1/docs/data/lab1.py

328 lines
9.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import random
import csv
import os
import sys
# Raise the interpreter recursion limit to twice N; presumably needed by the
# recursive tree routines when the tree degenerates - confirm before removing.
sys.setrecursionlimit(20000)
# Directory containing this script.
BASE = os.path.dirname(os.path.abspath(__file__))
# Output directory for results.csv and the generated PNG charts.
DATA_PATH = BASE
# Number of phone-book records generated for every experiment.
N = 10000
# Number of timed repetitions per (structure, input order) pair.
REPEAT = 5
def ll_insert(head, name, phone):
    """Prepend a record to a singly linked list and return the new head.

    Nodes are plain dicts with the keys ``"name"``, ``"phone"`` and ``"next"``.
    An empty list is represented by ``None``.

    Args:
        head: Current first node, or ``None`` for an empty list.
        name: Key of the new record.
        phone: Value stored for ``name``.

    Returns:
        The freshly created node, whose ``"next"`` points at the old ``head``.
        No check for an existing node with the same name is performed.
    """
    return {"name": name, "phone": phone, "next": head}
def ll_find(head, name):
    """Return the phone stored for ``name`` in a linked list, or ``None``.

    Args:
        head: First node of the list (dict with ``"name"``, ``"phone"``,
            ``"next"``), or ``None`` for an empty list.
        name: Key to look for.

    Returns:
        The ``"phone"`` of the first node whose ``"name"`` equals ``name``,
        or ``None`` when no node matches.
    """
    node = head
    while node is not None:
        if node["name"] == name:
            return node["phone"]
        node = node["next"]
    return None
def ll_delete(head, name):
    """Unlink the first node named ``name`` and return the resulting head.

    Args:
        head: First node of the list, or ``None`` for an empty list.
        name: Key of the record to remove.

    Returns:
        The head of the list after removal: ``head["next"]`` when the head
        itself matched, otherwise the unchanged ``head`` (also when nothing
        matched). ``None`` is returned for an empty list.
    """
    if head is None:
        return None
    if head["name"] == name:
        return head["next"]
    prev = head
    while prev["next"] is not None:
        candidate = prev["next"]
        if candidate["name"] == name:
            # Bypass the matching node; only the first match is removed.
            prev["next"] = candidate["next"]
            return head
        prev = candidate
    return head
def ll_list_all(head):
    """Return every record of a linked list as a sorted list of tuples.

    Args:
        head: First node of the list, or ``None`` for an empty list.

    Returns:
        A new list of ``(name, phone)`` tuples in ascending tuple order.
    """
    pairs = []
    node = head
    while node is not None:
        pairs.append((node["name"], node["phone"]))
        node = node["next"]
    pairs.sort()
    return pairs
# Number of buckets allocated for the hash table in build_structure.
BUCKET_SIZE = 1000
def ht_insert(buckets, name, phone):
    """Store a record in a chained hash table.

    The bucket is chosen as ``hash(name) % len(buckets)`` and the record is
    added to that bucket's chain with ``ll_insert``.

    Args:
        buckets: List of chain heads (``None`` for an empty bucket); mutated
            in place.
        name: Key of the record.
        phone: Value stored for ``name``.
    """
    slot = hash(name) % len(buckets)
    buckets[slot] = ll_insert(buckets[slot], name, phone)
def ht_find(buckets, name):
    """Look up ``name`` in a chained hash table.

    Args:
        buckets: List of chain heads (``None`` for an empty bucket).
        name: Key to look for.

    Returns:
        Whatever ``ll_find`` returns for the chain at
        ``hash(name) % len(buckets)``.
    """
    slot = hash(name) % len(buckets)
    return ll_find(buckets[slot], name)
def ht_delete(buckets, name):
    """Remove ``name`` from a chained hash table.

    The chain at ``hash(name) % len(buckets)`` is replaced by the result of
    ``ll_delete`` on it.

    Args:
        buckets: List of chain heads (``None`` for an empty bucket); mutated
            in place.
        name: Key of the record to remove.
    """
    slot = hash(name) % len(buckets)
    buckets[slot] = ll_delete(buckets[slot], name)
def ht_list_all(buckets):
    """Return every record of a chained hash table as a sorted list.

    Args:
        buckets: Iterable of chain heads; each head is a linked-list node
            dict (``"name"``, ``"phone"``, ``"next"``) or ``None``.

    Returns:
        A new list of ``(name, phone)`` tuples in ascending tuple order.
    """
    pairs = []
    for chain_head in buckets:
        node = chain_head
        while node is not None:
            pairs.append((node["name"], node["phone"]))
            node = node["next"]
    pairs.sort()
    return pairs
def bst_insert(root, name, phone):
    """Insert or update a record in an unbalanced binary search tree.

    Nodes are dicts with the keys ``"name"``, ``"phone"``, ``"left"`` and
    ``"right"``. The descent is iterative, so a degenerate tree (for example
    one built from sorted keys) cannot exhaust the recursion limit.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key of the record; compared with ``<`` and ``>``.
        phone: Value stored for ``name``. If ``name`` is already present its
            phone is overwritten.

    Returns:
        The root of the tree: a new node when ``root`` was ``None``,
        otherwise the same ``root`` object.
    """
    fresh = {"name": name, "phone": phone, "left": None, "right": None}
    if root is None:
        return fresh
    node = root
    while True:
        if name < node["name"]:
            side = "left"
        elif name > node["name"]:
            side = "right"
        else:
            # Existing key: update in place, do not add a node.
            node["phone"] = phone
            return root
        if node[side] is None:
            node[side] = fresh
            return root
        node = node[side]
def bst_find(root, name):
    """Return the phone stored for ``name`` in a binary search tree.

    The search walks down iteratively, so its depth is not limited by the
    interpreter recursion limit even on a degenerate tree.

    Args:
        root: Root node (dict with ``"name"``, ``"phone"``, ``"left"``,
            ``"right"``), or ``None`` for an empty tree.
        name: Key to look for.

    Returns:
        The ``"phone"`` of the matching node, or ``None`` if there is none.
    """
    node = root
    while node is not None:
        if name == node["name"]:
            return node["phone"]
        node = node["left"] if name < node["name"] else node["right"]
    return None
def bst_delete(root, name):
    """Remove ``name`` from a binary search tree and return the new root.

    Implemented iteratively with explicit parent tracking, so deleting from a
    degenerate tree cannot exhaust the recursion limit.

    Args:
        root: Root node (dict with ``"name"``, ``"phone"``, ``"left"``,
            ``"right"``), or ``None`` for an empty tree.
        name: Key of the record to remove; compared with ``<`` and ``>``.

    Returns:
        The root after removal. When the key is absent the tree is left
        untouched and ``root`` is returned. When the root itself is removed
        and has at most one child, that child (or ``None``) is returned.
    """
    parent = None
    node = root
    # Locate the node to delete, remembering its parent.
    while node is not None:
        if name < node["name"]:
            parent, node = node, node["left"]
        elif name > node["name"]:
            parent, node = node, node["right"]
        else:
            break
    if node is None:
        return root

    if node["left"] is not None and node["right"] is not None:
        # Two children: copy the in-order successor (leftmost node of the
        # right subtree) into this node, then unlink the successor instead.
        succ_parent = node
        succ = node["right"]
        while succ["left"] is not None:
            succ_parent, succ = succ, succ["left"]
        node["name"] = succ["name"]
        node["phone"] = succ["phone"]
        parent, node = succ_parent, succ

    # Here ``node`` has at most one child; splice that child into its place.
    child = node["right"] if node["left"] is None else node["left"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
def bst_list_all(root):
    """Return every record of a binary search tree in key order.

    Performs an in-order traversal with an explicit stack instead of
    recursion, so tree height is not limited by the recursion limit.

    Args:
        root: Root node (dict with ``"name"``, ``"phone"``, ``"left"``,
            ``"right"``), or ``None`` for an empty tree.

    Returns:
        A new list of ``(name, phone)`` tuples in in-order sequence.
    """
    result = []
    pending = []
    node = root
    while pending or node is not None:
        # Descend as far left as possible, remembering the path.
        while node is not None:
            pending.append(node)
            node = node["left"]
        node = pending.pop()
        result.append((node["name"], node["phone"]))
        node = node["right"]
    return result
def make_records(n):
    """Generate ``n`` synthetic phone-book records.

    Args:
        n: Number of records to produce.

    Returns:
        A list of ``(name, phone)`` tuples for ``i`` in ``range(n)``, where
        the name is ``User_`` plus ``i`` zero-padded to 5 digits and the phone
        is ``8-900-`` plus ``i % 10000`` zero-padded to 4 digits.
    """
    return [(f"User_{i:05d}", f"8-900-{i % 10000:04d}") for i in range(n)]
# Source data set: N generated (name, phone) records.
records_all = make_records(N)
# Same records in random order (shuffled copy; records_all is left intact).
records_shuffled = records_all[:]
random.shuffle(records_shuffled)
# Same records in ascending order.
records_sorted = sorted(records_all)
all_names = [name for name, phone in records_all]
# Lookup workload: 100 names that exist plus 10 that do not, mixed randomly.
find_existing = random.sample(all_names, 100)
find_missing = [f"None_{i}" for i in range(10)]
find_names = find_existing + find_missing
random.shuffle(find_names)
# Deletion workload: 50 distinct existing names.
delete_names = random.sample(all_names, 50)
# Rows for results.csv: one per run plus one AVERAGE row per experiment.
all_results = []
# One dict of averaged timings per (structure, mode) experiment.
summary = []
def build_structure(struct_type, records):
    """Build the requested container by inserting ``records`` one by one.

    Args:
        struct_type: ``"LinkedList"`` or ``"HashTable"``; any other value
            builds a binary search tree.
        records: Iterable of ``(name, phone)`` pairs, inserted in iteration
            order.

    Returns:
        The linked-list head, the bucket list (``BUCKET_SIZE`` buckets) or
        the BST root, depending on ``struct_type``.
    """
    if struct_type == "LinkedList":
        head = None
        for name, phone in records:
            head = ll_insert(head, name, phone)
        return head

    if struct_type == "HashTable":
        buckets = [None] * BUCKET_SIZE
        for name, phone in records:
            ht_insert(buckets, name, phone)
        return buckets

    # Everything else is treated as a BST.
    root = None
    for name, phone in records:
        root = bst_insert(root, name, phone)
    return root
def do_find(struct_type, container, names):
    """Look up every name in ``names`` and discard the results.

    The lookup function is selected once, before the loop, so the timed loop
    contains only the lookups themselves.

    Args:
        struct_type: ``"LinkedList"`` or ``"HashTable"``; any other value is
            treated as a BST.
        container: Structure matching ``struct_type`` (list head, bucket list
            or tree root).
        names: Iterable of keys to search for.
    """
    if struct_type == "LinkedList":
        finder = ll_find
    elif struct_type == "HashTable":
        finder = ht_find
    else:
        finder = bst_find
    for name in names:
        finder(container, name)
def do_delete(struct_type, container, names):
    """Delete every name in ``names`` from the container.

    Args:
        struct_type: ``"LinkedList"`` or ``"HashTable"``; any other value is
            treated as a BST.
        container: Structure matching ``struct_type`` (list head, bucket list
            or tree root).
        names: Iterable of keys to delete.

    Returns:
        The container after all deletions. For the linked list and the BST
        this is the possibly changed head/root; for the hash table it is the
        same bucket list, modified in place.
    """
    if struct_type == "LinkedList":
        for name in names:
            container = ll_delete(container, name)
        return container

    if struct_type == "HashTable":
        for name in names:
            ht_delete(container, name)
        return container

    for name in names:
        container = bst_delete(container, name)
    return container
def run_one_test(struct_type, mode_name, records):
    """Time insert, find and delete for one structure and one input order.

    The structure is rebuilt from ``records`` on each of the ``REPEAT`` runs.
    Lookups use the module-level ``find_names`` and deletions the
    module-level ``delete_names``. Times are measured with
    ``time.perf_counter`` and are in seconds.

    Side effects:
        Appends one row per run and one ``"AVERAGE"`` row to ``all_results``,
        and one dict of averaged timings to ``summary``.

    Args:
        struct_type: Structure name passed to ``build_structure``,
            ``do_find`` and ``do_delete``.
        mode_name: Label of the input order, stored in the output rows.
        records: Sequence of ``(name, phone)`` pairs to insert.
    """
    ins_times = []
    find_times = []
    del_times = []

    for run in range(REPEAT):
        start = time.perf_counter()
        container = build_structure(struct_type, records)
        ins_times.append(time.perf_counter() - start)

        start = time.perf_counter()
        do_find(struct_type, container, find_names)
        find_times.append(time.perf_counter() - start)

        start = time.perf_counter()
        # The returned container is not needed: it is rebuilt on the next run.
        do_delete(struct_type, container, delete_names)
        del_times.append(time.perf_counter() - start)

        all_results.append([
            struct_type, mode_name, f"Run {run + 1}",
            ins_times[-1], find_times[-1], del_times[-1],
        ])

    avg_ins = sum(ins_times) / REPEAT
    avg_find = sum(find_times) / REPEAT
    avg_del = sum(del_times) / REPEAT

    all_results.append([
        struct_type, mode_name, "AVERAGE", avg_ins, avg_find, avg_del,
    ])
    summary.append({
        "name": struct_type,
        "mode": mode_name,
        "ins": avg_ins,
        "find": avg_find,
        "del": avg_del,
    })
# Run every structure against both input orders; results accumulate in
# all_results and summary via run_one_test.
print("Запуск экспериментов...")
for mode_name, data in [("случайный", records_shuffled), ("сортированный", records_sorted)]:
    for struct_type in ["LinkedList", "HashTable", "BST"]:
        print(f" {struct_type} ({mode_name})")
        run_one_test(struct_type, mode_name, data)
# Dump all per-run and averaged timings to a semicolon-separated CSV file.
# The file is written as UTF-8 with a BOM ("utf-8-sig").
csv_path = os.path.join(DATA_PATH, "results.csv")
with open(csv_path, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerow(["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"])
    writer.writerows(all_results)
print("CSV сохранён:", csv_path)
# Render two bar charts from the averaged timings in `summary`.
# Plotting is optional: when matplotlib cannot be imported a message is
# printed and the script continues.
try:
    import matplotlib.pyplot as plt

    plt.rcParams["font.sans-serif"] = ["Segoe UI", "Arial", "Tahoma", "DejaVu Sans"]
    plt.rcParams["axes.unicode_minus"] = False
    labels = ["insert", "find", "delete"]
    structs = ["LinkedList", "HashTable", "BST"]
    colors = ["#5dade2", "#e67e22", "#58d68d"]

    # Chart 1: one subplot per structure, random vs. sorted input side by side.
    fig1, axs = plt.subplots(1, 3, figsize=(15, 5))
    fig1.suptitle("Влияние порядка данных")
    x = [0, 1, 2]
    w = 0.35
    for i, s_name in enumerate(structs):
        rand_d = next(r for r in summary if r["name"] == s_name and r["mode"] == "случайный")
        sort_d = next(r for r in summary if r["name"] == s_name and r["mode"] == "сортированный")
        axs[i].bar([p - w / 2 for p in x], [rand_d["ins"], rand_d["find"], rand_d["del"]], w, label="случайный")
        axs[i].bar([p + w / 2 for p in x], [sort_d["ins"], sort_d["find"], sort_d["del"]], w, label="сортированный")
        axs[i].set_title(s_name)
        axs[i].set_xticks(x)
        axs[i].set_xticklabels(labels)
        axs[i].legend()
        axs[i].grid(axis="y", alpha=0.3)
    fig1.tight_layout()
    fig1.savefig(os.path.join(DATA_PATH, "order_impact.png"))
    plt.close(fig1)

    # Chart 2: one subplot per operation, one bar per summary entry.
    fig2, axs2 = plt.subplots(1, 3, figsize=(15, 5))
    fig2.suptitle(f"Сравнение структур (N={N})")
    for i, key in enumerate(["ins", "find", "del"]):
        vals = []
        names = []
        for r in summary:
            # Label: structure name plus the first four letters of the mode.
            names.append(f"{r['name']}\n({r['mode'][:4]})")
            vals.append(r[key])
        axs2[i].bar(names, vals, color=colors * 2)
        axs2[i].set_title(labels[i])
        axs2[i].tick_params(axis="x", rotation=20)
    fig2.tight_layout()
    fig2.savefig(os.path.join(DATA_PATH, "struct_comparison.png"))
    plt.close(fig2)
    print("Графики сохранены")
except ImportError:
    print("matplotlib не установлен")
# Write a Markdown report one directory above the script. Its image links
# point into "data/", i.e. back at the charts saved next to this script.
report_path = os.path.join(os.path.dirname(BASE), "report.md")
with open(report_path, "w", encoding="utf-8-sig") as f:
    f.write("# Отчёт: сравнение структур данных\n\n")
    f.write(f"N = {N}, повторов = {REPEAT}\n\n")
    f.write("| Структура | Режим | Вставка (с) | Поиск (с) | Удаление (с) |\n")
    f.write("| --- | --- | --- | --- | --- |\n")
    for r in summary:
        f.write(
            f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n"
        )
    f.write("\n## Графики\n\n")
    f.write("![Сравнение](data/struct_comparison.png)\n\n")
    f.write("![Порядок данных](data/order_impact.png)\n\n")
    f.write("## Выводы\n\n")
    f.write("- BST на отсортированных данных сильно тормозит (вырождение дерева).\n")
    f.write("- Хеш-таблица быстра на поиске и слабо зависит от порядка вставки.\n")
    f.write("- Связный список медленный при поиске.\n")
    f.write("- Для частого поиска предпочтительна хеш-таблица.\n")
print("Отчёт:", report_path)
print("Готово.")