328 lines
9.6 KiB
Python
328 lines
9.6 KiB
Python
import time
|
||
import random
|
||
import csv
|
||
import os
|
||
import sys
|
||
|
||
# The BST helpers in this script recurse once per tree level, and inserting
# already-sorted keys produces a tree whose height equals the record count,
# so the interpreter's default recursion limit is raised well above N.
sys.setrecursionlimit(20_000)

# Directory that contains this script; the CSV and the charts are written here.
BASE = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = BASE

N = 10_000  # number of records generated for every experiment
REPEAT = 5  # how many timed repetitions are averaged per experiment
|
||
|
||
|
||
def ll_insert(head, name, phone):
    """Prepend a record to a singly linked list and return the new head.

    Nodes are plain dicts with the keys ``"name"``, ``"phone"`` and
    ``"next"``.  ``head`` is the current first node, or ``None`` for an
    empty list.  No duplicate check is performed.
    """
    return {"name": name, "phone": phone, "next": head}
|
||
|
||
|
||
def ll_find(head, name):
    """Return the phone stored under ``name`` in a linked list.

    The list is scanned from ``head``; the phone of the first node whose
    ``"name"`` equals ``name`` is returned, or ``None`` when no node matches.
    """
    node = head
    while node:
        if node["name"] != name:
            node = node["next"]
            continue
        return node["phone"]
    return None
|
||
|
||
|
||
def ll_delete(head, name):
    """Unlink the first node named ``name`` and return the list head.

    Returns ``None`` for an empty list, the second node when the head itself
    matches, and the unchanged ``head`` otherwise (including when no node
    matches).  Only the first matching node is removed.
    """
    if head is None:
        return None
    if head["name"] == name:
        return head["next"]

    previous = head
    candidate = previous["next"]
    while candidate:
        if candidate["name"] == name:
            # Bypass the matching node; nothing after it needs to be visited.
            previous["next"] = candidate["next"]
            break
        previous = candidate
        candidate = previous["next"]
    return head
|
||
|
||
|
||
def ll_list_all(head):
    """Return every ``(name, phone)`` pair of a linked list, sorted ascending."""
    pairs = []
    node = head
    while node:
        pairs.append((node["name"], node["phone"]))
        node = node["next"]
    return sorted(pairs)
|
||
|
||
|
||
# Number of chains (slots) allocated when a hash table is built.
BUCKET_SIZE = 1000
|
||
|
||
|
||
def ht_insert(buckets, name, phone):
    """Add a record to a chained hash table in place.

    ``buckets`` is a list of linked-list heads (``None`` for an empty slot).
    The record is prepended to the chain selected by ``hash(name)``.
    """
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
|
||
|
||
|
||
def ht_find(buckets, name):
    """Return the phone stored under ``name`` in a chained hash table, or ``None``."""
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    return ll_find(chain, name)
|
||
|
||
|
||
def ht_delete(buckets, name):
    """Remove the first record named ``name`` from a chained hash table in place."""
    slot = hash(name) % len(buckets)
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
|
||
|
||
|
||
def ht_list_all(buckets):
    """Return every ``(name, phone)`` pair stored in a chained hash table, sorted.

    Each element of ``buckets`` is the head node of a linked chain (a dict
    with ``"name"``, ``"phone"`` and ``"next"``) or ``None``.
    """
    pairs = []
    for node in buckets:
        # Walk the chain hanging off this slot.
        while node:
            pairs.append((node["name"], node["phone"]))
            node = node["next"]
    return sorted(pairs)
|
||
|
||
|
||
def bst_insert(root, name, phone):
    """Insert or update a record in an unbalanced binary search tree.

    Nodes are dicts with the keys ``"name"``, ``"phone"``, ``"left"`` and
    ``"right"``.  When ``name`` is already present its phone is overwritten.

    The descent is iterative: inserting keys in sorted order produces a tree
    as tall as the number of keys, and a recursive descent would need one
    stack frame per level.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key of the record.
        phone: Value stored with the key.

    Returns:
        The root of the tree (a new node when the tree was empty).
    """
    if root is None:
        return {"name": name, "phone": phone, "left": None, "right": None}

    node = root
    while True:
        if name < node["name"]:
            side = "left"
        elif name > node["name"]:
            side = "right"
        else:
            # Key already present: update in place.
            node["phone"] = phone
            return root

        child = node[side]
        if child is None:
            node[side] = {"name": name, "phone": phone, "left": None, "right": None}
            return root
        node = child
|
||
|
||
|
||
def bst_find(root, name):
    """Return the phone stored under ``name`` in a binary search tree.

    The lookup walks down the tree iteratively, so it works on arbitrarily
    tall (degenerate) trees without consuming call-stack frames.

    Args:
        root: Root node (dict with ``"name"``, ``"phone"``, ``"left"``,
            ``"right"``), or ``None`` for an empty tree.
        name: Key to look up.

    Returns:
        The stored phone, or ``None`` when the key is absent.
    """
    node = root
    while node is not None:
        if name == node["name"]:
            return node["phone"]
        node = node["left"] if name < node["name"] else node["right"]
    return None
|
||
|
||
|
||
def bst_delete(root, name):
    """Remove the record named ``name`` from a binary search tree.

    A node with two children is not unlinked itself: the name and phone of
    its in-order successor (the leftmost node of its right subtree) are
    copied into it and the successor node is unlinked instead.

    The implementation is iterative, so it also works on degenerate trees
    whose height would exceed the interpreter's recursion limit.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.
        name: Key of the record to remove.

    Returns:
        The root of the resulting tree: ``None`` if the tree became empty,
        and the unchanged ``root`` when ``name`` is not present.
    """
    # Locate the node and remember its parent.
    parent = None
    node = root
    while node is not None and node["name"] != name:
        parent = node
        node = node["left"] if name < node["name"] else node["right"]
    if node is None:
        return root

    if node["left"] is not None and node["right"] is not None:
        # Two children: pull the successor's payload up, then remove the
        # successor, which by construction has no left child.
        succ_parent = node
        succ = node["right"]
        while succ["left"]:
            succ_parent = succ
            succ = succ["left"]
        node["name"] = succ["name"]
        node["phone"] = succ["phone"]
        parent, node = succ_parent, succ

    # ``node`` now has at most one child; splice that child into its place.
    child = node["left"] if node["left"] is not None else node["right"]
    if parent is None:
        return child
    if parent["left"] is node:
        parent["left"] = child
    else:
        parent["right"] = child
    return root
|
||
|
||
|
||
def bst_list_all(root):
    """Return every ``(name, phone)`` pair of a binary search tree in key order.

    Performs an in-order traversal with an explicit stack instead of
    recursion, so the traversal does not depend on the interpreter's
    recursion limit even when the tree has degenerated into a long chain.

    Args:
        root: Root node of the tree, or ``None`` for an empty tree.

    Returns:
        A list of ``(name, phone)`` tuples, ascending by name.
    """
    result = []
    pending = []  # ancestors whose left subtree is currently being visited
    node = root
    while pending or node is not None:
        # Dive to the leftmost unvisited node.
        while node is not None:
            pending.append(node)
            node = node["left"]
        node = pending.pop()
        result.append((node["name"], node["phone"]))
        node = node["right"]
    return result
|
||
|
||
|
||
def make_records(n):
    """Generate ``n`` synthetic ``(name, phone)`` records.

    Names are ``User_00000``, ``User_00001``, ...; the phone suffix is the
    index modulo 10000, zero-padded to four digits.
    """
    return [(f"User_{i:05d}", f"8-900-{i % 10000:04d}") for i in range(n)]
|
||
|
||
|
||
# Input data sets: the same N records in random order and in sorted order.
records_all = make_records(N)
records_shuffled = list(records_all)
random.shuffle(records_shuffled)
records_sorted = sorted(records_all)

# Lookup workload: 100 names that exist plus 10 that do not, interleaved randomly.
all_names = [record[0] for record in records_all]
find_existing = random.sample(all_names, 100)
find_missing = [f"None_{i}" for i in range(10)]
find_names = [*find_existing, *find_missing]
random.shuffle(find_names)

# Deletion workload: 50 distinct existing names.
delete_names = random.sample(all_names, 50)

# Accumulators filled in by run_one_test().
all_results = []  # one row per run plus one "AVERAGE" row per experiment
summary = []      # one dict of averaged timings per experiment
|
||
|
||
|
||
def build_structure(struct_type, records):
    """Build a container of the requested kind from ``(name, phone)`` records.

    ``"LinkedList"`` yields the head node of a linked list, ``"HashTable"``
    yields a list of ``BUCKET_SIZE`` chains, and any other value yields the
    root of a binary search tree.  Records are inserted in iteration order.
    """
    if struct_type == "HashTable":
        table = [None] * BUCKET_SIZE
        for name, phone in records:
            ht_insert(table, name, phone)
        return table

    # Linked list and BST share the "insert returns the new top node" protocol.
    insert = ll_insert if struct_type == "LinkedList" else bst_insert
    top = None
    for name, phone in records:
        top = insert(top, name, phone)
    return top
|
||
|
||
|
||
def do_find(struct_type, container, names):
    """Look up every name in ``names`` in ``container``; results are discarded.

    The lookup routine is selected once, before the loop, so the timed work
    consists of the lookups themselves rather than repeated string
    comparisons on ``struct_type``.

    Args:
        struct_type: ``"LinkedList"``, ``"HashTable"``, or anything else
            for a binary search tree.
        container: Structure of the matching kind.
        names: Iterable of names to look up.
    """
    if struct_type == "LinkedList":
        find = ll_find
    elif struct_type == "HashTable":
        find = ht_find
    else:
        find = bst_find

    for name in names:
        find(container, name)
|
||
|
||
|
||
def do_delete(struct_type, container, names):
    """Delete every name in ``names`` from ``container`` and return the container.

    For ``"HashTable"`` the bucket list is modified in place and returned.
    For ``"LinkedList"`` and the BST fallback the head/root may change, so
    the value returned by the last deletion is what the caller gets back.
    """
    if struct_type == "HashTable":
        for name in names:
            ht_delete(container, name)
        return container

    remove = ll_delete if struct_type == "LinkedList" else bst_delete
    top = container
    for name in names:
        top = remove(top, name)
    return top
|
||
|
||
|
||
def run_one_test(struct_type, mode_name, records):
    """Time build / find / delete for one structure and one input ordering.

    The experiment is repeated ``REPEAT`` times.  Each repetition builds a
    fresh container from ``records``, looks up ``find_names`` and removes
    ``delete_names``.  One row per repetition and a final ``"AVERAGE"`` row
    are appended to ``all_results``; the averages are also appended to
    ``summary`` as a dict.
    """
    ins_times, find_times, del_times = [], [], []

    for run_no in range(1, REPEAT + 1):
        began = time.perf_counter()
        container = build_structure(struct_type, records)
        ins_elapsed = time.perf_counter() - began

        began = time.perf_counter()
        do_find(struct_type, container, find_names)
        find_elapsed = time.perf_counter() - began

        began = time.perf_counter()
        do_delete(struct_type, container, delete_names)
        del_elapsed = time.perf_counter() - began

        ins_times.append(ins_elapsed)
        find_times.append(find_elapsed)
        del_times.append(del_elapsed)
        all_results.append(
            [struct_type, mode_name, f"Run {run_no}", ins_elapsed, find_elapsed, del_elapsed]
        )

    avg_ins, avg_find, avg_del = (
        sum(series) / REPEAT for series in (ins_times, find_times, del_times)
    )

    all_results.append([struct_type, mode_name, "AVERAGE", avg_ins, avg_find, avg_del])
    summary.append(
        {
            "name": struct_type,
            "mode": mode_name,
            "ins": avg_ins,
            "find": avg_find,
            "del": avg_del,
        }
    )
|
||
|
||
|
||
# Run every structure against both input orderings.
print("Запуск экспериментов...")
for mode_name, data in (
    ("случайный", records_shuffled),
    ("сортированный", records_sorted),
):
    for struct_type in ("LinkedList", "HashTable", "BST"):
        print(f" {struct_type} ({mode_name})")
        run_one_test(struct_type, mode_name, data)
|
||
|
||
# Dump every per-run and averaged measurement as a semicolon-separated CSV.
csv_path = os.path.join(DATA_PATH, "results.csv")
with open(csv_path, "w", newline="", encoding="utf-8-sig") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerows(
        [
            ["Структура", "Режим", "Итерация", "Вставка", "Поиск", "Удаление"],
            *all_results,
        ]
    )

print("CSV сохранён:", csv_path)
|
||
|
||
# Charts are optional: when matplotlib cannot be imported the script only
# reports that and carries on.
try:
    import matplotlib.pyplot as plt

    plt.rcParams["font.sans-serif"] = ["Segoe UI", "Arial", "Tahoma", "DejaVu Sans"]
    plt.rcParams["axes.unicode_minus"] = False

    labels = ["insert", "find", "delete"]
    structs = ["LinkedList", "HashTable", "BST"]
    colors = ["#5dade2", "#e67e22", "#58d68d"]

    # Figure 1: one panel per structure, random vs. sorted input side by side.
    fig1, axs = plt.subplots(1, 3, figsize=(15, 5))
    fig1.suptitle("Влияние порядка данных")

    x = [0, 1, 2]
    w = 0.35
    left_positions = [p - w / 2 for p in x]
    right_positions = [p + w / 2 for p in x]

    for ax, s_name in zip(axs, structs):
        rand_d = next(r for r in summary if r["name"] == s_name and r["mode"] == "случайный")
        sort_d = next(r for r in summary if r["name"] == s_name and r["mode"] == "сортированный")
        ax.bar(left_positions, [rand_d["ins"], rand_d["find"], rand_d["del"]], w, label="случайный")
        ax.bar(right_positions, [sort_d["ins"], sort_d["find"], sort_d["del"]], w, label="сортированный")
        ax.set_title(s_name)
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.legend()
        ax.grid(axis="y", alpha=0.3)

    fig1.tight_layout()
    fig1.savefig(os.path.join(DATA_PATH, "order_impact.png"))
    plt.close(fig1)

    # Figure 2: one panel per operation, one bar per (structure, ordering).
    fig2, axs2 = plt.subplots(1, 3, figsize=(15, 5))
    fig2.suptitle(f"Сравнение структур (N={N})")

    bar_names = [f"{r['name']}\n({r['mode'][:4]})" for r in summary]
    for ax, key, title in zip(axs2, ("ins", "find", "del"), labels):
        ax.bar(bar_names, [r[key] for r in summary], color=colors * 2)
        ax.set_title(title)
        ax.tick_params(axis="x", rotation=20)

    fig2.tight_layout()
    fig2.savefig(os.path.join(DATA_PATH, "struct_comparison.png"))
    plt.close(fig2)
    print("Графики сохранены")
except ImportError:
    print("matplotlib не установлен")
|
||
|
||
# Write a Markdown report one directory above the script: the averaged timing
# table, links to the generated charts, and fixed conclusions.
report_path = os.path.join(os.path.dirname(BASE), "report.md")
report_dir = os.path.dirname(report_path)
chart_files = (
    ("Влияние порядка данных", "order_impact.png"),
    ("Сравнение структур", "struct_comparison.png"),
)
with open(report_path, "w", encoding="utf-8-sig") as f:
    f.write("# Отчёт: сравнение структур данных\n\n")
    f.write(f"N = {N}, повторов = {REPEAT}\n\n")
    f.write("| Структура | Режим | Вставка (с) | Поиск (с) | Удаление (с) |\n")
    f.write("| --- | --- | --- | --- | --- |\n")
    for r in summary:
        f.write(
            f"| {r['name']} | {r['mode']} | {r['ins']:.6f} | {r['find']:.6f} | {r['del']:.6f} |\n"
        )
    f.write("\n## Графики\n\n")
    for caption, image_name in chart_files:
        image_path = os.path.join(DATA_PATH, image_name)
        if os.path.exists(image_path):
            # Link relative to the report so the pair can be moved together;
            # Markdown expects forward slashes regardless of platform.
            link = os.path.relpath(image_path, report_dir).replace(os.sep, "/")
            f.write(f"![{caption}]({link})\n\n")
        else:
            # Chart was not produced (e.g. matplotlib missing): keep the
            # section layout without a dangling image link.
            f.write("\n\n")
    f.write("## Выводы\n\n")
    f.write("- BST на отсортированных данных сильно тормозит (вырождение дерева).\n")
    f.write("- Хеш-таблица быстра на поиске и слабо зависит от порядка вставки.\n")
    f.write("- Связный список медленный при поиске.\n")
    f.write("- Для частого поиска предпочтительна хеш-таблица.\n")

print("Отчёт:", report_path)
print("Готово.")
|