[1] 1-st-exercize #193
88
KislyuninED/docks/data/1-st-exercize/experiment.py
Normal file
88
KislyuninED/docks/data/1-st-exercize/experiment.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
import random
|
||||
import time
|
||||
import csv
|
||||
from phonebook import ll_insert, ll_find, ll_delete, ll_list_all
|
||||
from phonebook import ht_create, ht_insert, ht_find, ht_delete, ht_list_all
|
||||
from phonebook import bst_insert, bst_find, bst_delete, bst_list_all
|
||||
|
||||
def generate_records(n, seed=42):
    """Build ``n`` reproducible ``(name, phone)`` records.

    Names run from ``User_00001`` upwards (zero-padded to five digits).
    Phones are ``DDD-DDDD`` strings drawn from the module-level ``random``
    generator.

    Note: the shared ``random`` generator is re-seeded with ``seed`` as a
    side effect, so later ``random`` calls in the same process continue
    from this seeded state.
    """
    random.seed(seed)

    def make_phone():
        # Draw the 3-digit prefix before the 4-digit suffix so the random
        # stream is always consumed in the same order.
        prefix = random.randint(100, 999)
        suffix = random.randint(1000, 9999)
        return f"{prefix}-{suffix}"

    return [(f"User_{idx:05d}", make_phone()) for idx in range(1, n + 1)]
|
||||
|
||||
def prepare_datasets(recs):
    """Return ``(shuffled, sorted_by_name)`` copies of ``recs``.

    ``recs`` itself is not modified.  The shuffle uses the module-level
    ``random`` generator, so its result depends on that generator's
    current state.
    """
    shuffled = recs.copy()
    random.shuffle(shuffled)

    # Order by the name field (first element of each record).
    by_name = list(recs)
    by_name.sort(key=lambda rec: rec[0])
    return shuffled, by_name
|
||||
|
||||
def measure_structure(create_func, insert_func, find_func, delete_func, records, repeats=5):
    """Time bulk insert, lookup and delete on one data structure.

    Args:
        create_func: zero-argument callable returning an empty structure.
        insert_func: ``insert_func(struct, name, phone)`` -> updated structure.
        find_func: ``find_func(struct, name)``; its result is ignored.
        delete_func: ``delete_func(struct, name)`` -> updated structure.
        records: sequence of ``(name, phone)`` pairs to insert.
        repeats: number of times the whole insert/find/delete cycle is run.

    Returns:
        ``(insert_times, find_times, delete_times)``: three lists holding
        one ``time.perf_counter()`` delta (seconds) per repeat.

    The lookup workload is up to 100 names sampled from ``records`` plus
    ten synthetic ``None_<i>`` names, shuffled together; the delete
    workload is up to 50 sampled names.  Both are drawn once from the
    module-level ``random`` generator and reused for every repeat.
    """
    insert_times = []
    find_times = []
    delete_times = []

    existing_names = [name for name, _ in records]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample sizes for small datasets.  For
    # datasets with at least 100 records the draws are unchanged.
    hit_count = min(100, len(existing_names))
    delete_count = min(50, len(existing_names))
    search_names = random.sample(existing_names, hit_count) + [f"None_{i}" for i in range(10)]
    random.shuffle(search_names)
    delete_names = random.sample(existing_names, delete_count)

    for _ in range(repeats):
        # Every repeat starts from a fresh, empty structure.
        struct = create_func()

        # insertion
        start = time.perf_counter()
        for name, phone in records:
            struct = insert_func(struct, name, phone)
        insert_times.append(time.perf_counter() - start)

        # lookup
        start = time.perf_counter()
        for name in search_names:
            find_func(struct, name)
        find_times.append(time.perf_counter() - start)

        # deletion
        start = time.perf_counter()
        for name in delete_names:
            struct = delete_func(struct, name)
        delete_times.append(time.perf_counter() - start)

    return insert_times, find_times, delete_times
|
||||
|
||||
def main():
    """Benchmark the three phonebook structures and write ``results.csv``.

    Generates 1000 records, prepares a shuffled and a name-sorted copy,
    runs ``measure_structure`` for every (structure, dataset) pair and
    writes one CSV row per repeat and operation with the columns
    ``Structure, Mode, Operation, Time_sec``.
    """
    record_count = 1000
    base = generate_records(record_count)
    shuffled, sorted_recs = prepare_datasets(base)
    datasets = [('random', shuffled), ('sorted', sorted_recs)]

    # (label, create, insert, find, delete).  The linked list and the BST
    # both represent the empty structure as None.
    structures = [
        ('LinkedList', lambda: None, ll_insert, ll_find, ll_delete),
        ('HashTable', ht_create, ht_insert, ht_find, ht_delete),
        ('BST', lambda: None, bst_insert, bst_find, bst_delete),
    ]

    results = []
    for label, create, insert, find, delete in structures:
        for mode, data in datasets:
            ins, fnd, dele = measure_structure(create, insert, find, delete, data)
            # Iterate over the timings actually returned instead of a
            # hard-coded repeat count, so this loop cannot drift out of
            # sync with measure_structure's `repeats`.
            for t_insert, t_find, t_delete in zip(ins, fnd, dele):
                results.append([label, mode, 'insert', t_insert])
                results.append([label, mode, 'find', t_find])
                results.append([label, mode, 'delete', t_delete])

    with open('results.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Structure','Mode','Operation','Time_sec'])
        writer.writerows(results)
    print("Results saved to results.csv")


if __name__ == '__main__':
    main()
|
||||
217
KislyuninED/docks/data/1-st-exercize/phonebook.py
Normal file
217
KislyuninED/docks/data/1-st-exercize/phonebook.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
# phonebook.py
|
||||
|
||||
# Узел списка: {'n': имя, 'p': телефон, 'nxt': следующий}
|
||||
def ll_insert(head, name, phone):
    """Insert or update ``name`` in a singly linked list; return the head.

    Nodes are dicts ``{'n': name, 'p': phone, 'nxt': next_node}`` and an
    empty list is ``None``.  If ``name`` is already present, its phone is
    overwritten in place and the head is unchanged; otherwise a new node
    is prepended and becomes the head.
    """
    # Walk to the node carrying `name`, or off the end of the list.
    node = head
    while node is not None and node['n'] != name:
        node = node['nxt']

    if node is None:
        # Not present: the new node is linked in front of the old head.
        return {'n': name, 'p': phone, 'nxt': head}

    node['p'] = phone
    return head
|
||||
|
||||
def ll_find(head, name):
    """Return the phone stored for ``name`` in the list, or ``None``."""
    node = head
    while node is not None and node['n'] != name:
        node = node['nxt']
    return None if node is None else node['p']
|
||||
|
||||
def ll_delete(head, name):
    """Unlink the first node named ``name``; return the (possibly new) head.

    Deleting from an empty list returns ``None``.  If ``name`` is absent,
    the list is returned unchanged.
    """
    if head is None:
        return None
    if head['n'] == name:
        # Dropping the head: its successor becomes the new head.
        return head['nxt']

    node = head
    while node['nxt'] is not None:
        follower = node['nxt']
        if follower['n'] == name:
            # Bypass the matching node.
            node['nxt'] = follower['nxt']
            break
        node = follower
    return head
|
||||
|
||||
def ll_list_all(head):
    """Return every ``(name, phone)`` pair in the list, sorted by name."""
    pairs = []
    node = head
    while node is not None:
        pairs.append((node['n'], node['p']))
        node = node['nxt']
    return sorted(pairs, key=lambda pair: pair[0])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# хеш-функция: сумма ord(name) % size
|
||||
def _hash(name, size):
    """Return the bucket index for ``name``: sum of code points mod ``size``."""
    return sum(ord(ch) for ch in name) % size
|
||||
|
||||
SIZE = 13  # fixed number of buckets in the table


def ht_create():
    """Return a new empty hash table: a list of ``SIZE`` ``None`` buckets."""
    return [None for _ in range(SIZE)]
|
||||
|
||||
def ht_insert(buckets, name, phone):
    """Insert or update ``name`` in the table; return ``buckets``.

    The table is modified in place: the bucket chosen by ``_hash`` is
    replaced with the head returned by ``ll_insert`` for that chain.
    """
    slot = _hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_insert(chain, name, phone)
    return buckets
|
||||
|
||||
def ht_find(buckets, name):
    """Look ``name`` up in its bucket chain; return whatever ``ll_find`` returns."""
    chain = buckets[_hash(name, len(buckets))]
    return ll_find(chain, name)
|
||||
|
||||
def ht_delete(buckets, name):
    """Remove ``name`` from the table (if present); return ``buckets``.

    The table is modified in place: the bucket chosen by ``_hash`` is
    replaced with the head returned by ``ll_delete`` for that chain.
    """
    slot = _hash(name, len(buckets))
    chain = buckets[slot]
    buckets[slot] = ll_delete(chain, name)
    return buckets
|
||||
|
||||
def ht_list_all(buckets):
    """Return every ``(name, phone)`` pair in the table, sorted by name."""

    def walk(node):
        # Yield the pairs of one bucket chain, front to back.
        while node:
            yield node['n'], node['p']
            node = node['nxt']

    pairs = [pair for chain in buckets for pair in walk(chain)]
    pairs.sort(key=lambda pair: pair[0])
    return pairs
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Узел дерева: {'n': имя, 'p': телефон, 'l': левый, 'r': правый}
|
||||
def bst_create_node(name, phone):
    """Return a leaf tree node ``{'n': name, 'p': phone, 'l': None, 'r': None}``."""
    return dict(n=name, p=phone, l=None, r=None)
|
||||
|
||||
def bst_insert(root, name, phone):
    """Insert or update ``name`` in the BST; return the root.

    The tree is ordered by name.  An existing name has its phone
    overwritten in place; a new name is attached as a leaf.  The descent
    is iterative (no recursion).
    """
    if root is None:
        return bst_create_node(name, phone)

    node = root
    while True:
        if name == node['n']:
            node['p'] = phone
            return root
        # Smaller names go left, larger names go right.
        side = 'l' if name < node['n'] else 'r'
        if not node[side]:
            # Free slot found: hang the new leaf here.
            node[side] = bst_create_node(name, phone)
            return root
        node = node[side]
|
||||
|
||||
def bst_find(root, name):
    """Return the phone stored for ``name`` in the BST, or ``None``."""
    node = root
    while node:
        key = node['n']
        if name == key:
            return node['p']
        node = node['l'] if name < key else node['r']
    return None
|
||||
|
||||
def _bst_min(node):
    """Return the leftmost node of the subtree rooted at ``node``.

    ``node`` must be a real node (not ``None``).
    NOTE(review): no caller of this helper is visible in this file.
    """
    left = node['l']
    while left:
        node, left = left, left['l']
    return node
|
||||
|
||||
def bst_delete(root, name):
    """Remove ``name`` from the BST (if present); return the root.

    The search and the removal are iterative.  A node with at most one
    child is replaced by that child (or by ``None``).  A node with two
    children takes over the name and phone of its in-order successor
    (the leftmost node of its right subtree), and the successor node is
    unlinked instead.

    Returns the new root; this differs from ``root`` only when the root
    itself is removed and has fewer than two children.
    """
    if root is None:
        return None

    # Locate the node to remove together with its parent.
    parent = None
    cur = root
    while cur and cur['n'] != name:
        parent = cur
        cur = cur['l'] if name < cur['n'] else cur['r']
    if cur is None:
        return root

    # Zero or one child: splice the child (possibly None) into cur's place.
    if cur['l'] is None or cur['r'] is None:
        child = cur['l'] if cur['l'] else cur['r']
        if parent is None:
            return child
        # Identity, not ==: `==` on dict nodes compares their contents,
        # while the question here is which link points at this exact node.
        if parent['l'] is cur:
            parent['l'] = child
        else:
            parent['r'] = child
        return root

    # Two children: find the in-order successor and its parent.
    succ_parent = cur
    succ = cur['r']
    while succ['l']:
        succ_parent = succ
        succ = succ['l']
    cur['n'], cur['p'] = succ['n'], succ['p']
    # The successor has no left child, so its right child (possibly None)
    # takes its place.  When the successor is cur's direct right child,
    # succ_parent is cur and the link to fix is the right one.
    if succ_parent['l'] is succ:
        succ_parent['l'] = succ['r']
    else:
        succ_parent['r'] = succ['r']
    return root
|
||||
|
||||
def bst_list_all(root):
    """Return every ``(name, phone)`` pair of the BST in ascending name order.

    Uses an explicit stack instead of recursion.  Inserting already-sorted
    names produces a chain as tall as the number of records, and a
    recursive in-order walk over such a tree exceeds Python's recursion
    limit at around a thousand nodes.
    """
    result = []
    stack = []
    node = root
    while stack or node:
        # Descend as far left as possible, remembering the path.
        while node:
            stack.append(node)
            node = node['l']
        # Visit the deepest unvisited node, then continue in its right subtree.
        node = stack.pop()
        result.append((node['n'], node['p']))
        node = node['r']
    return result
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# TESTING
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke demo: prints results for eyeballing against the
    # expected values noted in the trailing comments.
    print("=== Linked list test ===")
    contacts = None
    for person, number in (("Ivan", "111"), ("Anna", "222"), ("Ivan", "333")):
        contacts = ll_insert(contacts, person, number)
    print(ll_find(contacts, "Ivan"))      # 333 (second insert overwrote 111)
    print(ll_list_all(contacts))          # [('Anna','222'),('Ivan','333')]
    contacts = ll_delete(contacts, "Anna")
    print(ll_list_all(contacts))          # [('Ivan','333')]

    print("\n=== Hash table test ===")
    table = ht_create()
    # ht_insert mutates the table in place, so its return value is not needed.
    for person, number in (("Ivan", "111"), ("Boris", "444")):
        ht_insert(table, person, number)
    print(ht_find(table, "Ivan"))         # 111
    print(ht_list_all(table))             # [('Boris','444'),('Ivan','111')]

    print("\n=== BST test ===")
    tree = None
    for person, number in (("Ivan", "111"), ("Anna", "222"), ("Ivan", "333")):
        tree = bst_insert(tree, person, number)
    print(bst_find(tree, "Ivan"))         # 333
    print(bst_list_all(tree))             # [('Anna','222'),('Ivan','333')]
|
||||
35
KislyuninED/docks/data/1-st-exercize/plot_results.py
Normal file
35
KislyuninED/docks/data/1-st-exercize/plot_results.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
# Load the raw timings and average the repeats of every
# (structure, mode, operation) combination.
df = pd.read_csv('results.csv')
mean_df = (
    df.groupby(['Structure','Mode','Operation'])['Time_sec']
    .mean()
    .reset_index()
)

# One panel per operation, each with a random/sorted bar pair per structure.
fig, axes = plt.subplots(1, 3, figsize=(14,5))
panels = [('insert', 'Insertion'), ('find', 'Search'), ('delete', 'Deletion')]
width = 0.35


def _mean_time(frame, structure, mode):
    # Mean time for one structure/mode pair, or 0 when that pair has no row.
    picked = frame[(frame['Structure']==structure) & (frame['Mode']==mode)]['Time_sec'].values
    return picked[0] if len(picked) else 0


for ax, (op, title) in zip(axes, panels):
    subset = mean_df[mean_df['Operation'] == op]
    structures = subset['Structure'].unique()
    x = np.arange(len(structures))

    random_vals = [_mean_time(subset, s, 'random') for s in structures]
    sorted_vals = [_mean_time(subset, s, 'sorted') for s in structures]

    # Offset the two bars of each pair by half a bar width around the tick.
    ax.bar(x - width/2, random_vals, width, label='random')
    ax.bar(x + width/2, sorted_vals, width, label='sorted')
    ax.set_xticks(x)
    ax.set_xticklabels(structures)
    ax.set_ylabel('Time (seconds)')
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.savefig('performance.png', dpi=150)
plt.show()
print("График сохранён (performance.png)")
|
||||
91
KislyuninED/docks/data/1-st-exercize/results.csv
Normal file
91
KislyuninED/docks/data/1-st-exercize/results.csv
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
Structure,Mode,Operation,Time_sec
|
||||
LinkedList,random,insert,0.023610104999988835
|
||||
LinkedList,random,find,0.0024258809999082587
|
||||
LinkedList,random,delete,0.0009224560001257487
|
||||
LinkedList,random,insert,0.03432773700001235
|
||||
LinkedList,random,find,0.0028615219998755492
|
||||
LinkedList,random,delete,0.0009829489999901853
|
||||
LinkedList,random,insert,0.02187811499993586
|
||||
LinkedList,random,find,0.002508116999933918
|
||||
LinkedList,random,delete,0.0009394689998316608
|
||||
LinkedList,random,insert,0.02058078499999283
|
||||
LinkedList,random,find,0.0024640399999498186
|
||||
LinkedList,random,delete,0.0009221469999829424
|
||||
LinkedList,random,insert,0.021287126000061107
|
||||
LinkedList,random,find,0.002533143000164273
|
||||
LinkedList,random,delete,0.0009955239997907483
|
||||
LinkedList,sorted,insert,0.020153931999857377
|
||||
LinkedList,sorted,find,0.0025785160000850738
|
||||
LinkedList,sorted,delete,0.0009765429999788466
|
||||
LinkedList,sorted,insert,0.019765774000006786
|
||||
LinkedList,sorted,find,0.002487556999994922
|
||||
LinkedList,sorted,delete,0.0008901209998839477
|
||||
LinkedList,sorted,insert,0.018835716000012326
|
||||
LinkedList,sorted,find,0.0023183840000911005
|
||||
LinkedList,sorted,delete,0.0009144370001195057
|
||||
LinkedList,sorted,insert,0.019278175999943414
|
||||
LinkedList,sorted,find,0.002386138000019855
|
||||
LinkedList,sorted,delete,0.0009126009999818052
|
||||
LinkedList,sorted,insert,0.01877526999987822
|
||||
LinkedList,sorted,find,0.002359818000059022
|
||||
LinkedList,sorted,delete,0.0009194389999720443
|
||||
HashTable,random,insert,0.0023323159998653864
|
||||
HashTable,random,find,0.0002526580001358525
|
||||
HashTable,random,delete,0.00012695100008386362
|
||||
HashTable,random,insert,0.0024649750000662607
|
||||
HashTable,random,find,0.0002549820001149783
|
||||
HashTable,random,delete,0.00012324999988777563
|
||||
HashTable,random,insert,0.0023000859998774104
|
||||
HashTable,random,find,0.00025735399981385854
|
||||
HashTable,random,delete,0.0001301180000155
|
||||
HashTable,random,insert,0.0022806430001764966
|
||||
HashTable,random,find,0.00024959500001386914
|
||||
HashTable,random,delete,0.00012412399996719614
|
||||
HashTable,random,insert,0.0033660579999832407
|
||||
HashTable,random,find,0.0003928979999727744
|
||||
HashTable,random,delete,0.00013623100016957324
|
||||
HashTable,sorted,insert,0.0025681740000891295
|
||||
HashTable,sorted,find,0.00024172200005523337
|
||||
HashTable,sorted,delete,0.00011611300010372361
|
||||
HashTable,sorted,insert,0.0021931220001079055
|
||||
HashTable,sorted,find,0.0002396149998276087
|
||||
HashTable,sorted,delete,0.0001115909999498399
|
||||
HashTable,sorted,insert,0.002177270999936809
|
||||
HashTable,sorted,find,0.00026490999994166486
|
||||
HashTable,sorted,delete,0.0001120919998811587
|
||||
HashTable,sorted,insert,0.0021901160000652453
|
||||
HashTable,sorted,find,0.0002393899999333371
|
||||
HashTable,sorted,delete,0.00011373199981790094
|
||||
HashTable,sorted,insert,0.0021746099998836144
|
||||
HashTable,sorted,find,0.00024168799996004964
|
||||
HashTable,sorted,delete,0.00011215499989702948
|
||||
BST,random,insert,0.0011081129998729011
|
||||
BST,random,find,9.674199986875465e-05
|
||||
BST,random,delete,6.977399993957079e-05
|
||||
BST,random,insert,0.0011156380001011712
|
||||
BST,random,find,9.206000004269299e-05
|
||||
BST,random,delete,6.480000001829467e-05
|
||||
BST,random,insert,0.0010883550000926334
|
||||
BST,random,find,8.914799991543987e-05
|
||||
BST,random,delete,6.064600006538967e-05
|
||||
BST,random,insert,0.0010896240000874968
|
||||
BST,random,find,8.920699997361226e-05
|
||||
BST,random,delete,6.108699994911149e-05
|
||||
BST,random,insert,0.0010866299999179319
|
||||
BST,random,find,8.843199998409546e-05
|
||||
BST,random,delete,6.088700001782854e-05
|
||||
BST,sorted,insert,0.035164145999942775
|
||||
BST,sorted,find,0.003177170000071783
|
||||
BST,sorted,delete,0.0018665320001218788
|
||||
BST,sorted,insert,0.03501290000008339
|
||||
BST,sorted,find,0.003258286999880511
|
||||
BST,sorted,delete,0.0018976070000462641
|
||||
BST,sorted,insert,0.03562600000009297
|
||||
BST,sorted,find,0.0031255549999968935
|
||||
BST,sorted,delete,0.0018366239999068057
|
||||
BST,sorted,insert,0.03548556199984887
|
||||
BST,sorted,find,0.003188709999903949
|
||||
BST,sorted,delete,0.001886656000124276
|
||||
BST,sorted,insert,0.035131116000002294
|
||||
BST,sorted,find,0.0032029789999796776
|
||||
BST,sorted,delete,0.0018500549999771465
|
||||
|
BIN
KislyuninED/docks/performance.png
Normal file
BIN
KislyuninED/docks/performance.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 66 KiB |
55
KislyuninED/docks/report_1-st-exersize.md
Normal file
55
KislyuninED/docks/report_1-st-exersize.md
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Отчёт по лабе: телефонный справочник на трёх структурах
|
||||
|
||||
## Что делал
|
||||
|
||||
Реализовал три структуры для хранения записей (имя – телефон) без классов, только словари и ссылки:
|
||||
|
||||
1. **Связный список** – каждый узел `{'n': ..., 'p': ..., 'nxt': ...}` (имя, телефон, ссылка на следующий узел).
|
||||
Вставка в начало, перед этим проверка на дубликат (поиск по всему списку).
|
||||
|
||||
2. **Хеш-таблица** – 13 корзин, в каждой связный список. Хеш-функция: сумма кодов символов `% 13`.
|
||||
Вставка/поиск/удаление – через хеш + вызов функций списка для конкретной корзины.
|
||||
|
||||
3. **Двоичное дерево поиска** – узел `{'n': ..., 'p': ..., 'l': ..., 'r': ...}` (имя, телефон, левый и правый потомки).
|
||||
Вставка, поиск и удаление итеративные (циклы); при удалении узла с двумя потомками ищется inorder‑преемник.
|
||||
|
||||
Операции везде: `insert`, `find`, `delete`, `list_all` (для дерева – обход по порядку, для остальных – собрать всё в список и отсортировать).
|
||||
|
||||
## Эксперимент
|
||||
|
||||
Взял **1000 записей** вида `User_00001` … `User_01000`.
|
||||
Подготовил два набора: случайный порядок и отсортированный по имени.
|
||||
|
||||
Для каждой структуры и каждого набора:
|
||||
|
||||
- Замерял время вставки всех 1000 записей (через `time.perf_counter()`).
|
||||
- Затем поиск 110 имён (100 реальных + 10 вымышленных).
|
||||
- Потом удаление 50 случайных записей.
|
||||
|
||||
Каждый замер повторял 5 раз, брал среднее.
|
||||
Результаты сохранил в `results.csv`, потом построил график `performance.png`.
|
||||
|
||||
## Что получилось (график)
|
||||
|
||||

|
||||
|
||||
## Анализ
|
||||
|
||||
**BST**
|
||||
На случайных данных работал очень быстро (логарифм). А на отсортированных – ужасно: дерево выродилось в правую цепочку, высота стала 1000. По данным `results.csv` вставка замедлилась примерно в 32 раза (≈0,0011 с против ≈0,035 с), поиск и удаление просели примерно во столько же раз. Это классическая проблема небалансированного дерева.
|
||||
|
||||
**Хеш-таблица**
|
||||
Порядок данных почти не влияет. И в случайном, и в отсортированном режимах время одинаковое. Хеш-функция разбрасывает записи по корзинам, поэтому ей всё равно, откуда приходят данные.
|
||||
|
||||
**Связный список**
|
||||
Ожидаемо медленный везде, потому что поиск всегда линейный (`O(n)`). Разницы между случайным и отсортированным нет – список не умеет использовать порядок.
|
||||
|
||||
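**Удаление** – похоже на поиск по скорости, плюс чуть-чуть на перестановку ссылок. У хеш-таблицы удаление стабильно быстрое в обоих режимах; быстрее неё только BST на случайных данных, но на отсортированных данных BST оказывается самым медленным.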
|
||||
|
||||
## Выводы
|
||||
|
||||
- **Хеш-таблица** – лучший выбор, если нужен быстрый поиск и порядок вывода не важен. Стабильна и проста.
|
||||
- **Двоичное дерево поиска** – хороший вариант, если часто нужен отсортированный список, но **только при случайных данных**. Если данные могут прийти отсортированными, дерево сломается (станет как список). Надо брать сбалансированное (AVL, красно-чёрное).
|
||||
- **Связный список** – для реальной базы контактов не годится. Можно использовать только когда записей совсем мало (до сотни) или чисто в учебных целях.
|
||||
|
||||
Для телефонного справочника с тысячами записей я бы взял хеш-таблицу, а если надо часто выводить по алфавиту – сбалансированное дерево.
|
||||
Loading…
Reference in New Issue
Block a user