def generate_records(n, seed=42):
    """Build ``n`` deterministic phonebook records.

    Names are ``User_00001``, ``User_00002``, ... in ascending order; phones
    are pseudo-random ``NNN-NNNN`` strings.

    Note: this reseeds the *global* ``random`` generator. The shuffles and
    samples performed later by this script draw from the same generator, so
    the whole experiment is reproducible for a given ``seed``.

    Args:
        n: Number of records to generate.
        seed: Value passed to ``random.seed``.

    Returns:
        A list of ``(name, phone)`` tuples ordered by name.
    """
    random.seed(seed)
    records = []
    for i in range(1, n + 1):
        name = f"User_{i:05d}"
        phone = f"{random.randint(100,999)}-{random.randint(1000,9999)}"
        records.append((name, phone))
    return records


def prepare_datasets(recs):
    """Return ``(shuffled, sorted_by_name)`` copies of ``recs``.

    The input list is left untouched. The shuffle uses the global ``random``
    generator.
    """
    shuffled = recs.copy()
    random.shuffle(shuffled)
    sorted_recs = sorted(recs, key=lambda rec: rec[0])
    return shuffled, sorted_recs


def measure_structure(create_func, insert_func, find_func, delete_func, records,
                      repeats=5, search_hits=100, search_misses=10,
                      delete_count=50):
    """Time bulk insert, lookup and delete on one container implementation.

    The container is driven purely through the four callables, so any
    structure with a functional "returns the new root/handle" API fits.

    Args:
        create_func: ``() -> struct``; builds an empty container.
        insert_func: ``(struct, name, phone) -> struct``.
        find_func: ``(struct, name) -> phone or None``; result is ignored.
        delete_func: ``(struct, name) -> struct``.
        records: Sequence of ``(name, phone)`` tuples, inserted in this order.
        repeats: How many times the whole insert/find/delete cycle is run.
        search_hits: Number of existing names to look up (default 100).
        search_misses: Number of absent ``None_<i>`` names to look up
            (default 10).
        delete_count: Number of existing names to delete (default 50).

    Returns:
        ``(insert_times, find_times, delete_times)``: three lists with one
        wall-clock duration in seconds per repeat.
    """
    insert_times = []
    find_times = []
    delete_times = []

    existing_names = [name for name, _ in records]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the requested counts to what is available.
    # With the defaults and >= 100 records this draws exactly the same
    # samples (in the same RNG order) as the previous hard-coded version.
    hit_count = max(0, min(search_hits, len(existing_names)))
    del_count = max(0, min(delete_count, len(existing_names)))

    search_names = random.sample(existing_names, hit_count)
    search_names += [f"None_{i}" for i in range(search_misses)]
    random.shuffle(search_names)
    delete_names = random.sample(existing_names, del_count)

    for _ in range(repeats):
        # A fresh container per repeat keeps the runs independent.
        struct = create_func()

        # insertion
        start = time.perf_counter()
        for name, phone in records:
            struct = insert_func(struct, name, phone)
        insert_times.append(time.perf_counter() - start)

        # lookup (hits and misses interleaved)
        start = time.perf_counter()
        for name in search_names:
            find_func(struct, name)
        find_times.append(time.perf_counter() - start)

        # deletion
        start = time.perf_counter()
        for name in delete_names:
            struct = delete_func(struct, name)
        delete_times.append(time.perf_counter() - start)

    return insert_times, find_times, delete_times


def main():
    """Benchmark the three phonebook structures and write ``results.csv``.

    For every structure (linked list, hash table, BST) and every input order
    (random, sorted) one row per repeat and operation is written with the
    columns ``Structure, Mode, Operation, Time_sec``.
    """
    record_count = 1000
    base = generate_records(record_count)
    shuffled, sorted_recs = prepare_datasets(base)
    datasets = [('random', shuffled), ('sorted', sorted_recs)]

    # (label, create, insert, find, delete). The linked list and the BST
    # represent an empty container as None, hence the trivial factories.
    structures = [
        ('LinkedList', lambda: None, ll_insert, ll_find, ll_delete),
        ('HashTable', ht_create, ht_insert, ht_find, ht_delete),
        ('BST', lambda: None, bst_insert, bst_find, bst_delete),
    ]

    results = []
    for label, create, insert, find, delete in structures:
        for mode, data in datasets:
            ins, fnd, dele = measure_structure(create, insert, find, delete, data)
            # zip follows however many repeats were actually measured instead
            # of assuming a fixed count of five.
            for t_insert, t_find, t_delete in zip(ins, fnd, dele):
                results.append([label, mode, 'insert', t_insert])
                results.append([label, mode, 'find', t_find])
                results.append([label, mode, 'delete', t_delete])

    with open('results.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Structure','Mode','Operation','Time_sec'])
        writer.writerows(results)
    print("Results saved to results.csv")


if __name__ == '__main__':
    main()
# phonebook.py
#
# Three class-free phonebook containers built from plain dicts:
# a singly linked list, a chained hash table and an unbalanced BST.
# Every mutating function returns the (possibly new) root/handle.


# ---------------------------------------------------------------------------
# Linked list. Node layout: {'n': name, 'p': phone, 'nxt': next node or None}
# ---------------------------------------------------------------------------

def ll_insert(head, name, phone):
    """Insert ``name`` or update its phone; return the list head.

    The whole list is scanned first so that names stay unique; a new node is
    prepended and therefore becomes the new head.
    """
    curr = head
    while curr is not None:
        if curr['n'] == name:
            curr['p'] = phone
            return head
        curr = curr['nxt']
    return {'n': name, 'p': phone, 'nxt': head}


def ll_find(head, name):
    """Return the phone stored for ``name`` or ``None`` if it is absent."""
    curr = head
    while curr is not None:
        if curr['n'] == name:
            return curr['p']
        curr = curr['nxt']
    return None


def ll_delete(head, name):
    """Unlink the node named ``name`` (if any) and return the list head."""
    if head is None:
        return None
    if head['n'] == name:
        return head['nxt']
    prev = head
    curr = head['nxt']
    while curr is not None:
        if curr['n'] == name:
            prev['nxt'] = curr['nxt']
            return head
        prev = curr
        curr = curr['nxt']
    return head


def _ll_records(head):
    """Return the list's ``(name, phone)`` pairs in node order (unsorted)."""
    records = []
    curr = head
    while curr is not None:
        records.append((curr['n'], curr['p']))
        curr = curr['nxt']
    return records


def ll_list_all(head):
    """Return all ``(name, phone)`` pairs sorted by name."""
    records = _ll_records(head)
    records.sort(key=lambda rec: rec[0])
    return records


# ---------------------------------------------------------------------------
# Hash table with separate chaining: a list of linked-list heads.
# ---------------------------------------------------------------------------

def _hash(name, size):
    """Bucket index for ``name``: sum of code points modulo ``size``."""
    h = 0
    for ch in name:
        h += ord(ch)
    return h % size


SIZE = 13  # default number of buckets


def ht_create(size=SIZE):
    """Return an empty table with ``size`` buckets (default ``SIZE``).

    All other ``ht_*`` functions derive the bucket count from
    ``len(buckets)``, so tables of any size work with them.

    Raises:
        ValueError: if ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be at least 1")
    return [None] * size


def ht_insert(buckets, name, phone):
    """Insert or update ``name`` in its bucket; return ``buckets``."""
    idx = _hash(name, len(buckets))
    buckets[idx] = ll_insert(buckets[idx], name, phone)
    return buckets


def ht_find(buckets, name):
    """Return the phone stored for ``name`` or ``None`` if it is absent."""
    idx = _hash(name, len(buckets))
    return ll_find(buckets[idx], name)


def ht_delete(buckets, name):
    """Remove ``name`` from its bucket (if present); return ``buckets``."""
    idx = _hash(name, len(buckets))
    buckets[idx] = ll_delete(buckets[idx], name)
    return buckets


def ht_list_all(buckets):
    """Return all ``(name, phone)`` pairs of the table sorted by name."""
    all_records = []
    for head in buckets:
        all_records.extend(_ll_records(head))
    all_records.sort(key=lambda rec: rec[0])
    return all_records


# ---------------------------------------------------------------------------
# Unbalanced binary search tree.
# Node layout: {'n': name, 'p': phone, 'l': left child, 'r': right child}
# ---------------------------------------------------------------------------

def bst_create_node(name, phone):
    """Return a new leaf node holding ``name`` and ``phone``."""
    return {'n': name, 'p': phone, 'l': None, 'r': None}


def bst_insert(root, name, phone):
    """Insert ``name`` or update its phone; return the tree root.

    Iterative on purpose: sorted input degenerates the tree into a chain as
    deep as the number of records, which would overflow a recursive descent.
    """
    if root is None:
        return bst_create_node(name, phone)
    cur = root
    while True:
        if name == cur['n']:
            cur['p'] = phone
            return root
        side = 'l' if name < cur['n'] else 'r'
        if cur[side] is None:
            # Reached the free slot: attach the new leaf here.
            cur[side] = bst_create_node(name, phone)
            return root
        cur = cur[side]


def bst_find(root, name):
    """Return the phone stored for ``name`` or ``None`` if it is absent."""
    cur = root
    while cur is not None:
        if name == cur['n']:
            return cur['p']
        cur = cur['l'] if name < cur['n'] else cur['r']
    return None


def _bst_min(node):
    """Return the leftmost (smallest-name) node of the subtree at ``node``."""
    while node['l'] is not None:
        node = node['l']
    return node


def bst_delete(root, name):
    """Remove ``name`` from the tree (if present); return the tree root."""
    # Locate the node and its parent.
    parent = None
    cur = root
    while cur is not None and cur['n'] != name:
        parent = cur
        cur = cur['l'] if name < cur['n'] else cur['r']
    if cur is None:
        return root  # empty tree or name not present

    if cur['l'] is not None and cur['r'] is not None:
        # Two children: copy the in-order successor's payload into this node
        # and unlink the successor (which has no left child) instead.
        succ_parent = cur
        succ = cur['r']
        while succ['l'] is not None:
            succ_parent = succ
            succ = succ['l']
        cur['n'], cur['p'] = succ['n'], succ['p']
        # Identity, not ==: dict equality would compare subtrees structurally.
        if succ_parent['l'] is succ:
            succ_parent['l'] = succ['r']
        else:
            succ_parent['r'] = succ['r']
        return root

    # Zero or one child: splice the (possibly None) child into cur's place.
    child = cur['l'] if cur['l'] is not None else cur['r']
    if parent is None:
        return child
    if parent['l'] is cur:
        parent['l'] = child
    else:
        parent['r'] = child
    return root


def bst_list_all(root):
    """Return all ``(name, phone)`` pairs in ascending name order.

    Uses an explicit stack for the in-order walk so that degenerate
    (chain-shaped) trees deeper than the interpreter's recursion limit can
    still be listed.
    """
    result = []
    stack = []
    node = root
    while stack or node is not None:
        # Descend as far left as possible, remembering the path.
        while node is not None:
            stack.append(node)
            node = node['l']
        node = stack.pop()
        result.append((node['n'], node['p']))
        node = node['r']
    return result


# TESTING

if __name__ == '__main__':
    print("=== Linked list test ===")
    head = None
    head = ll_insert(head, "Ivan", "111")
    head = ll_insert(head, "Anna", "222")
    head = ll_insert(head, "Ivan", "333")
    print(ll_find(head, "Ivan"))        # 333
    print(ll_list_all(head))            # [('Anna','222'),('Ivan','333')]
    head = ll_delete(head, "Anna")
    print(ll_list_all(head))            # [('Ivan','333')]

    print("\n=== Hash table test ===")
    buckets = ht_create()
    ht_insert(buckets, "Ivan", "111")
    ht_insert(buckets, "Boris", "444")
    print(ht_find(buckets, "Ivan"))     # 111
    print(ht_list_all(buckets))         # [('Boris','444'),('Ivan','111')]

    print("\n=== BST test ===")
    root = None
    root = bst_insert(root, "Ivan", "111")
    root = bst_insert(root, "Anna", "222")
    root = bst_insert(root, "Ivan", "333")
    print(bst_find(root, "Ivan"))       # 333
    print(bst_list_all(root))           # [('Anna','222'),('Ivan','333')]
# Bar width shared by the paired "random"/"sorted" bars.
BAR_WIDTH = 0.35


def _mean_time(frame, structure, mode):
    """Return the averaged time for (structure, mode) in ``frame``, or 0 if missing."""
    mask = (frame['Structure'] == structure) & (frame['Mode'] == mode)
    values = frame[mask]['Time_sec'].values
    return values[0] if len(values) else 0


# Load the raw per-repeat timings and average the repeats of every
# (structure, input order, operation) combination.
df = pd.read_csv('results.csv')
mean_df = df.groupby(['Structure','Mode','Operation'])['Time_sec'].mean().reset_index()

# One panel per operation, side by side.
fig, axes = plt.subplots(1, 3, figsize=(14,5))
operations = ['insert','find','delete']
titles = ['Insertion', 'Search', 'Deletion']

for panel, operation, heading in zip(axes, operations, titles):
    per_operation = mean_df[mean_df['Operation'] == operation]
    names = per_operation['Structure'].unique()
    positions = np.arange(len(names))

    random_vals = [_mean_time(per_operation, name, 'random') for name in names]
    sorted_vals = [_mean_time(per_operation, name, 'sorted') for name in names]

    # Two bars per structure, centred on the tick position.
    panel.bar(positions - BAR_WIDTH/2, random_vals, BAR_WIDTH, label='random')
    panel.bar(positions + BAR_WIDTH/2, sorted_vals, BAR_WIDTH, label='sorted')
    panel.set_xticks(positions)
    panel.set_xticklabels(names)
    panel.set_ylabel('Time (seconds)')
    panel.set_title(heading)
    panel.legend()

plt.tight_layout()
plt.savefig('performance.png', dpi=150)
plt.show()
print("График сохранён (performance.png)")
+Structure,Mode,Operation,Time_sec +LinkedList,random,insert,0.023610104999988835 +LinkedList,random,find,0.0024258809999082587 +LinkedList,random,delete,0.0009224560001257487 +LinkedList,random,insert,0.03432773700001235 +LinkedList,random,find,0.0028615219998755492 +LinkedList,random,delete,0.0009829489999901853 +LinkedList,random,insert,0.02187811499993586 +LinkedList,random,find,0.002508116999933918 +LinkedList,random,delete,0.0009394689998316608 +LinkedList,random,insert,0.02058078499999283 +LinkedList,random,find,0.0024640399999498186 +LinkedList,random,delete,0.0009221469999829424 +LinkedList,random,insert,0.021287126000061107 +LinkedList,random,find,0.002533143000164273 +LinkedList,random,delete,0.0009955239997907483 +LinkedList,sorted,insert,0.020153931999857377 +LinkedList,sorted,find,0.0025785160000850738 +LinkedList,sorted,delete,0.0009765429999788466 +LinkedList,sorted,insert,0.019765774000006786 +LinkedList,sorted,find,0.002487556999994922 +LinkedList,sorted,delete,0.0008901209998839477 +LinkedList,sorted,insert,0.018835716000012326 +LinkedList,sorted,find,0.0023183840000911005 +LinkedList,sorted,delete,0.0009144370001195057 +LinkedList,sorted,insert,0.019278175999943414 +LinkedList,sorted,find,0.002386138000019855 +LinkedList,sorted,delete,0.0009126009999818052 +LinkedList,sorted,insert,0.01877526999987822 +LinkedList,sorted,find,0.002359818000059022 +LinkedList,sorted,delete,0.0009194389999720443 +HashTable,random,insert,0.0023323159998653864 +HashTable,random,find,0.0002526580001358525 +HashTable,random,delete,0.00012695100008386362 +HashTable,random,insert,0.0024649750000662607 +HashTable,random,find,0.0002549820001149783 +HashTable,random,delete,0.00012324999988777563 +HashTable,random,insert,0.0023000859998774104 +HashTable,random,find,0.00025735399981385854 +HashTable,random,delete,0.0001301180000155 +HashTable,random,insert,0.0022806430001764966 +HashTable,random,find,0.00024959500001386914 +HashTable,random,delete,0.00012412399996719614 
+HashTable,random,insert,0.0033660579999832407 +HashTable,random,find,0.0003928979999727744 +HashTable,random,delete,0.00013623100016957324 +HashTable,sorted,insert,0.0025681740000891295 +HashTable,sorted,find,0.00024172200005523337 +HashTable,sorted,delete,0.00011611300010372361 +HashTable,sorted,insert,0.0021931220001079055 +HashTable,sorted,find,0.0002396149998276087 +HashTable,sorted,delete,0.0001115909999498399 +HashTable,sorted,insert,0.002177270999936809 +HashTable,sorted,find,0.00026490999994166486 +HashTable,sorted,delete,0.0001120919998811587 +HashTable,sorted,insert,0.0021901160000652453 +HashTable,sorted,find,0.0002393899999333371 +HashTable,sorted,delete,0.00011373199981790094 +HashTable,sorted,insert,0.0021746099998836144 +HashTable,sorted,find,0.00024168799996004964 +HashTable,sorted,delete,0.00011215499989702948 +BST,random,insert,0.0011081129998729011 +BST,random,find,9.674199986875465e-05 +BST,random,delete,6.977399993957079e-05 +BST,random,insert,0.0011156380001011712 +BST,random,find,9.206000004269299e-05 +BST,random,delete,6.480000001829467e-05 +BST,random,insert,0.0010883550000926334 +BST,random,find,8.914799991543987e-05 +BST,random,delete,6.064600006538967e-05 +BST,random,insert,0.0010896240000874968 +BST,random,find,8.920699997361226e-05 +BST,random,delete,6.108699994911149e-05 +BST,random,insert,0.0010866299999179319 +BST,random,find,8.843199998409546e-05 +BST,random,delete,6.088700001782854e-05 +BST,sorted,insert,0.035164145999942775 +BST,sorted,find,0.003177170000071783 +BST,sorted,delete,0.0018665320001218788 +BST,sorted,insert,0.03501290000008339 +BST,sorted,find,0.003258286999880511 +BST,sorted,delete,0.0018976070000462641 +BST,sorted,insert,0.03562600000009297 +BST,sorted,find,0.0031255549999968935 +BST,sorted,delete,0.0018366239999068057 +BST,sorted,insert,0.03548556199984887 +BST,sorted,find,0.003188709999903949 +BST,sorted,delete,0.001886656000124276 +BST,sorted,insert,0.035131116000002294 +BST,sorted,find,0.0032029789999796776 
+BST,sorted,delete,0.0018500549999771465 diff --git a/KislyuninED/docks/performance.png b/KislyuninED/docks/performance.png new file mode 100644 index 0000000..b2a4d02 Binary files /dev/null and b/KislyuninED/docks/performance.png differ diff --git a/KislyuninED/docks/report_1-st-exersize.md b/KislyuninED/docks/report_1-st-exersize.md new file mode 100644 index 0000000..1324240 --- /dev/null +++ b/KislyuninED/docks/report_1-st-exersize.md @@ -0,0 +1,55 @@ +# Отчёт по лабе: телефонный справочник на трёх структурах + +## Что делал + +Реализовал три структуры для хранения записей (имя – телефон) без классов, только словари и ссылки: + +1. **Связный список** – каждый узел `{'n': ..., 'p': ..., 'nxt': ...}` (имя, телефон, следующий узел). + Вставка в начало, перед этим проверка на дубликат (поиск по всему списку). + +2. **Хеш-таблица** – 13 корзин, в каждой связный список. Хеш-функция: сумма кодов символов `% 13`. + Вставка/поиск/удаление – через хеш + вызов функций списка для конкретной корзины. + +3. **Двоичное дерево поиска** – узел `{'n': ..., 'p': ..., 'l': ..., 'r': ...}` (имя, телефон, левый и правый потомок). + Вставка, поиск и удаление итеративные (циклы); при удалении узла с двумя детьми ищется inorder‑преемник. + +Операции везде: `insert`, `find`, `delete`, `list_all` (для дерева – обход по порядку, для остальных – собрать всё в список и отсортировать). + +## Эксперимент + +Взял **1000 записей** вида `User_00001` … `User_01000`. +Подготовил два набора: случайный порядок и отсортированный по имени. + +Для каждой структуры и каждого набора: + +- Замерял время вставки всех 1000 записей (через `time.perf_counter()`). +- Затем поиск 110 имён (100 реальных + 10 вымышленных). +- Потом удаление 50 случайных записей. + +Каждый замер повторял 5 раз, брал среднее. +Результаты сохранил в `results.csv`, потом построил график `performance.png`. + +## Что получилось (график) + +![performance](performance.png) + +## Анализ + +**BST** +На случайных данных работал очень быстро (логарифм). 
А на отсортированных – ужасно: дерево выродилось в правую цепочку, высота стала 1000. Вставка замедлилась в ~32 раза (в среднем 0,0353 с против 0,0011 с по `results.csv`), поиск и удаление просели примерно так же (в ~35 и ~29 раз). Это классическая проблема небалансированного дерева. + +**Хеш-таблица** +Порядок данных почти не влияет. И в случайном, и в отсортированном режимах время практически одинаковое. Хеш-функция разбрасывает записи по корзинам, поэтому ей всё равно, откуда приходят данные. При 13 корзинах на 1000 записей цепочки получаются длиной около 77 узлов, так что операции остаются линейными – просто список примерно в 13 раз короче. + +**Связный список** +Ожидаемо медленный везде, потому что поиск всегда линейный (`O(n)`). Разницы между случайным и отсортированным нет – список не умеет использовать порядок. + +**Удаление** – похоже на поиск по скорости, плюс чуть-чуть на перестановку ссылок. Быстрее всего удаляет BST на случайных данных (~0,00006 с); хеш-таблица примерно вдвое медленнее (~0,00012 с), зато её время не зависит от порядка данных. + +## Выводы + +- **Хеш-таблица** – лучший выбор, если нужен быстрый поиск и порядок вывода не важен. Стабильна и проста. +- **Двоичное дерево поиска** – хороший вариант, если часто нужен отсортированный список, но **только при случайных данных**. Если данные могут прийти отсортированными, дерево сломается (станет как список). Надо брать сбалансированное (AVL, красно-чёрное). +- **Связный список** – для реальной базы контактов не годится. Можно использовать только когда записей совсем мало (до сотни) или чисто в учебных целях. + +Для телефонного справочника с тысячами записей я бы взял хеш-таблицу, а если надо часто выводить по алфавиту – сбалансированное дерево. \ No newline at end of file