From fccea76cefd51a6cfc3278fa34c4e2c018f60e92 Mon Sep 17 00:00:00 2001 From: 4eker <423785z@gmail.com> Date: Sun, 10 May 2026 13:00:19 +0300 Subject: [PATCH] add test for HashTable and BinaryTree, bug fix --- pomelovsd/DataStruct/BinaryTree.py | 64 ++-- pomelovsd/DataStruct/data_structures.ipynb | 373 +++++++++++++++++++-- 2 files changed, 379 insertions(+), 58 deletions(-) diff --git a/pomelovsd/DataStruct/BinaryTree.py b/pomelovsd/DataStruct/BinaryTree.py index c2ed808..9d505ec 100644 --- a/pomelovsd/DataStruct/BinaryTree.py +++ b/pomelovsd/DataStruct/BinaryTree.py @@ -1,53 +1,59 @@ -def create_node(name,phone): +def create_node(name, phone): return {"name": name, "phone": phone, "left": None, "right": None} def bst_insert(root, name, phone): - # Случай, если нет узлов if root is None: return create_node(name, phone) - # Случай, если узел слева - elif name < root["name"]: + if name < root["name"]: root["left"] = bst_insert(root["left"], name, phone) - # Случай, если узел справа elif name > root["name"]: root["right"] = bst_insert(root["right"], name, phone) - # Случай, если мы перезаписываем узел else: - root["phone"] = phone + root["phone"] = phone + return root + +def bst_insert_sort(sorted_data, left, right): + if left > right: + return None + mid = (left + right) // 2 + name, phone = sorted_data[mid] + root = create_node(name, phone) + root["left"] = bst_insert_sort(sorted_data, left, mid - 1) + root["right"] = bst_insert_sort(sorted_data, mid + 1, right) return root def bst_find(root, name): if root is None: return None - elif root == root["name"]: + if name == root["name"]: return root["phone"] - elif root < root["name"]: + if name < root["name"]: return bst_find(root["left"], name) - else: - return bst_find(root["right"],name) + return bst_find(root["right"], name) def bst_delete(root, name): if root is None: return None - elif root < root["name"]: - return bst_delete(root["left"], name) - elif root > root["name"]: - return bst_delete(root["right"], name) 
- # Узел, который надо удалить - else: - # Если нет потомков - if (root["left"] and root["right"]) is None: - return None - # Если есть только левый потомок + + if name < root["name"]: + root["left"] = bst_delete(root["left"], name) + elif name > root["name"]: + root["right"] = bst_delete(root["right"], name) + else: + # Нет детей или только один ребенок if root["left"] is None: return root["right"] - # Если есть только правый потомок if root["right"] is None: return root["left"] - if (root["left"] or root["right"]) is None: - while current and current['left']: - current = current['left'] - root["name"] = current["name"] - root["phone"] = current["phone"] - root["right"] = bst_delete(root["right"],current["name"]) - return root \ No newline at end of file + + # Два ребенка + current = root["right"] + while current["left"] is not None: + current = current["left"] + + root["name"] = current["name"] + root["phone"] = current["phone"] + # Удаляем преемника + root["right"] = bst_delete(root["right"], current["name"]) + + return root \ No newline at end of file diff --git a/pomelovsd/DataStruct/data_structures.ipynb b/pomelovsd/DataStruct/data_structures.ipynb index 94d6967..fac6f8a 100644 --- a/pomelovsd/DataStruct/data_structures.ipynb +++ b/pomelovsd/DataStruct/data_structures.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 51, + "execution_count": 1, "id": "c533959c", "metadata": {}, "outputs": [], @@ -13,7 +13,8 @@ "import time \n", "import random as rand\n", "import csv\n", - "import numpy as np" + "import numpy as np\n", + "import sys" ] }, { @@ -26,15 +27,16 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 2, "id": "88611f78", "metadata": {}, "outputs": [], "source": [ "N = 10000\n", + "sys.setrecursionlimit(10000) \n", "records_sorted = [(f\"User_{i:05d}\", f\"+7999{i:07d}\") for i in range(N)] \n", "records_shuffled = records_sorted.copy()\n", - "rand.shuffle(records_shuffled) " + 
"rand.shuffle(records_shuffled)" ] }, { @@ -42,7 +44,7 @@ "id": "9fd1b8cd", "metadata": {}, "source": [ - "## Исследование для LinkedList " + "## Исследование для LinkedList" ] }, { @@ -51,18 +53,20 @@ "metadata": {}, "source": [ "### Добавление всех элементов произвольного кортежа\n", - "- **data_ll_sh** - структура произвольных данных (только последний замер)\n", - "- **time_ll_insert_sh** - Замер времени работы 10000 элементов (5 замеров) " + "- **data_ll_sh** - Структура произвольных данных (только последний замер)\n", + "- **time_ll_insert_sh** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_ll_sh** - Массив голов для массив для произвольного массива" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 3, "id": "11634fa4", "metadata": {}, "outputs": [], "source": [ "time_ll_insert_sh = [] \n", + "heads_ll_sh = []\n", "for n in range(5):\n", " head = None\n", " data_ll_sh = []\n", @@ -71,6 +75,7 @@ " head = ll.ll_insert(head, records_shuffled[i][0], records_shuffled[i][1])\n", " data_ll_sh.append(head)\n", " end = time.perf_counter()\n", + " heads_ll_sh.append(head)\n", " time_ll_insert_sh.append(end - start)" ] }, @@ -81,17 +86,19 @@ "source": [ "### Добавление всех элементов сортированного кортежа\n", "- **data_ll_so** - Структура отсортированных данных (только последний замер)\n", - "- **time_ll_insert_so** - Замер времени работы 10000 элементов (5 замеров) " + "- **time_ll_insert_so** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_ll_so** - Массив голов для массив для сортированного массива" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 4, "id": "9eab4641", "metadata": {}, "outputs": [], "source": [ "time_ll_insert_so = [] \n", + "heads_ll_so = []\n", "for n in range(5):\n", " head = None\n", " data_ll_so = []\n", @@ -100,6 +107,7 @@ " head = ll.ll_insert(head, records_sorted[i][0], records_sorted[i][1])\n", " data_ll_so.append(head)\n", " end = 
time.perf_counter()\n", + " heads_ll_so.append(head)\n", " time_ll_insert_so.append(end - start)" ] }, @@ -115,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 5, "id": "aac6cd23", "metadata": {}, "outputs": [], @@ -147,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 6, "id": "5e5ae537", "metadata": {}, "outputs": [], @@ -178,31 +186,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "6cdf8a70", "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "9873", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[59], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m i \u001b[38;5;241m=\u001b[39m rand\u001b[38;5;241m.\u001b[39mrandint(\u001b[38;5;241m0\u001b[39m, N\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6\u001b[0m str_delete \u001b[38;5;241m=\u001b[39m records_shuffled[i][\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m----> 7\u001b[0m data_ll_sh \u001b[38;5;241m=\u001b[39m ll\u001b[38;5;241m.\u001b[39mll_delete(\u001b[43mdata_ll_sh\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m[\u001b[38;5;241m2\u001b[39m], str_delete)\n\u001b[1;32m 8\u001b[0m end \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mperf_counter()\n\u001b[1;32m 9\u001b[0m time_ll_delete_sh\u001b[38;5;241m.\u001b[39mappend(end \u001b[38;5;241m-\u001b[39m start)\n", - "\u001b[0;31mKeyError\u001b[0m: 9873" - ] - } - ], + "outputs": [], "source": [ "time_ll_delete_sh = []\n", "for n in range(5):\n", + " current_head = heads_ll_sh[n]\n", + "\n", " start = time.perf_counter()\n", " for m in range(50): \n", " i = rand.randint(0, N-1)\n", " str_delete = records_shuffled[i][0]\n", - " data_ll_sh = ll.ll_delete(data_ll_sh[i][2], str_delete)\n", + " current_head = 
ll.ll_delete(current_head, str_delete)\n", " end = time.perf_counter()\n", + "\n", " time_ll_delete_sh.append(end - start)" ] }, @@ -211,9 +210,325 @@ "id": "8d6156e9", "metadata": {}, "source": [ - "### Удаление элементов в произвольном массиве\n", - "- **time_ll_delete_sh** - Време поиска в произвольном массиве (для 5 замеров)" + "### Удаление элементов в сортированном массиве\n", + "- **time_ll_delete_so** - Время удаления в сортированном массиве (для 5 замеров)" ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "575e375c", + "metadata": {}, + "outputs": [], + "source": [ + "time_ll_delete_so = []\n", + "for n in range(5):\n", + " current_head = heads_ll_so[n]\n", + "\n", + " start = time.perf_counter()\n", + " for m in range(50): \n", + " i = rand.randint(0, N-1)\n", + " str_delete = records_sorted[i][0]\n", + " current_head = ll.ll_delete(current_head, str_delete)\n", + " end = time.perf_counter()\n", + "\n", + " time_ll_delete_so.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "9a95a40b", + "metadata": {}, + "source": [ + "## Исследование BinaryTree" + ] + }, + { + "cell_type": "markdown", + "id": "54c92d21", + "metadata": {}, + "source": [ + "### Добавление всех элементов произвольного кортежа\n", + "- **data_bt_sh** - Структура произвольных данных (только последний замер)\n", + "- **time_bt_insert_sh** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_bt_sh** - Массив голов для произвольного массива" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "939aa900", + "metadata": {}, + "outputs": [], + "source": [ + "time_bt_insert_sh = [] \n", + "heads_bt_sh = []\n", + "for n in range(5):\n", + " head = None\n", + " data_bt_sh = []\n", + " start = time.perf_counter()\n", + " for i in range(N):\n", + " head = bt.bst_insert(head, records_shuffled[i][0], records_shuffled[i][1])\n", + " data_bt_sh.append(head)\n", + " end = time.perf_counter()\n", + " heads_bt_sh.append(head)\n", + " 
time_bt_insert_sh.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "e91b5893", + "metadata": {}, + "source": [ + "### Добавление всех элементов сортированного кортежа\n", + "- **data_bt_so** - Структура сортированных данных (только последний замер)\n", + "- **time_bt_insert_so** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_bt_so** - Массив голов для сортированного массива" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d17b8108", + "metadata": {}, + "outputs": [], + "source": [ + "time_bt_insert_so = [] \n", + "heads_bt_so = []\n", + "for n in range(5):\n", + " head = None\n", + " data_bt_so = []\n", + " start = time.perf_counter()\n", + " head = bt.bst_insert_sort(records_sorted, 0, len(records_sorted) - 1)\n", + " data_bt_so.append(head)\n", + " end = time.perf_counter()\n", + " heads_bt_so.append(head)\n", + " time_bt_insert_so.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "1e8a3f9e", + "metadata": {}, + "source": [ + "### Поиск элементов в произвольном массиве\n", + "- **time_bt_find_sh** - Время поиска в произвольном массиве (для 5 замеров)\n", + "- **find_bt_sh** - массив найденных данных в произвольном массиве (только последний замер)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4352b11d", + "metadata": {}, + "outputs": [], + "source": [ + "time_bt_find_sh = []\n", + "for n in range(5):\n", + " find_bt_sh = []\n", + " start = time.perf_counter()\n", + " for m in range(100): # замер для 100 случайных узлов \n", + " i = rand.randint(0, N-1)\n", + " str_find = records_shuffled[i][0]\n", + " find_bt_sh.append(bt.bst_find(data_bt_sh[0], str_find))\n", + " for m in range(10): # недоступные данные\n", + " str_find = f\"Node_{m}\"\n", + " find_bt_sh.append(bt.bst_find(data_bt_sh[0], str_find))\n", + " end = time.perf_counter()\n", + " time_bt_find_sh.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "8db5208b", + 
"metadata": {}, + "source": [ + "### Поиск элементов в отсортированном массиве\n", + "- **time_bt_find_so** - Время поиска в сортированном массиве (для 5 замеров)\n", + "- **find_bt_so** - массив найденных данных в сортированном массиве (только последний замер)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "7941e689", + "metadata": {}, + "outputs": [], + "source": [ + "time_bt_find_so = []\n", + "for n in range(5):\n", + " find_bt_so = []\n", + " start = time.perf_counter()\n", + " for m in range(100): # замер для 100 случайных узлов \n", + " i = rand.randint(0, N-1)\n", + " str_find = records_sorted[i][0]\n", + " find_bt_so.append(bt.bst_find(data_bt_so[0], str_find))\n", + " for m in range(10): # недоступные данные\n", + " str_find = f\"Node_{m}\"\n", + " find_bt_so.append(bt.bst_find(data_bt_so[0], str_find))\n", + " end = time.perf_counter()\n", + " time_bt_find_so.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "ffbe3dfe", + "metadata": {}, + "source": [ + "### Удаление элементов в произвольном массиве\n", + "- **time_bt_delete_sh** - Время удаления в произвольном массиве (для 5 замеров)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "4043a9dc", + "metadata": {}, + "outputs": [], + "source": [ + "time_bt_delete_sh = []\n", + "for n in range(5):\n", + " current_head = heads_bt_sh[n]\n", + "\n", + " start = time.perf_counter()\n", + " for m in range(50): \n", + " i = rand.randint(0, N-1)\n", + " str_delete = records_shuffled[i][0]\n", + " current_head = bt.bst_delete(current_head, str_delete)\n", + " end = time.perf_counter()\n", + "\n", + " time_bt_delete_sh.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "7db94391", + "metadata": {}, + "source": [ + "### Удаление элементов в сортированном массиве\n", + "- **time_bt_delete_so** - Время удаления в сортированном массиве (для 5 замеров)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7ab6136c", + "metadata": 
{}, + "outputs": [], + "source": [ + "time_bt_delete_so = []\n", + "for n in range(5):\n", + " current_head = heads_bt_so[n]\n", + "\n", + " start = time.perf_counter()\n", + " for m in range(50): \n", + " i = rand.randint(0, N-1)\n", + " str_delete = records_sorted[i][0]\n", + " current_head = bt.bst_delete(current_head, str_delete)\n", + " end = time.perf_counter()\n", + "\n", + " time_bt_delete_so.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "0bf5b406", + "metadata": {}, + "source": [ + "## Исследование HashTable" + ] + }, + { + "cell_type": "markdown", + "id": "75586bbc", + "metadata": {}, + "source": [ + "### Добавление всех элементов произвольного кортежа\n", + "- **data_ht_sh** - Структура произвольных данных (только последний замер)\n", + "- **time_ht_insert_sh** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_ht_sh** - Массив голов для массив для произвольного массива" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "cb1788d1", + "metadata": {}, + "outputs": [], + "source": [ + "time_ht_insert_sh = [] \n", + "for n in range(5):\n", + " buckets = ht.create_ht(size = N)\n", + " data_ht_sh = []\n", + " start = time.perf_counter()\n", + " for i in range(N):\n", + " buckets = ht.ht_insert(buckets, records_shuffled[i][0], records_shuffled[i][1])\n", + " data_ht_sh.append(buckets)\n", + " end = time.perf_counter()\n", + " time_ht_insert_sh.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "6bb2aa16", + "metadata": {}, + "source": [ + "### Добавление всех элементов сортированного кортежа\n", + "- **data_ht_so** - Структура сортированных данных (только последний замер)\n", + "- **time_ht_insert_so** - Замер времени работы 10000 элементов (5 замеров) \n", + "- **heads_ht_so** - Массив голов для массив для сортированного массива" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "4cb524e4", + "metadata": {}, + "outputs": [], + "source": [ + "time_ht_insert_so = [] 
\n", + "for n in range(5):\n", + " buckets = ht.create_ht(size = N)\n", + " data_ht_so = []\n", + " start = time.perf_counter()\n", + " for i in range(N):\n", + " buckets = ht.ht_insert(buckets, records_sorted[i][0], records_sorted[i][1])\n", + " data_ht_so.append(buckets)\n", + " end = time.perf_counter()\n", + " time_ht_insert_so.append(end - start)" + ] + }, + { + "cell_type": "markdown", + "id": "9d79016f", + "metadata": {}, + "source": [ + "### Поиск элементов в произвольном массиве\n", + "- **time_ht_find_sh** - Време поиска в произвольном массиве (для 5 замеров)\n", + "- **find_ht_sh** - массив найденных данных в произвольном массиве (только последний замер)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45cec102", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "8f11dbad", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19e7a19a", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {