Skip to content

Commit cba4cb8

Browse files
authored
Merge branch 'master' into pollard_rho_discrete_log
2 parents b31cc12 + e2a78d4 commit cba4cb8

File tree

18 files changed

+257
-46
lines changed

18 files changed

+257
-46
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,7 @@ jobs:
99
build:
1010
runs-on: ubuntu-latest
1111
steps:
12-
- run:
13-
sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
14-
zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
15-
libharfbuzz-dev libfribidi-dev libxcb1-dev
16-
libxml2-dev libxslt-dev
17-
libhdf5-dev
18-
libopenblas-dev
12+
- run: sudo apt-get update && sudo apt-get install -y libhdf5-dev
1913
- uses: actions/checkout@v5
2014
- uses: astral-sh/setup-uv@v7
2115
with:
@@ -32,6 +26,7 @@ jobs:
3226
--ignore=computer_vision/cnn_classification.py
3327
--ignore=docs/conf.py
3428
--ignore=dynamic_programming/k_means_clustering_tensorflow.py
29+
--ignore=machine_learning/local_weighted_learning/local_weighted_learning.py
3530
--ignore=machine_learning/lstm/lstm_prediction.py
3631
--ignore=neural_network/input_data.py
3732
--ignore=project_euler/

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ We want your work to be readable by others; therefore, we encourage you to note
9999
ruff check
100100
```
101101

102-
- Original code submission require docstrings or comments to describe your work.
102+
- Original code submissions require docstrings or comments to describe your work.
103103

104104
- More on docstrings and comments:
105105

DIRECTORY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@
195195
* [Permutations](data_structures/arrays/permutations.py)
196196
* [Prefix Sum](data_structures/arrays/prefix_sum.py)
197197
* [Product Sum](data_structures/arrays/product_sum.py)
198+
* [Rotate Array](data_structures/arrays/rotate_array.py)
198199
* [Sparse Table](data_structures/arrays/sparse_table.py)
199200
* [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
200201
* Binary Tree
@@ -623,6 +624,7 @@
623624
* [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py)
624625
* [Similarity Search](machine_learning/similarity_search.py)
625626
* [Support Vector Machines](machine_learning/support_vector_machines.py)
627+
* [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py)
626628
* [Word Frequency Functions](machine_learning/word_frequency_functions.py)
627629
* [Xgboost Classifier](machine_learning/xgboost_classifier.py)
628630
* [Xgboost Regressor](machine_learning/xgboost_regressor.py)

data_structures/queues/circular_queue.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __len__(self) -> int:
1717
>>> len(cq)
1818
0
1919
>>> cq.enqueue("A") # doctest: +ELLIPSIS
20-
<data_structures.queues.circular_queue.CircularQueue object at ...
20+
<data_structures.queues.circular_queue.CircularQueue object at ...>
2121
>>> cq.array
2222
['A', None, None, None, None]
2323
>>> len(cq)
@@ -51,17 +51,24 @@ def enqueue(self, data):
5151
"""
5252
This function inserts an element at the end of the queue using self.rear value
5353
as an index.
54+
5455
>>> cq = CircularQueue(5)
5556
>>> cq.enqueue("A") # doctest: +ELLIPSIS
56-
<data_structures.queues.circular_queue.CircularQueue object at ...
57+
<data_structures.queues.circular_queue.CircularQueue object at ...>
5758
>>> (cq.size, cq.first())
5859
(1, 'A')
5960
>>> cq.enqueue("B") # doctest: +ELLIPSIS
60-
<data_structures.queues.circular_queue.CircularQueue object at ...
61+
<data_structures.queues.circular_queue.CircularQueue object at ...>
6162
>>> cq.array
6263
['A', 'B', None, None, None]
6364
>>> (cq.size, cq.first())
6465
(2, 'A')
66+
>>> cq.enqueue("C").enqueue("D").enqueue("E") # doctest: +ELLIPSIS
67+
<data_structures.queues.circular_queue.CircularQueue object at ...>
68+
>>> cq.enqueue("F")
69+
Traceback (most recent call last):
70+
...
71+
Exception: QUEUE IS FULL
6572
"""
6673
if self.size >= self.n:
6774
raise Exception("QUEUE IS FULL")
@@ -75,6 +82,7 @@ def dequeue(self):
7582
"""
7683
This function removes an element from the queue using on self.front value as an
7784
index and returns it
85+
7886
>>> cq = CircularQueue(5)
7987
>>> cq.dequeue()
8088
Traceback (most recent call last):

graphs/graph_adjacency_list.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ def add_vertex(self, vertex: T) -> None:
6161
"""
6262
Adds a vertex to the graph. If the given vertex already exists,
6363
a ValueError will be thrown.
64+
65+
>>> g = GraphAdjacencyList(vertices=[], edges=[], directed=False)
66+
>>> g.add_vertex("A")
67+
>>> g.adj_list
68+
{'A': []}
69+
>>> g.add_vertex("A")
70+
Traceback (most recent call last):
71+
...
72+
ValueError: Incorrect input: A is already in the graph.
6473
"""
6574
if self.contains_vertex(vertex):
6675
msg = f"Incorrect input: {vertex} is already in the graph."

machine_learning/apriori_algorithm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Examples: https://www.kaggle.com/code/earthian/apriori-association-rules-mining
1212
"""
1313

14+
from collections import Counter
1415
from itertools import combinations
1516

1617

@@ -44,11 +45,16 @@ def prune(itemset: list, candidates: list, length: int) -> list:
4445
>>> prune(itemset, candidates, 3)
4546
[]
4647
"""
48+
itemset_counter = Counter(tuple(item) for item in itemset)
4749
pruned = []
4850
for candidate in candidates:
4951
is_subsequence = True
5052
for item in candidate:
51-
if item not in itemset or itemset.count(item) < length - 1:
53+
item_tuple = tuple(item)
54+
if (
55+
item_tuple not in itemset_counter
56+
or itemset_counter[item_tuple] < length - 1
57+
):
5258
is_subsequence = False
5359
break
5460
if is_subsequence:

machine_learning/decision_tree.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,13 @@ def predict(self, x):
146146
"""
147147
if self.prediction is not None:
148148
return self.prediction
149-
elif self.left or self.right is not None:
149+
elif self.left is not None and self.right is not None:
150150
if x >= self.decision_boundary:
151151
return self.right.predict(x)
152152
else:
153153
return self.left.predict(x)
154154
else:
155-
print("Error: Decision tree not yet trained")
156-
return None
155+
raise ValueError("Decision tree not yet trained")
157156

158157

159158
class TestDecisionTree:
@@ -201,4 +200,4 @@ def main():
201200
main()
202201
import doctest
203202

204-
doctest.testmod(name="mean_squarred_error", verbose=True)
203+
doctest.testmod(name="mean_squared_error", verbose=True)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""
2+
t-distributed stochastic neighbor embedding (t-SNE)
3+
4+
For more details, see:
5+
https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
6+
"""
7+
8+
import doctest
9+
10+
import numpy as np
11+
from numpy import ndarray
12+
from sklearn.datasets import load_iris
13+
14+
15+
def collect_dataset() -> tuple[ndarray, ndarray]:
16+
"""
17+
Load the Iris dataset and return features and labels.
18+
19+
Returns:
20+
tuple[ndarray, ndarray]: Feature matrix and target labels.
21+
22+
>>> features, targets = collect_dataset()
23+
>>> features.shape
24+
(150, 4)
25+
>>> targets.shape
26+
(150,)
27+
"""
28+
iris_dataset = load_iris()
29+
return np.array(iris_dataset.data), np.array(iris_dataset.target)
30+
31+
32+
def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
33+
"""
34+
Compute high-dimensional affinities (P matrix) using a Gaussian kernel.
35+
36+
Args:
37+
data_matrix: Input data of shape (n_samples, n_features).
38+
sigma: Gaussian kernel bandwidth.
39+
40+
Returns:
41+
ndarray: Symmetrized probability matrix.
42+
43+
>>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
44+
>>> probabilities = compute_pairwise_affinities(x)
45+
>>> float(round(probabilities[0, 1], 3))
46+
0.25
47+
"""
48+
n_samples = data_matrix.shape[0]
49+
squared_sum = np.sum(np.square(data_matrix), axis=1)
50+
squared_distance = np.add(
51+
np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum
52+
)
53+
54+
affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
55+
np.fill_diagonal(affinity_matrix, 0)
56+
57+
affinity_matrix /= np.sum(affinity_matrix)
58+
return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
59+
60+
61+
def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
62+
"""
63+
Compute low-dimensional affinities (Q matrix) using a Student-t distribution.
64+
65+
Args:
66+
embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components).
67+
68+
Returns:
69+
tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
70+
71+
>>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
72+
>>> q_matrix, numerators = compute_low_dim_affinities(y)
73+
>>> q_matrix.shape
74+
(2, 2)
75+
"""
76+
squared_sum = np.sum(np.square(embedding_matrix), axis=1)
77+
numerator_matrix = 1 / (
78+
1
79+
+ np.add(
80+
np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
81+
squared_sum,
82+
)
83+
)
84+
np.fill_diagonal(numerator_matrix, 0)
85+
86+
q_matrix = numerator_matrix / np.sum(numerator_matrix)
87+
return q_matrix, numerator_matrix
88+
89+
90+
def apply_tsne(
91+
data_matrix: ndarray,
92+
n_components: int = 2,
93+
learning_rate: float = 200.0,
94+
n_iter: int = 500,
95+
) -> ndarray:
96+
"""
97+
Apply t-SNE for dimensionality reduction.
98+
99+
Args:
100+
data_matrix: Original dataset (features).
101+
n_components: Target dimension (2D or 3D).
102+
learning_rate: Step size for gradient descent.
103+
n_iter: Number of iterations.
104+
105+
Returns:
106+
ndarray: Low-dimensional embedding of the data.
107+
108+
>>> features, _ = collect_dataset()
109+
>>> embedding = apply_tsne(features, n_components=2, n_iter=50)
110+
>>> embedding.shape
111+
(150, 2)
112+
"""
113+
if n_components < 1 or n_iter < 1:
114+
raise ValueError("n_components and n_iter must be >= 1")
115+
116+
n_samples = data_matrix.shape[0]
117+
rng = np.random.default_rng()
118+
embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
119+
120+
high_dim_affinities = compute_pairwise_affinities(data_matrix)
121+
high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
122+
123+
embedding_increment = np.zeros_like(embedding)
124+
momentum = 0.5
125+
126+
for iteration in range(n_iter):
127+
low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
128+
low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
129+
130+
affinity_diff = high_dim_affinities - low_dim_affinities
131+
132+
gradient = 4 * (
133+
np.dot((affinity_diff * numerator_matrix), embedding)
134+
- np.multiply(
135+
np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
136+
embedding,
137+
)
138+
)
139+
140+
embedding_increment = momentum * embedding_increment - learning_rate * gradient
141+
embedding += embedding_increment
142+
143+
if iteration == int(n_iter / 4):
144+
momentum = 0.8
145+
146+
return embedding
147+
148+
149+
def main() -> None:
150+
"""
151+
Run t-SNE on the Iris dataset and display the first 5 embeddings.
152+
153+
>>> main() # doctest: +ELLIPSIS
154+
t-SNE embedding (first 5 points):
155+
[[...
156+
"""
157+
features, _labels = collect_dataset()
158+
embedding = apply_tsne(features, n_components=2, n_iter=300)
159+
160+
if not isinstance(embedding, np.ndarray):
161+
raise TypeError("t-SNE embedding must be an ndarray")
162+
163+
print("t-SNE embedding (first 5 points):")
164+
print(embedding[:5])
165+
166+
# Optional visualization (Ruff/mypy compliant)
167+
168+
# import matplotlib.pyplot as plt
169+
# plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
170+
# plt.title("t-SNE Visualization of the Iris Dataset")
171+
# plt.xlabel("Dimension 1")
172+
# plt.ylabel("Dimension 2")
173+
# plt.show()
174+
175+
176+
if __name__ == "__main__":
177+
doctest.testmod()
178+
main()

maths/factorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def factorial_recursive(n: int) -> int:
5656
raise ValueError("factorial() only accepts integral values")
5757
if n < 0:
5858
raise ValueError("factorial() not defined for negative values")
59-
return 1 if n in {0, 1} else n * factorial(n - 1)
59+
return 1 if n in {0, 1} else n * factorial_recursive(n - 1)
6060

6161

6262
if __name__ == "__main__":

maths/fibonacci.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def fib_memoization(n: int) -> list[int]:
183183
"""
184184
if n < 0:
185185
raise ValueError("n is negative")
186-
# Cache must be outside recursuive function
186+
# Cache must be outside recursive function
187187
# otherwise it will reset every time it calls itself.
188188
cache: dict[int, int] = {0: 0, 1: 1, 2: 1} # Prefilled cache
189189

0 commit comments

Comments
 (0)