Skip to content

Commit cba4cb8

Browse files
authored
Merge branch 'master' into pollard_rho_discrete_log
2 parents b31cc12 + e2a78d4 commit cba4cb8

File tree

18 files changed

+257
-46
lines changed

18 files changed

+257
-46
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,7 @@ jobs:
99
build:
1010
runs-on: ubuntu-latest
1111
steps:
12-
- run:
13-
sudo apt-get update && sudo apt-get install -y libtiff5-dev libjpeg8-dev libopenjp2-7-dev
14-
zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev tcl8.6-dev tk8.6-dev python3-tk
15-
libharfbuzz-dev libfribidi-dev libxcb1-dev
16-
libxml2-dev libxslt-dev
17-
libhdf5-dev
18-
libopenblas-dev
12+
- run: sudo apt-get update && sudo apt-get install -y libhdf5-dev
1913
- uses: actions/checkout@v5
2014
- uses: astral-sh/setup-uv@v7
2115
with:
@@ -32,6 +26,7 @@ jobs:
3226
--ignore=computer_vision/cnn_classification.py
3327
--ignore=docs/conf.py
3428
--ignore=dynamic_programming/k_means_clustering_tensorflow.py
29+
--ignore=machine_learning/local_weighted_learning/local_weighted_learning.py
3530
--ignore=machine_learning/lstm/lstm_prediction.py
3631
--ignore=neural_network/input_data.py
3732
--ignore=project_euler/

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ We want your work to be readable by others; therefore, we encourage you to note
9999
ruff check
100100
```
101101

102-
- Original code submission require docstrings or comments to describe your work.
102+
- Original code submissions require docstrings or comments to describe your work.
103103

104104
- More on docstrings and comments:
105105

DIRECTORY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@
195195
* [Permutations](data_structures/arrays/permutations.py)
196196
* [Prefix Sum](data_structures/arrays/prefix_sum.py)
197197
* [Product Sum](data_structures/arrays/product_sum.py)
198+
* [Rotate Array](data_structures/arrays/rotate_array.py)
198199
* [Sparse Table](data_structures/arrays/sparse_table.py)
199200
* [Sudoku Solver](data_structures/arrays/sudoku_solver.py)
200201
* Binary Tree
@@ -623,6 +624,7 @@
623624
* [Sequential Minimum Optimization](machine_learning/sequential_minimum_optimization.py)
624625
* [Similarity Search](machine_learning/similarity_search.py)
625626
* [Support Vector Machines](machine_learning/support_vector_machines.py)
627+
* [T Stochastic Neighbour Embedding](machine_learning/t_stochastic_neighbour_embedding.py)
626628
* [Word Frequency Functions](machine_learning/word_frequency_functions.py)
627629
* [Xgboost Classifier](machine_learning/xgboost_classifier.py)
628630
* [Xgboost Regressor](machine_learning/xgboost_regressor.py)

data_structures/queues/circular_queue.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __len__(self) -> int:
1717
>>> len(cq)
1818
0
1919
>>> cq.enqueue("A") # doctest: +ELLIPSIS
20-
<data_structures.queues.circular_queue.CircularQueue object at ...
20+
<data_structures.queues.circular_queue.CircularQueue object at ...>
2121
>>> cq.array
2222
['A', None, None, None, None]
2323
>>> len(cq)
@@ -51,17 +51,24 @@ def enqueue(self, data):
5151
"""
5252
This function inserts an element at the end of the queue using self.rear value
5353
as an index.
54+
5455
>>> cq = CircularQueue(5)
5556
>>> cq.enqueue("A") # doctest: +ELLIPSIS
56-
<data_structures.queues.circular_queue.CircularQueue object at ...
57+
<data_structures.queues.circular_queue.CircularQueue object at ...>
5758
>>> (cq.size, cq.first())
5859
(1, 'A')
5960
>>> cq.enqueue("B") # doctest: +ELLIPSIS
60-
<data_structures.queues.circular_queue.CircularQueue object at ...
61+
<data_structures.queues.circular_queue.CircularQueue object at ...>
6162
>>> cq.array
6263
['A', 'B', None, None, None]
6364
>>> (cq.size, cq.first())
6465
(2, 'A')
66+
>>> cq.enqueue("C").enqueue("D").enqueue("E") # doctest: +ELLIPSIS
67+
<data_structures.queues.circular_queue.CircularQueue object at ...>
68+
>>> cq.enqueue("F")
69+
Traceback (most recent call last):
70+
...
71+
Exception: QUEUE IS FULL
6572
"""
6673
if self.size >= self.n:
6774
raise Exception("QUEUE IS FULL")
@@ -75,6 +82,7 @@ def dequeue(self):
7582
"""
7683
This function removes an element from the queue using on self.front value as an
7784
index and returns it
85+
7886
>>> cq = CircularQueue(5)
7987
>>> cq.dequeue()
8088
Traceback (most recent call last):

graphs/graph_adjacency_list.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ def add_vertex(self, vertex: T) -> None:
6161
"""
6262
Adds a vertex to the graph. If the given vertex already exists,
6363
a ValueError will be thrown.
64+
65+
>>> g = GraphAdjacencyList(vertices=[], edges=[], directed=False)
66+
>>> g.add_vertex("A")
67+
>>> g.adj_list
68+
{'A': []}
69+
>>> g.add_vertex("A")
70+
Traceback (most recent call last):
71+
...
72+
ValueError: Incorrect input: A is already in the graph.
6473
"""
6574
if self.contains_vertex(vertex):
6675
msg = f"Incorrect input: {vertex} is already in the graph."

machine_learning/apriori_algorithm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Examples: https://www.kaggle.com/code/earthian/apriori-association-rules-mining
1212
"""
1313

14+
from collections import Counter
1415
from itertools import combinations
1516

1617

@@ -44,11 +45,16 @@ def prune(itemset: list, candidates: list, length: int) -> list:
4445
>>> prune(itemset, candidates, 3)
4546
[]
4647
"""
48+
itemset_counter = Counter(tuple(item) for item in itemset)
4749
pruned = []
4850
for candidate in candidates:
4951
is_subsequence = True
5052
for item in candidate:
51-
if item not in itemset or itemset.count(item) < length - 1:
53+
item_tuple = tuple(item)
54+
if (
55+
item_tuple not in itemset_counter
56+
or itemset_counter[item_tuple] < length - 1
57+
):
5258
is_subsequence = False
5359
break
5460
if is_subsequence:

machine_learning/decision_tree.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,13 @@ def predict(self, x):
146146
"""
147147
if self.prediction is not None:
148148
return self.prediction
149-
elif self.left or self.right is not None:
149+
elif self.left is not None and self.right is not None:
150150
if x >= self.decision_boundary:
151151
return self.right.predict(x)
152152
else:
153153
return self.left.predict(x)
154154
else:
155-
print("Error: Decision tree not yet trained")
156-
return None
155+
raise ValueError("Decision tree not yet trained")
157156

158157

159158
class TestDecisionTree:
@@ -201,4 +200,4 @@ def main():
201200
main()
202201
import doctest
203202

204-
doctest.testmod(name="mean_squarred_error", verbose=True)
203+
doctest.testmod(name="mean_squared_error", verbose=True)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""
2+
t-distributed stochastic neighbor embedding (t-SNE)
3+
4+
For more details, see:
5+
https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
6+
"""
7+
8+
import doctest
9+
10+
import numpy as np
11+
from numpy import ndarray
12+
from sklearn.datasets import load_iris
13+
14+
15+
def collect_dataset() -> tuple[ndarray, ndarray]:
16+
"""
17+
Load the Iris dataset and return features and labels.
18+
19+
Returns:
20+
tuple[ndarray, ndarray]: Feature matrix and target labels.
21+
22+
>>> features, targets = collect_dataset()
23+
>>> features.shape
24+
(150, 4)
25+
>>> targets.shape
26+
(150,)
27+
"""
28+
iris_dataset = load_iris()
29+
return np.array(iris_dataset.data), np.array(iris_dataset.target)
30+
31+
32+
def compute_pairwise_affinities(data_matrix: ndarray, sigma: float = 1.0) -> ndarray:
33+
"""
34+
Compute high-dimensional affinities (P matrix) using a Gaussian kernel.
35+
36+
Args:
37+
data_matrix: Input data of shape (n_samples, n_features).
38+
sigma: Gaussian kernel bandwidth.
39+
40+
Returns:
41+
ndarray: Symmetrized probability matrix.
42+
43+
>>> x = np.array([[0.0, 0.0], [1.0, 0.0]])
44+
>>> probabilities = compute_pairwise_affinities(x)
45+
>>> float(round(probabilities[0, 1], 3))
46+
0.25
47+
"""
48+
n_samples = data_matrix.shape[0]
49+
squared_sum = np.sum(np.square(data_matrix), axis=1)
50+
squared_distance = np.add(
51+
np.add(-2 * np.dot(data_matrix, data_matrix.T), squared_sum).T, squared_sum
52+
)
53+
54+
affinity_matrix = np.exp(-squared_distance / (2 * sigma**2))
55+
np.fill_diagonal(affinity_matrix, 0)
56+
57+
affinity_matrix /= np.sum(affinity_matrix)
58+
return (affinity_matrix + affinity_matrix.T) / (2 * n_samples)
59+
60+
61+
def compute_low_dim_affinities(embedding_matrix: ndarray) -> tuple[ndarray, ndarray]:
62+
"""
63+
Compute low-dimensional affinities (Q matrix) using a Student-t distribution.
64+
65+
Args:
66+
embedding_matrix: Low-dimensional embedding of shape (n_samples, n_components).
67+
68+
Returns:
69+
tuple[ndarray, ndarray]: (Q probability matrix, numerator matrix).
70+
71+
>>> y = np.array([[0.0, 0.0], [1.0, 0.0]])
72+
>>> q_matrix, numerators = compute_low_dim_affinities(y)
73+
>>> q_matrix.shape
74+
(2, 2)
75+
"""
76+
squared_sum = np.sum(np.square(embedding_matrix), axis=1)
77+
numerator_matrix = 1 / (
78+
1
79+
+ np.add(
80+
np.add(-2 * np.dot(embedding_matrix, embedding_matrix.T), squared_sum).T,
81+
squared_sum,
82+
)
83+
)
84+
np.fill_diagonal(numerator_matrix, 0)
85+
86+
q_matrix = numerator_matrix / np.sum(numerator_matrix)
87+
return q_matrix, numerator_matrix
88+
89+
90+
def apply_tsne(
91+
data_matrix: ndarray,
92+
n_components: int = 2,
93+
learning_rate: float = 200.0,
94+
n_iter: int = 500,
95+
) -> ndarray:
96+
"""
97+
Apply t-SNE for dimensionality reduction.
98+
99+
Args:
100+
data_matrix: Original dataset (features).
101+
n_components: Target dimension (2D or 3D).
102+
learning_rate: Step size for gradient descent.
103+
n_iter: Number of iterations.
104+
105+
Returns:
106+
ndarray: Low-dimensional embedding of the data.
107+
108+
>>> features, _ = collect_dataset()
109+
>>> embedding = apply_tsne(features, n_components=2, n_iter=50)
110+
>>> embedding.shape
111+
(150, 2)
112+
"""
113+
if n_components < 1 or n_iter < 1:
114+
raise ValueError("n_components and n_iter must be >= 1")
115+
116+
n_samples = data_matrix.shape[0]
117+
rng = np.random.default_rng()
118+
embedding = rng.standard_normal((n_samples, n_components)) * 1e-4
119+
120+
high_dim_affinities = compute_pairwise_affinities(data_matrix)
121+
high_dim_affinities = np.maximum(high_dim_affinities, 1e-12)
122+
123+
embedding_increment = np.zeros_like(embedding)
124+
momentum = 0.5
125+
126+
for iteration in range(n_iter):
127+
low_dim_affinities, numerator_matrix = compute_low_dim_affinities(embedding)
128+
low_dim_affinities = np.maximum(low_dim_affinities, 1e-12)
129+
130+
affinity_diff = high_dim_affinities - low_dim_affinities
131+
132+
gradient = 4 * (
133+
np.dot((affinity_diff * numerator_matrix), embedding)
134+
- np.multiply(
135+
np.sum(affinity_diff * numerator_matrix, axis=1)[:, np.newaxis],
136+
embedding,
137+
)
138+
)
139+
140+
embedding_increment = momentum * embedding_increment - learning_rate * gradient
141+
embedding += embedding_increment
142+
143+
if iteration == int(n_iter / 4):
144+
momentum = 0.8
145+
146+
return embedding
147+
148+
149+
def main() -> None:
150+
"""
151+
Run t-SNE on the Iris dataset and display the first 5 embeddings.
152+
153+
>>> main() # doctest: +ELLIPSIS
154+
t-SNE embedding (first 5 points):
155+
[[...
156+
"""
157+
features, _labels = collect_dataset()
158+
embedding = apply_tsne(features, n_components=2, n_iter=300)
159+
160+
if not isinstance(embedding, np.ndarray):
161+
raise TypeError("t-SNE embedding must be an ndarray")
162+
163+
print("t-SNE embedding (first 5 points):")
164+
print(embedding[:5])
165+
166+
# Optional visualization (Ruff/mypy compliant)
167+
168+
# import matplotlib.pyplot as plt
169+
# plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap="viridis")
170+
# plt.title("t-SNE Visualization of the Iris Dataset")
171+
# plt.xlabel("Dimension 1")
172+
# plt.ylabel("Dimension 2")
173+
# plt.show()
174+
175+
176+
if __name__ == "__main__":
177+
doctest.testmod()
178+
main()

maths/factorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def factorial_recursive(n: int) -> int:
5656
raise ValueError("factorial() only accepts integral values")
5757
if n < 0:
5858
raise ValueError("factorial() not defined for negative values")
59-
return 1 if n in {0, 1} else n * factorial(n - 1)
59+
return 1 if n in {0, 1} else n * factorial_recursive(n - 1)
6060

6161

6262
if __name__ == "__main__":

maths/fibonacci.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def fib_memoization(n: int) -> list[int]:
183183
"""
184184
if n < 0:
185185
raise ValueError("n is negative")
186-
# Cache must be outside recursuive function
186+
# Cache must be outside recursive function
187187
# otherwise it will reset every time it calls itself.
188188
cache: dict[int, int] = {0: 0, 1: 1, 2: 1} # Prefilled cache
189189

0 commit comments

Comments
 (0)