Posts

10

from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)

# Cluster into two groups; K-Means cluster IDs are arbitrary and may not
# line up with the true 0/1 class labels.
y_pred = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(X)
print(confusion_matrix(y, y_pred))
print(classification_report(y, y_pred))

# Project to 2-D with PCA for visualization
X_pca = PCA(2).fit_transform(X)
plt.scatter(*X_pca.T, c=y_pred, cmap='coolwarm', alpha=0.7)
plt.title("K-Means Clustering (PCA)")
plt.show()
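Because the cluster IDs are arbitrary, the classification report above can come out inverted. A minimal sketch of one common fix, continuing from the program above: remap each cluster to the majority true label inside it before scoring (the helper name align_labels is mine, not part of the program).

import numpy as np
from sklearn.metrics import accuracy_score

def align_labels(y_true, y_clusters):
    # Remap each cluster ID to the most frequent true label among its members
    mapped = np.zeros_like(y_clusters)
    for c in np.unique(y_clusters):
        mask = y_clusters == c
        mapped[mask] = np.bincount(y_true[mask]).argmax()
    return mapped

y_aligned = align_labels(y, y_pred)
print("Aligned accuracy: %.2f%%" % (accuracy_score(y, y_aligned) * 100))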

9

import scipy.io, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Requires olivettifaces.mat in the working directory (upload it first)
d = scipy.io.loadmat('olivettifaces.mat')
X, y = d['faces'].T, np.repeat(np.arange(40), 10)

Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
m = GaussianNB().fit(Xt, yt)
yp = m.predict(Xs)
print("Acc: %.2f%%" % (accuracy_score(ys, yp) * 100))

# Show 20 random test faces with true (T) and predicted (P) labels
i = np.random.choice(len(Xs), 20, replace=False)
imgs = Xs[i].reshape(-1, 64, 64).transpose(0, 2, 1)
plt.figure(figsize=(12, 6))
for j in range(20):
    plt.subplot(4, 5, j + 1)
    plt.imshow(imgs[j], cmap='gray')
    plt.title(f"T:{ys[i[j]]}, P:{yp[i[j]]}", fontsize=8)
    plt.axis('off')
plt.tight_layout(); plt.show()
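If the .mat file is not at hand, scikit-learn can download the same dataset directly. A sketch of that substitution; fetch_olivetti_faces returns images already scaled and row-ordered, so the transpose/reshape step above is unnecessary:

from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces()      # downloads on first call
X, y = faces.data, faces.target     # X: (400, 4096), y: labels 0..39
imgs = faces.images                 # (400, 64, 64), ready for imshow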

8

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

X, y = load_breast_cancer(return_X_y=True)
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
m = DecisionTreeClassifier().fit(Xt, yt)
print("Acc: %.2f%%" % (accuracy_score(ys, m.predict(Xs)) * 100))
print("Class:", ["Malignant", "Benign"][m.predict([Xs[0]])[0]])
plot_tree(m, filled=True); plt.show()
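An unconstrained tree usually grows deep and the plot becomes unreadable. A sketch of one common variation, capping the depth and labelling the nodes (max_depth=3 and the random_state are my choices, not part of the original program):

data = load_breast_cancer()
m = DecisionTreeClassifier(max_depth=3, random_state=42).fit(Xt, yt)
plt.figure(figsize=(14, 7))
plot_tree(m, filled=True, feature_names=data.feature_names,
          class_names=data.target_names)
plt.show()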

7

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_openml

# Linear regression on the Boston housing data
boston = fetch_openml(name='boston', version=1, as_frame=True)
X, y = boston.data.values, boston.target.values
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
m = LinearRegression().fit(Xt, yt)
yp = m.predict(Xs)
print("Linear R²=%.2f" % r2_score(ys, yp))
plt.scatter(Xs[:, 5], ys, c='b'); plt.scatter(Xs[:, 5], yp, c='r')
plt.title("Boston"); plt.show()

# Polynomial regression on the auto-mpg data
df = sns.load_dataset('mpg').dropna()
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')
df = df.dropna()
X, y = df[['horsepower', 'weight']], df['mpg']
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
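The polynomial half of this program is cut off after the split. A minimal sketch of how it plausibly continues, using the PolynomialFeatures and mean_squared_error already imported above (degree=2 and the plotted horsepower column are my assumptions):

poly = PolynomialFeatures(degree=2)
m2 = LinearRegression().fit(poly.fit_transform(Xt), yt)
yp2 = m2.predict(poly.transform(Xs))
print("Poly R²=%.2f, MSE=%.2f" % (r2_score(ys, yp2), mean_squared_error(ys, yp2)))
plt.scatter(Xs['horsepower'], ys, c='b'); plt.scatter(Xs['horsepower'], yp2, c='r')
plt.title("MPG (polynomial)"); plt.show()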

6

import numpy as np, matplotlib.pyplot as plt

def lwr(x, X, y, tau):
    # Gaussian kernel weights centred on the query point x
    W = np.diag(np.exp(-np.sum((X - x)**2, axis=1) / (2 * tau**2)))
    # Weighted least-squares prediction at x
    return x @ np.linalg.pinv(X.T @ W @ X) @ X.T @ W @ y

np.random.seed(42)
X = np.linspace(0, 2*np.pi, 100)
y = np.sin(X) + 0.1*np.random.randn(100)
Xb = np.c_[np.ones(X.shape), X]      # add a bias column
xt = np.linspace(0, 2*np.pi, 200)
xtb = np.c_[np.ones(xt.shape), xt]

tau = 0.5
yp = np.array([lwr(xi, Xb, y, tau) for xi in xtb])

plt.figure(figsize=(10, 6))
plt.scatter(X, y, c='red', alpha=0.7, label='Training Data')
plt.plot(xt, yp, c='blue', lw=2, label=f'LWR Fit (tau={tau})')
plt.xlabel('X'), plt.ylabel('y'), plt.title('Locally Weighted Regression')
plt.legend(), plt.grid(alpha=0.3)
plt.show()
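The bandwidth tau controls how local the fit is. A short sketch, continuing from the program above, that sweeps tau to show under- and over-smoothing (the three values are illustrative):

plt.scatter(X, y, c='red', alpha=0.5, label='Training Data')
for t in (0.1, 0.5, 2.0):   # small tau = wiggly fit, large tau = near-linear
    plt.plot(xt, [lwr(xi, Xb, y, t) for xi in xtb], lw=2, label=f'tau={t}')
plt.legend(); plt.show()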

5

import numpy as np, matplotlib.pyplot as plt
from collections import Counter

data = np.random.rand(100)
train, test = data[:50], data[50:]
labels = ['Class1' if x <= 0.5 else 'Class2' for x in train]

def knn(x, k):
    # Majority vote among the k nearest training points (1-D distance)
    d = sorted([(abs(x - xi), li) for xi, li in zip(train, labels)])
    return Counter([l for _, l in d[:k]]).most_common(1)[0][0]

k_vals = [1, 2, 3, 4, 5, 20, 30]
for k in k_vals:
    print(f"\nResults for k = {k}:")
    preds = [knn(x, k) for x in test]
    for i, p in enumerate(preds, 51):
        print(f"x{i} (value: {test[i-51]:.4f}) → {p}")
    plt.figure()
    plt.scatter(train, [0]*50, c=['blue' if l=='Class1' else 'red' for l in labels], label='Train')
    plt.scatter([test[i] for i in range(50) if preds[i]=='Class1'], [1]*preds.count('Class1'), c='blue', marker='x', label='Class1')
    plt.scatter([test[i] for i in range(50) if preds[i]=='Class2'], [1]*preds.count('Class2'), c='red', marker='x', label='Class2')
    plt.title(f'k = {k}')
    plt.legend()
    plt.show()
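For comparison, the same experiment can be run with scikit-learn's built-in classifier. A sketch, continuing from the program above and assuming the 1-D values are reshaped into the (n_samples, 1) layout sklearn expects:

from sklearn.neighbors import KNeighborsClassifier

for k in k_vals:
    clf = KNeighborsClassifier(n_neighbors=k).fit(train.reshape(-1, 1), labels)
    preds = clf.predict(test.reshape(-1, 1))
    print(f"k={k}: {Counter(preds)}")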

4

import pandas as pd

def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)
    attributes = data.columns[:-1]
    class_label = data.columns[-1]
    hypothesis = None    # most specific: no positive example seen yet
    for _, row in data.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                # Initialise with the first positive example
                hypothesis = list(row[attributes])
            else:
                # Generalise: keep matching values, replace mismatches with '?'
                hypothesis = [h if h == v else '?'
                              for h, v in zip(hypothesis, row[attributes])]
    return hypothesis

file_path = 'training_data.csv'
print("Final hypothesis:", find_s_algorithm(file_path))
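To run this end-to-end, training_data.csv needs attribute columns plus a final Yes/No class column. A sketch that writes the classic EnjoySport examples from Mitchell's textbook to that file first (illustrative data, not the original file's contents):

import pandas as pd

# Hypothetical EnjoySport-style training set
rows = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same',   'Yes'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Warm', 'Same',   'Yes'],
    ['Rainy', 'Cold', 'High',   'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Cool', 'Change', 'Yes'],
]
cols = ['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
pd.DataFrame(rows, columns=cols).to_csv('training_data.csv', index=False)
# Expected hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']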