Posts

10

from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

X, y = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)

# Cluster into two groups; K-Means cluster IDs are arbitrary and may not
# line up with the true 0/1 class labels.
y_pred = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(X)
print(confusion_matrix(y, y_pred))
print(classification_report(y, y_pred))

# Project to 2-D with PCA for visualization
X_pca = PCA(2).fit_transform(X)
plt.scatter(*X_pca.T, c=y_pred, cmap='coolwarm', alpha=0.7)
plt.title("K-Means Clustering (PCA)")
plt.show()
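Because the cluster IDs are arbitrary, the classification report above can come out inverted. A minimal sketch of one common fix, continuing from the program above: remap each cluster to the majority true label inside it before scoring (the helper name align_labels is mine, not part of the program).

import numpy as np
from sklearn.metrics import accuracy_score

def align_labels(y_true, y_clusters):
    # Remap each cluster ID to the most frequent true label among its members
    mapped = np.zeros_like(y_clusters)
    for c in np.unique(y_clusters):
        mask = y_clusters == c
        mapped[mask] = np.bincount(y_true[mask]).argmax()
    return mapped

y_aligned = align_labels(y, y_pred)
print("Aligned accuracy: %.2f%%" % (accuracy_score(y, y_aligned) * 100))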

9

import scipy.io, numpy as np, matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Requires olivettifaces.mat in the working directory (upload it first)
d = scipy.io.loadmat('olivettifaces.mat')
X, y = d['faces'].T, np.repeat(np.arange(40), 10)

Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
m = GaussianNB().fit(Xt, yt)
yp = m.predict(Xs)
print("Acc: %.2f%%" % (accuracy_score(ys, yp) * 100))

# Show 20 random test faces with true (T) and predicted (P) labels
i = np.random.choice(len(Xs), 20, replace=False)
imgs = Xs[i].reshape(-1, 64, 64).transpose(0, 2, 1)
plt.figure(figsize=(12, 6))
for j in range(20):
    plt.subplot(4, 5, j + 1)
    plt.imshow(imgs[j], cmap='gray')
    plt.title(f"T:{ys[i[j]]}, P:{yp[i[j]]}", fontsize=8)
    plt.axis('off')
plt.tight_layout(); plt.show()
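If the .mat file is not at hand, scikit-learn can download the same dataset directly. A sketch of that substitution; fetch_olivetti_faces returns images already scaled and row-ordered, so the transpose/reshape step above is unnecessary:

from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces()      # downloads on first call
X, y = faces.data, faces.target     # X: (400, 4096), y: labels 0..39
imgs = faces.images                 # (400, 64, 64), ready for imshow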

8

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

X, y = load_breast_cancer(return_X_y=True)
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
m = DecisionTreeClassifier().fit(Xt, yt)
print("Acc: %.2f%%" % (accuracy_score(ys, m.predict(Xs)) * 100))
print("Class:", ["Malignant", "Benign"][m.predict([Xs[0]])[0]])
plot_tree(m, filled=True); plt.show()
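An unconstrained tree usually grows deep and the plot becomes unreadable. A sketch of one common variation, capping the depth and labelling the nodes (max_depth=3 and the random_state are my choices, not part of the original program):

data = load_breast_cancer()
m = DecisionTreeClassifier(max_depth=3, random_state=42).fit(Xt, yt)
plt.figure(figsize=(14, 7))
plot_tree(m, filled=True, feature_names=data.feature_names,
          class_names=data.target_names)
plt.show()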

7

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_openml

# Linear regression on the Boston housing data
boston = fetch_openml(name='boston', version=1, as_frame=True)
X, y = boston.data.values, boston.target.values
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
m = LinearRegression().fit(Xt, yt)
yp = m.predict(Xs)
print("Linear R²=%.2f" % r2_score(ys, yp))
plt.scatter(Xs[:, 5], ys, c='b'); plt.scatter(Xs[:, 5], yp, c='r')
plt.title("Boston"); plt.show()

# Polynomial regression on the auto-mpg data
df = sns.load_dataset('mpg').dropna()
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')
df = df.dropna()
X, y = df[['horsepower', 'weight']], df['mpg']
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.2)
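The polynomial half of this program is cut off after the split. A minimal sketch of how it plausibly continues, using the PolynomialFeatures and mean_squared_error already imported above (degree=2 and the plotted horsepower column are my assumptions):

poly = PolynomialFeatures(degree=2)
m2 = LinearRegression().fit(poly.fit_transform(Xt), yt)
yp2 = m2.predict(poly.transform(Xs))
print("Poly R²=%.2f, MSE=%.2f" % (r2_score(ys, yp2), mean_squared_error(ys, yp2)))
plt.scatter(Xs['horsepower'], ys, c='b'); plt.scatter(Xs['horsepower'], yp2, c='r')
plt.title("MPG (polynomial)"); plt.show()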

6

import numpy as np, matplotlib.pyplot as plt

def lwr(x, X, y, tau):
    # Gaussian kernel weights centred on the query point x
    W = np.diag(np.exp(-np.sum((X - x)**2, axis=1) / (2 * tau**2)))
    # Weighted least-squares prediction at x
    return x @ np.linalg.pinv(X.T @ W @ X) @ X.T @ W @ y

np.random.seed(42)
X = np.linspace(0, 2*np.pi, 100)
y = np.sin(X) + 0.1*np.random.randn(100)
Xb = np.c_[np.ones(X.shape), X]      # add a bias column
xt = np.linspace(0, 2*np.pi, 200)
xtb = np.c_[np.ones(xt.shape), xt]

tau = 0.5
yp = np.array([lwr(xi, Xb, y, tau) for xi in xtb])

plt.figure(figsize=(10, 6))
plt.scatter(X, y, c='red', alpha=0.7, label='Training Data')
plt.plot(xt, yp, c='blue', lw=2, label=f'LWR Fit (tau={tau})')
plt.xlabel('X'), plt.ylabel('y'), plt.title('Locally Weighted Regression')
plt.legend(), plt.grid(alpha=0.3)
plt.show()
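The bandwidth tau controls how local the fit is. A short sketch, continuing from the program above, that sweeps tau to show under- and over-smoothing (the three values are illustrative):

plt.scatter(X, y, c='red', alpha=0.5, label='Training Data')
for t in (0.1, 0.5, 2.0):   # small tau = wiggly fit, large tau = near-linear
    plt.plot(xt, [lwr(xi, Xb, y, t) for xi in xtb], lw=2, label=f'tau={t}')
plt.legend(); plt.show()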

5

import numpy as np, matplotlib.pyplot as plt
from collections import Counter

data = np.random.rand(100)
train, test = data[:50], data[50:]
labels = ['Class1' if x <= 0.5 else 'Class2' for x in train]

def knn(x, k):
    # Majority vote among the k nearest training points (1-D distance)
    d = sorted([(abs(x - xi), li) for xi, li in zip(train, labels)])
    return Counter([l for _, l in d[:k]]).most_common(1)[0][0]

k_vals = [1, 2, 3, 4, 5, 20, 30]
for k in k_vals:
    print(f"\nResults for k = {k}:")
    preds = [knn(x, k) for x in test]
    for i, p in enumerate(preds, 51):
        print(f"x{i} (value: {test[i-51]:.4f}) → {p}")
    plt.figure()
    plt.scatter(train, [0]*50, c=['blue' if l=='Class1' else 'red' for l in labels], label='Train')
    plt.scatter([test[i] for i in range(50) if preds[i]=='Class1'], [1]*preds.count('Class1'), c='blue', marker='x', label='Class1')
    plt.scatter([test[i] for i in range(50) if preds[i]=='Class2'], [1]*preds.count('Class2'), c='red', marker='x', label='Class2')
    plt.title(f'k = {k}')
    plt.legend()
    plt.show()
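For comparison, the same experiment can be run with scikit-learn's built-in classifier. A sketch, continuing from the program above and assuming the 1-D values are reshaped into the (n_samples, 1) layout sklearn expects:

from sklearn.neighbors import KNeighborsClassifier

for k in k_vals:
    clf = KNeighborsClassifier(n_neighbors=k).fit(train.reshape(-1, 1), labels)
    preds = clf.predict(test.reshape(-1, 1))
    print(f"k={k}: {Counter(preds)}")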

4

import pandas as pd

def find_s_algorithm(file_path):
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)
    attributes = data.columns[:-1]
    class_label = data.columns[-1]
    hypothesis = None    # most specific: no positive example seen yet
    for _, row in data.iterrows():
        if row[class_label] == 'Yes':
            if hypothesis is None:
                # Initialise with the first positive example
                hypothesis = list(row[attributes])
            else:
                # Generalise: keep matching values, replace mismatches with '?'
                hypothesis = [h if h == v else '?'
                              for h, v in zip(hypothesis, row[attributes])]
    return hypothesis

file_path = 'training_data.csv'
print("Final hypothesis:", find_s_algorithm(file_path))
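To run this end-to-end, training_data.csv needs attribute columns plus a final Yes/No class column. A sketch that writes the classic EnjoySport examples from Mitchell's textbook to that file first (illustrative data, not the original file's contents):

import pandas as pd

# Hypothetical EnjoySport-style training set
rows = [
    ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same',   'Yes'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Warm', 'Same',   'Yes'],
    ['Rainy', 'Cold', 'High',   'Strong', 'Warm', 'Change', 'No'],
    ['Sunny', 'Warm', 'High',   'Strong', 'Cool', 'Change', 'Yes'],
]
cols = ['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast', 'EnjoySport']
pd.DataFrame(rows, columns=cols).to_csv('training_data.csv', index=False)
# Expected hypothesis: ['Sunny', 'Warm', '?', 'Strong', '?', '?']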