"""Explore the cakes data set and classify cake type with k-nearest neighbours.

Reads ``cakes.csv``, shows a correlation heat map of the non-label columns,
rescales every ingredient to its share of the recipe's total mass, plots the
flour share against the cake type, and finally fits and scores a
``KNeighborsClassifier`` on a random 80/20 train/test split.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Multiplier applied to the "eggs" column before the ingredient masses are
# summed; presumably the mass of one egg in grams -- TODO confirm.
EGG_MASS = 63.0

# Columns that are summed into a recipe's total mass and then normalised.
INGREDIENTS = ["flour", "eggs", "milk", "butter", "sugar", "baking_powder"]

data = pd.read_csv("cakes.csv")
print(data.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
data.info()

# Correlation heat map over every column except the "type" label.
plt.figure()
tmp_data = data.drop(columns=["type"])
sb.heatmap(tmp_data.corr(), square=True, fmt=".2f", annot=True)
plt.show()

data["eggs"] = data["eggs"] * EGG_MASS

# Express each ingredient as a fraction of the recipe's total mass.
# Done column-wise instead of cell by cell: this does not rely on the index
# labels being 0..n-1 and does not write floats one at a time into columns
# that may have been read as integers. skipna=False keeps the behaviour of a
# plain "+" chain, where a missing ingredient makes the whole row NaN.
total_mass = data[INGREDIENTS].sum(axis=1, skipna=False)
data[INGREDIENTS] = data[INGREDIENTS].div(total_mass, axis=0)
print(data.head())

# Flour share versus cake type. The column is selected by name so the plot
# really shows what its labels claim, whatever the column order in the CSV.
y = data["type"].to_numpy()
plt.scatter(
    data["flour"].to_numpy(),
    y,
    s=30,
    c="green",
    marker="o",
    alpha=0.8,
    edgecolors="black",
    label="Brasno",
)
plt.xlabel("Kolicina brasna", fontsize=13)
plt.ylabel("Tip kolaca", fontsize=13)
plt.legend()
plt.show()

# Features: every remaining column except the label and three ingredients
# that are deliberately left out of the model.
x = data.drop(columns=["type", "eggs", "baking_powder", "butter"]).to_numpy()
y = data["type"].to_numpy()

# No random_state is passed, so the split (and therefore the score below)
# differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True
)

model = KNeighborsClassifier(n_neighbors=7)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

# NOTE(review): the model is fitted on mass fractions, while these two
# samples look like raw amounts. Normalising them the same way needs the
# full recipe (all six ingredients) to compute the total mass -- confirm the
# intended inputs before trusting these predictions.
print(model.predict(np.array([[200, 300, 150], [250, 350, 150]])))