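# Task: load the iris dataset bundled with scikit-learn, compute the
# correlation between its attributes, and visualize the data (requirement:
# draw scatter plots chosen according to the characteristics of the attributes).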
import sklearn.datasets as datasets
# Load the iris dataset as a (features, labels) pair.
X, d = datasets.load_iris(return_X_y=True)

# Echo the feature array, its shape, and the label array for a quick look.
for preview in (X, X.shape, d):
    print(preview)
import numpy as np
# Load the dataset and round-trip it through a local .npz archive.
X, d = datasets.load_iris(return_X_y=True)
np.savez("iris.npz", data=X, target=d)

# np.load returns an NpzFile for .npz archives; the context manager closes the
# underlying file handle once both arrays have been read out of it.
with np.load("iris.npz") as iris_file:
    X = iris_file["data"]
    d = iris_file["target"]

# Sanity check: shapes and dtypes of the arrays that came back from disk.
print(X.shape, d.shape, X.dtype, d.dtype)
import matplotlib.pyplot as plt
plt.switch_backend("TkAgg")

# Open the archive once, pull out both arrays, and close the file handle.
with np.load("iris.npz") as iris_file:
    x = iris_file["data"]
    d1 = iris_file["target"]

# First two feature columns.
x1 = x[:, 0]
x2 = x[:, 1]

# Figure 1: every sample in a single scatter call, colored by its label.
# Uses the labels just loaded from the archive rather than an earlier global.
plt.scatter(x1, x2, c=d1)
plt.show()

# Figure 2: one scatter call per class so each class gets an explicit color
# and its own legend entry.
colors = ["#ff0000", "#00ff00", "#0000ff"]
for i in range(3):
    in_class = d1 == i  # boolean mask, computed once per class
    plt.scatter(x1[in_class], x2[in_class], color=colors[i], label=f"{i}")
plt.legend()
plt.show()
import matplotlib.pyplot as plt
plt.switch_backend("TkAgg")

# Open the archive once (instead of once per array) and close it as soon as
# both arrays have been extracted.
with np.load("iris.npz") as iris_file:
    x = iris_file["data"]
    d = iris_file["target"]
def rho(x3, x4):
    """Return the Pearson correlation coefficient of two samples.

    Computed as the mean of the products of the centered values divided by
    both standard deviations (``np.std`` with its default ``ddof=0``).

    Args:
        x3: First sample; any array-like (list, tuple, ndarray).
        x4: Second sample; array-like broadcastable against ``x3``.

    Returns:
        The correlation coefficient as a NumPy scalar. There is no guard
        against zero variance: a constant input leads to a division by zero,
        which follows NumPy's rules (nan/inf with a RuntimeWarning).
    """
    # Coerce so plain Python sequences support the element-wise arithmetic.
    first = np.asarray(x3)
    second = np.asarray(x4)

    first_centered = first - np.mean(first)
    second_centered = second - np.mean(second)

    covariance = np.mean(first_centered * second_centered)
    return covariance / np.std(first) / np.std(second)
# Feature columns 2 and 3.
x3 = x[:, 2]
x4 = x[:, 3]

# Correlation of the two columns over the whole dataset.
print(rho(x3, x4))

colors = ["#ff0000", "#00ff00", "#0000ff"]
for i in range(3):
    # Select the samples of class i once and reuse the subsets for both the
    # per-class correlation and the scatter plot.
    in_class = d == i
    x11 = x3[in_class]
    x22 = x4[in_class]
    print(i, rho(x11, x22))
    plt.scatter(x11, x22, color=colors[i], label=f"{i}")

# One legend and one window for the combined per-class figure.
plt.legend()
plt.show()