Professional Documents
Culture Documents
# Fit a linear model to synthetic data and plot the fitted line over it.
# NOTE(review): LinearRegression and plt are imported outside this excerpt --
# presumably scikit-learn's estimator and matplotlib.pyplot; confirm.

# Reproducible data: 100 one-feature samples drawn uniformly from [0, 2),
# with targets on the line y = 4 + 3x plus uniform [0, 1) noise.
np.random.seed(0)
x = np.random.random_sample((100, 1)) * 2
y = (3 * x + 4) + np.random.random_sample((100, 1))

model = LinearRegression()
model.fit(x, y)

# Predict at x = 0 and x = 2; two points are enough to draw a straight line.
x_new = np.array([0, 2]).reshape(-1, 1)
y_pred = model.predict(x_new)

# Scatter the samples, then overlay the predictions using the "r-" format.
plt.scatter(x, y)
plt.plot(x_new, y_pred, "r-")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()
# Data Visualization
# Example: draw a histogram and a box plot of one numerical feature side by
# side in a single 12x6 figure.
# NOTE(review): plt, sns and data are defined outside this excerpt --
# presumably matplotlib.pyplot, seaborn and a pandas DataFrame; confirm.
plt.figure(figsize=(12, 6))

# Left panel (1 row, 2 columns, slot 1): 20-bin histogram with kde enabled.
plt.subplot(121)
sns.histplot(data=data['numeric_feature'], bins=20, kde=True)
plt.title('Histogram')

# Right panel (slot 2): box plot of the same column on the y axis.
plt.subplot(122)
sns.boxplot(y=data['numeric_feature'])
plt.title('Box Plot')

plt.show()
# Descriptive Statistics
# Keep the result of data.describe() in summary_stats; nothing is printed or
# plotted here, so inspect the variable to see the summary.
# NOTE(review): data is defined outside this excerpt -- presumably a pandas
# DataFrame, in which case describe() yields per-column summary statistics;
# confirm.
summary_stats = data.describe()
# EDA Documentation (Add comments and explanations to your code for documentation)
# Now you can proceed to build and evaluate your machine learning models with the
# preprocessed data.
# Regression Example
# Build a reproducible synthetic dataset: 100 one-feature samples drawn
# uniformly from [0, 2), with targets on the line y = 4 + 3x plus uniform
# [0, 1) noise.
np.random.seed(0)  # fixed seed so every run draws the same samples
x = np.random.random_sample((100, 1)) * 2
y = (3 * x + 4) + np.random.random_sample((100, 1))