{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Linear Regression using scikit-learn\n",
    "\n",
    "This notebook draws 50 synthetic points from the line $y = 2x - 5$ with standard-normal noise added, fits `sklearn.linear_model.LinearRegression` to them, plots the fitted line over the data, and prints the fitted slope and intercept so they can be compared with the values used to generate the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enable inline plotting in Jupyter Notebook\n",
    "%matplotlib inline\n",
    "\n",
    "# All imports live in this cell so the notebook's dependencies are visible up front\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# Set the theme for Seaborn visualizations\n",
    "sns.set_theme()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate synthetic data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeded generator so that every Restart & Run All reproduces the same data\n",
    "rng = np.random.RandomState(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate 50 random x values scaled between 0 and 10\n",
    "x = 10 * rng.rand(50)\n",
    "\n",
    "# Generate corresponding y values: true slope 2, true intercept -5, plus standard-normal noise\n",
    "y = 2 * x - 5 + rng.randn(50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(x), len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a scatter plot of the data\n",
    "plt.scatter(x, y, label=\"Data Points\")\n",
    "\n",
    "# Title and axis labels so the figure stands on its own\n",
    "plt.title(\"Synthetic data\")\n",
    "plt.xlabel(\"x\")\n",
    "plt.ylabel(\"y\")\n",
    "\n",
    "# Add a legend to the plot\n",
    "plt.legend()\n",
    "\n",
    "# Display the plot\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the Linear Regression model (the intercept is estimated, not forced to zero)\n",
    "model = LinearRegression(fit_intercept=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the model to the data; x[:, np.newaxis] turns the 1-D array into a single-column 2-D array\n",
    "model.fit(x[:, np.newaxis], y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate a range of x values for prediction\n",
    "xfit = np.linspace(0, 10, 1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict y values based on the fitted model\n",
    "yfit = model.predict(xfit[:, np.newaxis])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the fit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the original data and the fitted line\n",
    "plt.scatter(x, y, label=\"Data Points\")\n",
    "plt.plot(xfit, yfit, color=\"red\", label=\"Fitted Line\")\n",
    "\n",
    "# Title and axis labels so the figure stands on its own\n",
    "plt.title(\"Linear regression fit\")\n",
    "plt.xlabel(\"x\")\n",
    "plt.ylabel(\"y\")\n",
    "\n",
    "# Add a legend to the plot\n",
    "plt.legend()\n",
    "\n",
    "# Display the plot\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the best-fit values of slope and y-intercept.\n",
    "print(f\"Model slope:\\t {model.coef_[0]:.2f}\")\n",
    "print(f\"Model intercept:\\t {model.intercept_:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Conclusion\n",
    "\n",
    "The data were generated with slope 2 and intercept -5, so the values printed above should land close to those numbers; any remaining gap comes from the noise added to `y`."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}