-
Notifications
You must be signed in to change notification settings - Fork 0
/
Minor Project -Used Cars Price Detection
1 lines (1 loc) · 76.1 KB
/
Minor Project -Used Cars Price Detection
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":4897786,"sourceType":"datasetVersion","datasetId":2840264}],"dockerImageVersionId":30017,"isInternetEnabled":false,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# MINOR PROJECT \n\n***\n\n## Used Car Price Prediction \n\nGiven *data about used cars*, let's try to predict the **price** of a given car. \n \nWe will use linear regression and gradient boosting (LightGBM) to make our predictions.","metadata":{}},{"cell_type":"markdown","source":"# Getting Started","metadata":{}},{"cell_type":"code","source":"import numpy as np\nimport pandas as pd\n\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.model_selection import train_test_split\n\nfrom sklearn.linear_model import LinearRegression\nimport lightgbm as lgb\n\nfrom sklearn.metrics import mean_squared_error","metadata":{"_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","execution":{"iopub.status.busy":"2024-05-03T04:03:07.302905Z","iopub.execute_input":"2024-05-03T04:03:07.303398Z","iopub.status.idle":"2024-05-03T04:03:07.311113Z","shell.execute_reply.started":"2024-05-03T04:03:07.303268Z","shell.execute_reply":"2024-05-03T04:03:07.309376Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"data = pd.read_csv('/kaggle/input/used-car-price-dataset/used_car_dataset.csv')","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:06:22.615895Z","iopub.execute_input":"2024-05-03T05:06:22.616572Z","iopub.status.idle":"2024-05-03T05:06:22.635839Z","shell.execute_reply.started":"2024-05-03T05:06:22.616501Z","shell.execute_reply":"2024-05-03T05:06:22.634971Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:06:26.902622Z","iopub.execute_input":"2024-05-03T05:06:26.903402Z","iopub.status.idle":"2024-05-03T05:06:26.931065Z","shell.execute_reply.started":"2024-05-03T05:06:26.903348Z","shell.execute_reply":"2024-05-03T05:06:26.929976Z"},"trusted":true},"execution_count":31,"outputs":[{"execution_count":31,"output_type":"execute_result","data":{"text/plain":" car_name car_price_in_rupees \\\n0 Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2... ₹ 4.45 Lakh \n1 Maruti Suzuki Alto 800 Lxi ₹ 2.93 Lakh \n2 Tata Safari XZ Plus New ₹ 22.49 Lakh \n3 Maruti Suzuki Ciaz ZXI+ ₹ 6.95 Lakh \n4 Jeep Compass Sport Plus 1.4 Petrol [2019-2020] ₹ 12 Lakh \n... ... ... \n2100 Ford Figo Titanium1.5 TDCi ₹ 3.6 Lakh \n2101 MINI Cooper Countryman Cooper D ₹ 22 Lakh \n2102 Hyundai Verna 1.6 VTVT SX ₹ 8.38 Lakh \n2103 Maruti Suzuki Ciaz VXi+ AT ₹ 6.75 Lakh \n2104 Hyundai Verna 1.6 VTVT SX ₹ 8.76 Lakh \n\n kms_driven fuel_type city year_of_manufacture \n0 22,402 km Petrol Mumbai 2016 \n1 10,344 km Petrol Kolkata 2019 \n2 12,999 km Diesel Bangalore 2021 \n3 45,000 km Petrol Thane 2016 \n4 11,193 km Petrol Kolkata 2019 \n... ... ... ... ... \n2100 42,158 km Diesel Kolkata 2015 \n2101 68,862 km Diesel Hyderabad 2013 \n2102 37,622 km Petrol Chennai 2018 \n2103 64,726 km Petrol Mumbai 2017 \n2104 29,150 km Petrol Pune 2017 \n\n[2105 rows x 6 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>car_name</th>\n <th>car_price_in_rupees</th>\n <th>kms_driven</th>\n <th>fuel_type</th>\n <th>city</th>\n <th>year_of_manufacture</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...</td>\n <td>₹ 4.45 Lakh</td>\n <td>22,402 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Maruti Suzuki Alto 800 Lxi</td>\n <td>₹ 2.93 Lakh</td>\n <td>10,344 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Tata Safari XZ Plus New</td>\n <td>₹ 22.49 Lakh</td>\n <td>12,999 km</td>\n <td>Diesel</td>\n <td>Bangalore</td>\n <td>2021</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Maruti Suzuki Ciaz ZXI+</td>\n <td>₹ 6.95 Lakh</td>\n <td>45,000 km</td>\n <td>Petrol</td>\n <td>Thane</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Jeep Compass Sport Plus 1.4 Petrol [2019-2020]</td>\n <td>₹ 12 Lakh</td>\n <td>11,193 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2100</th>\n <td>Ford Figo Titanium1.5 TDCi</td>\n <td>₹ 3.6 Lakh</td>\n <td>42,158 km</td>\n <td>Diesel</td>\n <td>Kolkata</td>\n <td>2015</td>\n </tr>\n <tr>\n <th>2101</th>\n <td>MINI Cooper Countryman Cooper D</td>\n <td>₹ 22 Lakh</td>\n <td>68,862 km</td>\n <td>Diesel</td>\n <td>Hyderabad</td>\n <td>2013</td>\n </tr>\n <tr>\n <th>2102</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.38 Lakh</td>\n <td>37,622 km</td>\n <td>Petrol</td>\n <td>Chennai</td>\n <td>2018</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>Maruti Suzuki Ciaz VXi+ AT</td>\n <td>₹ 6.75 Lakh</td>\n <td>64,726 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2017</td>\n </tr>\n <tr>\n <th>2104</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.76 Lakh</td>\n <td>29,150 km</td>\n <td>Petrol</td>\n <td>Pune</td>\n <td>2017</td>\n </tr>\n </tbody>\n</table>\n<p>2105 rows × 6 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"data.info()","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:06:59.742792Z","iopub.execute_input":"2024-05-03T05:06:59.743266Z","iopub.status.idle":"2024-05-03T05:06:59.758056Z","shell.execute_reply.started":"2024-05-03T05:06:59.743212Z","shell.execute_reply":"2024-05-03T05:06:59.756337Z"},"trusted":true},"execution_count":33,"outputs":[{"name":"stdout","text":"<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 2105 entries, 0 to 2104\nData columns (total 6 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 car_name 2105 non-null object\n 1 car_price_in_rupees 2105 non-null object\n 2 kms_driven 2105 non-null object\n 3 fuel_type 2105 non-null object\n 4 city 2105 non-null object\n 5 year_of_manufacture 2105 non-null int64 \ndtypes: int64(1), object(5)\nmemory usage: 98.8+ KB\n","output_type":"stream"}]},{"cell_type":"markdown","source":"# Preprocessing","metadata":{}},{"cell_type":"code","source":"data.isna().sum()","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:07:13.456019Z","iopub.execute_input":"2024-05-03T05:07:13.456475Z","iopub.status.idle":"2024-05-03T05:07:13.468115Z","shell.execute_reply.started":"2024-05-03T05:07:13.456423Z","shell.execute_reply":"2024-05-03T05:07:13.466990Z"},"trusted":true},"execution_count":34,"outputs":[{"execution_count":34,"output_type":"execute_result","data":{"text/plain":"car_name 0\ncar_price_in_rupees 0\nkms_driven 0\nfuel_type 0\ncity 0\nyear_of_manufacture 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"null_columns = data.columns[data.isna().mean() > 0.25]\n\ndata = data.drop(null_columns, axis=1)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T04:04:07.419621Z","iopub.execute_input":"2024-05-03T04:04:07.420057Z","iopub.status.idle":"2024-05-03T04:04:08.398050Z","shell.execute_reply.started":"2024-05-03T04:04:07.420017Z","shell.execute_reply":"2024-05-03T04:04:08.396674Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:07:27.657839Z","iopub.execute_input":"2024-05-03T05:07:27.658235Z","iopub.status.idle":"2024-05-03T05:07:27.681608Z","shell.execute_reply.started":"2024-05-03T05:07:27.658200Z","shell.execute_reply":"2024-05-03T05:07:27.679874Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":" car_name car_price_in_rupees \\\n0 Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2... ₹ 4.45 Lakh \n1 Maruti Suzuki Alto 800 Lxi ₹ 2.93 Lakh \n2 Tata Safari XZ Plus New ₹ 22.49 Lakh \n3 Maruti Suzuki Ciaz ZXI+ ₹ 6.95 Lakh \n4 Jeep Compass Sport Plus 1.4 Petrol [2019-2020] ₹ 12 Lakh \n... ... ... \n2100 Ford Figo Titanium1.5 TDCi ₹ 3.6 Lakh \n2101 MINI Cooper Countryman Cooper D ₹ 22 Lakh \n2102 Hyundai Verna 1.6 VTVT SX ₹ 8.38 Lakh \n2103 Maruti Suzuki Ciaz VXi+ AT ₹ 6.75 Lakh \n2104 Hyundai Verna 1.6 VTVT SX ₹ 8.76 Lakh \n\n kms_driven fuel_type city year_of_manufacture \n0 22,402 km Petrol Mumbai 2016 \n1 10,344 km Petrol Kolkata 2019 \n2 12,999 km Diesel Bangalore 2021 \n3 45,000 km Petrol Thane 2016 \n4 11,193 km Petrol Kolkata 2019 \n... ... ... ... ... \n2100 42,158 km Diesel Kolkata 2015 \n2101 68,862 km Diesel Hyderabad 2013 \n2102 37,622 km Petrol Chennai 2018 \n2103 64,726 km Petrol Mumbai 2017 \n2104 29,150 km Petrol Pune 2017 \n\n[2105 rows x 6 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>car_name</th>\n <th>car_price_in_rupees</th>\n <th>kms_driven</th>\n <th>fuel_type</th>\n <th>city</th>\n <th>year_of_manufacture</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...</td>\n <td>₹ 4.45 Lakh</td>\n <td>22,402 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Maruti Suzuki Alto 800 Lxi</td>\n <td>₹ 2.93 Lakh</td>\n <td>10,344 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Tata Safari XZ Plus New</td>\n <td>₹ 22.49 Lakh</td>\n <td>12,999 km</td>\n <td>Diesel</td>\n <td>Bangalore</td>\n <td>2021</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Maruti Suzuki Ciaz ZXI+</td>\n <td>₹ 6.95 Lakh</td>\n <td>45,000 km</td>\n <td>Petrol</td>\n <td>Thane</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Jeep Compass Sport Plus 1.4 Petrol [2019-2020]</td>\n <td>₹ 12 Lakh</td>\n <td>11,193 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2100</th>\n <td>Ford Figo Titanium1.5 TDCi</td>\n <td>₹ 3.6 Lakh</td>\n <td>42,158 km</td>\n <td>Diesel</td>\n <td>Kolkata</td>\n <td>2015</td>\n </tr>\n <tr>\n <th>2101</th>\n <td>MINI Cooper Countryman Cooper D</td>\n <td>₹ 22 Lakh</td>\n <td>68,862 km</td>\n <td>Diesel</td>\n <td>Hyderabad</td>\n <td>2013</td>\n </tr>\n <tr>\n <th>2102</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.38 Lakh</td>\n <td>37,622 km</td>\n <td>Petrol</td>\n <td>Chennai</td>\n <td>2018</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>Maruti Suzuki Ciaz VXi+ AT</td>\n <td>₹ 6.75 Lakh</td>\n <td>64,726 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2017</td>\n </tr>\n <tr>\n <th>2104</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.76 Lakh</td>\n <td>29,150 km</td>\n <td>Petrol</td>\n <td>Pune</td>\n <td>2017</td>\n </tr>\n </tbody>\n</table>\n<p>2105 rows × 6 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:07:48.949218Z","iopub.execute_input":"2024-05-03T05:07:48.949642Z","iopub.status.idle":"2024-05-03T05:07:48.970343Z","shell.execute_reply.started":"2024-05-03T05:07:48.949604Z","shell.execute_reply":"2024-05-03T05:07:48.969318Z"},"trusted":true},"execution_count":37,"outputs":[{"execution_count":37,"output_type":"execute_result","data":{"text/plain":" car_name car_price_in_rupees \\\n0 Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2... ₹ 4.45 Lakh \n1 Maruti Suzuki Alto 800 Lxi ₹ 2.93 Lakh \n2 Tata Safari XZ Plus New ₹ 22.49 Lakh \n3 Maruti Suzuki Ciaz ZXI+ ₹ 6.95 Lakh \n4 Jeep Compass Sport Plus 1.4 Petrol [2019-2020] ₹ 12 Lakh \n... ... ... \n2100 Ford Figo Titanium1.5 TDCi ₹ 3.6 Lakh \n2101 MINI Cooper Countryman Cooper D ₹ 22 Lakh \n2102 Hyundai Verna 1.6 VTVT SX ₹ 8.38 Lakh \n2103 Maruti Suzuki Ciaz VXi+ AT ₹ 6.75 Lakh \n2104 Hyundai Verna 1.6 VTVT SX ₹ 8.76 Lakh \n\n kms_driven fuel_type city year_of_manufacture \n0 22,402 km Petrol Mumbai 2016 \n1 10,344 km Petrol Kolkata 2019 \n2 12,999 km Diesel Bangalore 2021 \n3 45,000 km Petrol Thane 2016 \n4 11,193 km Petrol Kolkata 2019 \n... ... ... ... ... \n2100 42,158 km Diesel Kolkata 2015 \n2101 68,862 km Diesel Hyderabad 2013 \n2102 37,622 km Petrol Chennai 2018 \n2103 64,726 km Petrol Mumbai 2017 \n2104 29,150 km Petrol Pune 2017 \n\n[2105 rows x 6 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>car_name</th>\n <th>car_price_in_rupees</th>\n <th>kms_driven</th>\n <th>fuel_type</th>\n <th>city</th>\n <th>year_of_manufacture</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Hyundai Grand i10 Magna 1.2 Kappa VTVT [2017-2...</td>\n <td>₹ 4.45 Lakh</td>\n <td>22,402 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Maruti Suzuki Alto 800 Lxi</td>\n <td>₹ 2.93 Lakh</td>\n <td>10,344 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Tata Safari XZ Plus New</td>\n <td>₹ 22.49 Lakh</td>\n <td>12,999 km</td>\n <td>Diesel</td>\n <td>Bangalore</td>\n <td>2021</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Maruti Suzuki Ciaz ZXI+</td>\n <td>₹ 6.95 Lakh</td>\n <td>45,000 km</td>\n <td>Petrol</td>\n <td>Thane</td>\n <td>2016</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Jeep Compass Sport Plus 1.4 Petrol [2019-2020]</td>\n <td>₹ 12 Lakh</td>\n <td>11,193 km</td>\n <td>Petrol</td>\n <td>Kolkata</td>\n <td>2019</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2100</th>\n <td>Ford Figo Titanium1.5 TDCi</td>\n <td>₹ 3.6 Lakh</td>\n <td>42,158 km</td>\n <td>Diesel</td>\n <td>Kolkata</td>\n <td>2015</td>\n </tr>\n <tr>\n <th>2101</th>\n <td>MINI Cooper Countryman Cooper D</td>\n <td>₹ 22 Lakh</td>\n <td>68,862 km</td>\n <td>Diesel</td>\n <td>Hyderabad</td>\n <td>2013</td>\n </tr>\n <tr>\n <th>2102</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.38 Lakh</td>\n <td>37,622 km</td>\n <td>Petrol</td>\n <td>Chennai</td>\n <td>2018</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>Maruti Suzuki Ciaz VXi+ AT</td>\n <td>₹ 6.75 Lakh</td>\n <td>64,726 km</td>\n <td>Petrol</td>\n <td>Mumbai</td>\n <td>2017</td>\n </tr>\n <tr>\n <th>2104</th>\n <td>Hyundai Verna 1.6 VTVT SX</td>\n <td>₹ 8.76 Lakh</td>\n <td>29,150 km</td>\n <td>Petrol</td>\n <td>Pune</td>\n <td>2017</td>\n </tr>\n </tbody>\n</table>\n<p>2105 rows × 6 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"{column: len(data[column].unique()) for column in data.columns if data.dtypes[column] == 'object'}","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:07:56.173453Z","iopub.execute_input":"2024-05-03T05:07:56.173891Z","iopub.status.idle":"2024-05-03T05:07:56.187624Z","shell.execute_reply.started":"2024-05-03T05:07:56.173852Z","shell.execute_reply":"2024-05-03T05:07:56.186334Z"},"trusted":true},"execution_count":38,"outputs":[{"execution_count":38,"output_type":"execute_result","data":{"text/plain":"{'car_name': 946,\n 'car_price_in_rupees': 811,\n 'kms_driven': 1628,\n 'fuel_type': 8,\n 'city': 16}"},"metadata":{}}]},{"cell_type":"code","source":"data = data.drop('kms_driven', axis=1)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:08:22.538805Z","iopub.execute_input":"2024-05-03T05:08:22.539248Z","iopub.status.idle":"2024-05-03T05:08:22.546784Z","shell.execute_reply.started":"2024-05-03T05:08:22.539211Z","shell.execute_reply":"2024-05-03T05:08:22.545218Z"},"trusted":true},"execution_count":39,"outputs":[]},{"cell_type":"code","source":"def onehot_encode(df, columns, prefixes):\n df = df.copy()\n for column, prefix in zip(columns, prefixes):\n dummies = pd.get_dummies(df[column], prefix=prefix)\n df = pd.concat([df, dummies], axis=1)\n df = df.drop(column, axis=1)\n return df","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:13:25.907315Z","iopub.execute_input":"2024-05-03T05:13:25.907813Z","iopub.status.idle":"2024-05-03T05:13:25.916266Z","shell.execute_reply.started":"2024-05-03T05:13:25.907771Z","shell.execute_reply":"2024-05-03T05:13:25.914769Z"},"trusted":true},"execution_count":47,"outputs":[]},{"cell_type":"code","source":"data = onehot_encode(\n data,\n ['car_name', 'car_price_in_rupees', 'fuel_type', 'city'],\n ['cname', 'price', 'fname', 'state']\n)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:15:55.179839Z","iopub.execute_input":"2024-05-03T05:15:55.180257Z","iopub.status.idle":"2024-05-03T05:15:55.235095Z","shell.execute_reply.started":"2024-05-03T05:15:55.180220Z","shell.execute_reply":"2024-05-03T05:15:55.233867Z"},"trusted":true},"execution_count":49,"outputs":[]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:16:04.140269Z","iopub.execute_input":"2024-05-03T05:16:04.140692Z","iopub.status.idle":"2024-05-03T05:16:04.174522Z","shell.execute_reply.started":"2024-05-03T05:16:04.140649Z","shell.execute_reply":"2024-05-03T05:16:04.173353Z"},"trusted":true},"execution_count":50,"outputs":[{"execution_count":50,"output_type":"execute_result","data":{"text/plain":" year_of_manufacture cname_Audi A3 35 TDI Premium + Sunroof \\\n0 2016 0 \n1 2019 0 \n2 2021 0 \n3 2016 0 \n4 2019 0 \n... ... ... \n2100 2015 0 \n2101 2013 0 \n2102 2018 0 \n2103 2017 0 \n2104 2017 0 \n\n cname_Audi A3 35 TDI Premium Plus \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TDI Premium Plus + Sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TDI Technology + Sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TFSI Premium Plus cname_Audi A3 40 TFSI Premium \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 0 \n4 0 0 \n... ... ... \n2100 0 0 \n2101 0 0 \n2102 0 0 \n2103 0 0 \n2104 0 0 \n\n cname_Audi A3 40 TFSI Premium Plus + sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A4 2.0 TDI (143 bhp) cname_Audi A4 2.0 TDI (143bhp) ... \\\n0 0 0 ... \n1 0 0 ... \n2 0 0 ... \n3 0 0 ... \n4 0 0 ... \n... ... ... ... \n2100 0 0 ... \n2101 0 0 ... \n2102 0 0 ... \n2103 0 0 ... \n2104 0 0 ... \n\n state_Gurgaon state_Hyderabad state_Kolkata state_Mumbai \\\n0 0 0 0 1 \n1 0 0 1 0 \n2 0 0 0 0 \n3 0 0 0 0 \n4 0 0 1 0 \n... ... ... ... ... \n2100 0 0 1 0 \n2101 0 1 0 0 \n2102 0 0 0 0 \n2103 0 0 0 1 \n2104 0 0 0 0 \n\n state_Noida state_Pallikarnai state_Poonamallee state_Pune \\\n0 0 0 0 0 \n1 0 0 0 0 \n2 0 0 0 0 \n3 0 0 0 0 \n4 0 0 0 0 \n... ... ... ... ... \n2100 0 0 0 0 \n2101 0 0 0 0 \n2102 0 0 0 0 \n2103 0 0 0 0 \n2104 0 0 0 1 \n\n state_Thane state_Thiruvallur \n0 0 0 \n1 0 0 \n2 0 0 \n3 1 0 \n4 0 0 \n... ... ... \n2100 0 0 \n2101 0 0 \n2102 0 0 \n2103 0 0 \n2104 0 0 \n\n[2105 rows x 1782 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>year_of_manufacture</th>\n <th>cname_Audi A3 35 TDI Premium + Sunroof</th>\n <th>cname_Audi A3 35 TDI Premium Plus</th>\n <th>cname_Audi A3 35 TDI Premium Plus + Sunroof</th>\n <th>cname_Audi A3 35 TDI Technology + Sunroof</th>\n <th>cname_Audi A3 35 TFSI Premium Plus</th>\n <th>cname_Audi A3 40 TFSI Premium</th>\n <th>cname_Audi A3 40 TFSI Premium Plus + sunroof</th>\n <th>cname_Audi A4 2.0 TDI (143 bhp)</th>\n <th>cname_Audi A4 2.0 TDI (143bhp)</th>\n <th>...</th>\n <th>state_Gurgaon</th>\n <th>state_Hyderabad</th>\n <th>state_Kolkata</th>\n <th>state_Mumbai</th>\n <th>state_Noida</th>\n <th>state_Pallikarnai</th>\n <th>state_Poonamallee</th>\n <th>state_Pune</th>\n <th>state_Thane</th>\n <th>state_Thiruvallur</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2016</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2019</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2021</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2016</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2019</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2100</th>\n <td>2015</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2101</th>\n <td>2013</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2102</th>\n <td>2018</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>2017</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2104</th>\n <td>2017</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>2105 rows × 1782 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"data.isna().sum().sum()","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:16:14.860446Z","iopub.execute_input":"2024-05-03T05:16:14.861328Z","iopub.status.idle":"2024-05-03T05:16:14.891065Z","shell.execute_reply.started":"2024-05-03T05:16:14.861259Z","shell.execute_reply":"2024-05-03T05:16:14.889925Z"},"trusted":true},"execution_count":51,"outputs":[{"execution_count":51,"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{}}]},{"cell_type":"markdown","source":"# Splitting and Scaling","metadata":{}},{"cell_type":"code","source":"y = data.loc[:, 'year_of_manufacture']\nX = data.drop('year_of_manufacture', axis=1)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:21.349495Z","iopub.execute_input":"2024-05-03T05:17:21.349917Z","iopub.status.idle":"2024-05-03T05:17:21.359688Z","shell.execute_reply.started":"2024-05-03T05:17:21.349879Z","shell.execute_reply":"2024-05-03T05:17:21.358257Z"},"trusted":true},"execution_count":54,"outputs":[]},{"cell_type":"code","source":"data","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:29:18.859033Z","iopub.execute_input":"2024-05-03T05:29:18.859478Z","iopub.status.idle":"2024-05-03T05:29:18.893583Z","shell.execute_reply.started":"2024-05-03T05:29:18.859428Z","shell.execute_reply":"2024-05-03T05:29:18.892402Z"},"trusted":true},"execution_count":77,"outputs":[{"execution_count":77,"output_type":"execute_result","data":{"text/plain":" year_of_manufacture cname_Audi A3 35 TDI Premium + Sunroof \\\n0 2016 0 \n1 2019 0 \n2 2021 0 \n3 2016 0 \n4 2019 0 \n... ... ... \n2100 2015 0 \n2101 2013 0 \n2102 2018 0 \n2103 2017 0 \n2104 2017 0 \n\n cname_Audi A3 35 TDI Premium Plus \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TDI Premium Plus + Sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TDI Technology + Sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A3 35 TFSI Premium Plus cname_Audi A3 40 TFSI Premium \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 0 \n4 0 0 \n... ... ... \n2100 0 0 \n2101 0 0 \n2102 0 0 \n2103 0 0 \n2104 0 0 \n\n cname_Audi A3 40 TFSI Premium Plus + sunroof \\\n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n2100 0 \n2101 0 \n2102 0 \n2103 0 \n2104 0 \n\n cname_Audi A4 2.0 TDI (143 bhp) cname_Audi A4 2.0 TDI (143bhp) ... \\\n0 0 0 ... \n1 0 0 ... \n2 0 0 ... \n3 0 0 ... \n4 0 0 ... \n... ... ... ... \n2100 0 0 ... \n2101 0 0 ... \n2102 0 0 ... \n2103 0 0 ... \n2104 0 0 ... \n\n state_Gurgaon state_Hyderabad state_Kolkata state_Mumbai \\\n0 0 0 0 1 \n1 0 0 1 0 \n2 0 0 0 0 \n3 0 0 0 0 \n4 0 0 1 0 \n... ... ... ... ... \n2100 0 0 1 0 \n2101 0 1 0 0 \n2102 0 0 0 0 \n2103 0 0 0 1 \n2104 0 0 0 0 \n\n state_Noida state_Pallikarnai state_Poonamallee state_Pune \\\n0 0 0 0 0 \n1 0 0 0 0 \n2 0 0 0 0 \n3 0 0 0 0 \n4 0 0 0 0 \n... ... ... ... ... \n2100 0 0 0 0 \n2101 0 0 0 0 \n2102 0 0 0 0 \n2103 0 0 0 0 \n2104 0 0 0 1 \n\n state_Thane state_Thiruvallur \n0 0 0 \n1 0 0 \n2 0 0 \n3 1 0 \n4 0 0 \n... ... ... \n2100 0 0 \n2101 0 0 \n2102 0 0 \n2103 0 0 \n2104 0 0 \n\n[2105 rows x 1782 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>year_of_manufacture</th>\n <th>cname_Audi A3 35 TDI Premium + Sunroof</th>\n <th>cname_Audi A3 35 TDI Premium Plus</th>\n <th>cname_Audi A3 35 TDI Premium Plus + Sunroof</th>\n <th>cname_Audi A3 35 TDI Technology + Sunroof</th>\n <th>cname_Audi A3 35 TFSI Premium Plus</th>\n <th>cname_Audi A3 40 TFSI Premium</th>\n <th>cname_Audi A3 40 TFSI Premium Plus + sunroof</th>\n <th>cname_Audi A4 2.0 TDI (143 bhp)</th>\n <th>cname_Audi A4 2.0 TDI (143bhp)</th>\n <th>...</th>\n <th>state_Gurgaon</th>\n <th>state_Hyderabad</th>\n <th>state_Kolkata</th>\n <th>state_Mumbai</th>\n <th>state_Noida</th>\n <th>state_Pallikarnai</th>\n <th>state_Poonamallee</th>\n <th>state_Pune</th>\n <th>state_Thane</th>\n <th>state_Thiruvallur</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2016</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2019</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2021</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2016</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2019</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2100</th>\n <td>2015</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2101</th>\n <td>2013</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2102</th>\n <td>2018</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2103</th>\n <td>2017</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2104</th>\n <td>2017</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>2105 rows × 1782 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"scaler = StandardScaler()\n\nX = scaler.fit_transform(X)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:25.089104Z","iopub.execute_input":"2024-05-03T05:17:25.089915Z","iopub.status.idle":"2024-05-03T05:17:25.245333Z","shell.execute_reply.started":"2024-05-03T05:17:25.089858Z","shell.execute_reply":"2024-05-03T05:17:25.244089Z"},"trusted":true},"execution_count":55,"outputs":[]},{"cell_type":"code","source":"X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=34)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:28.768661Z","iopub.execute_input":"2024-05-03T05:17:28.769281Z","iopub.status.idle":"2024-05-03T05:17:28.871602Z","shell.execute_reply.started":"2024-05-03T05:17:28.769240Z","shell.execute_reply":"2024-05-03T05:17:28.870580Z"},"trusted":true},"execution_count":56,"outputs":[]},{"cell_type":"markdown","source":"# Training","metadata":{}},{"cell_type":"code","source":"lin_model = LinearRegression()\n\nlin_model.fit(X_train, y_train)\n\nlin_y_preds = lin_model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:34.505167Z","iopub.execute_input":"2024-05-03T05:17:34.505613Z","iopub.status.idle":"2024-05-03T05:17:35.570164Z","shell.execute_reply.started":"2024-05-03T05:17:34.505574Z","shell.execute_reply":"2024-05-03T05:17:35.569088Z"},"trusted":true},"execution_count":57,"outputs":[]},{"cell_type":"code","source":"lgb_model = lgb.LGBMRegressor(\n boosting_type='gbdt',\n num_leaves=31,\n n_estimators=100,\n reg_lambda=1.0\n)\n\nlgb_model.fit(X_train, y_train)\n\nlgb_y_preds = lgb_model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:37.855640Z","iopub.execute_input":"2024-05-03T05:17:37.856090Z","iopub.status.idle":"2024-05-03T05:17:38.162505Z","shell.execute_reply.started":"2024-05-03T05:17:37.856055Z","shell.execute_reply":"2024-05-03T05:17:38.161396Z"},"trusted":true},"execution_count":58,"outputs":[]},{"cell_type":"code","source":"lin_loss = np.sqrt(mean_squared_error(y_test, lin_y_preds))\nlgb_loss = np.sqrt(mean_squared_error(y_test, lgb_y_preds))","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:44.273125Z","iopub.execute_input":"2024-05-03T05:17:44.273817Z","iopub.status.idle":"2024-05-03T05:17:44.280151Z","shell.execute_reply.started":"2024-05-03T05:17:44.273776Z","shell.execute_reply":"2024-05-03T05:17:44.279207Z"},"trusted":true},"execution_count":59,"outputs":[]},{"cell_type":"code","source":"print(data.columns)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:31:31.406667Z","iopub.execute_input":"2024-05-03T05:31:31.407552Z","iopub.status.idle":"2024-05-03T05:31:31.415326Z","shell.execute_reply.started":"2024-05-03T05:31:31.407488Z","shell.execute_reply":"2024-05-03T05:31:31.413786Z"},"trusted":true},"execution_count":79,"outputs":[{"name":"stdout","text":"Index(['year_of_manufacture', 'cname_Audi A3 35 TDI Premium + Sunroof',\n 'cname_Audi A3 35 TDI Premium Plus',\n 'cname_Audi A3 35 TDI Premium Plus + Sunroof',\n 'cname_Audi A3 35 TDI Technology + Sunroof',\n 'cname_Audi A3 35 TFSI Premium Plus', 'cname_Audi A3 40 TFSI Premium',\n 'cname_Audi A3 40 TFSI Premium Plus + sunroof',\n 'cname_Audi A4 2.0 TDI (143 bhp)', 'cname_Audi A4 2.0 TDI (143bhp)',\n ...\n 'state_Gurgaon', 'state_Hyderabad', 'state_Kolkata', 'state_Mumbai',\n 'state_Noida', 'state_Pallikarnai', 'state_Poonamallee', 'state_Pune',\n 'state_Thane', 'state_Thiruvallur'],\n dtype='object', length=1782)\n","output_type":"stream"}]},{"cell_type":"code","source":"These two lines of code calculate and print the Root Mean Squared Error (RMSE) for two different models:\na Linear Regression model (lin_loss) and a Gradient Boosted model (lgb_loss).\n \nRMSE is a measure of the average magnitude of the errors between predicted and actual values.\nLower RMSE values indicate better model performance in terms of prediction accuracy.","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"print(\"Linear Regression RMSE:\", lin_loss)\nprint(\"Gradient Boosted RMSE:\", lgb_loss)","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:25:45.662574Z","iopub.execute_input":"2024-05-03T05:25:45.663099Z","iopub.status.idle":"2024-05-03T05:25:45.670017Z","shell.execute_reply.started":"2024-05-03T05:25:45.663047Z","shell.execute_reply":"2024-05-03T05:25:45.668419Z"},"trusted":true},"execution_count":71,"outputs":[{"name":"stdout","text":"Linear Regression RMSE: 340159668607913.44\nGradient Boosted RMSE: 2.86068559927202\n","output_type":"stream"}]},{"cell_type":"code","source":"These two lines of code calculate and print the R-squared scores for two different models (linear regression and gradient boosted) on a test dataset.\nThe R-squared score measures the proportion of the variance in the dependent variable that is predictable from the independent variables in a regression model.\nHigher R-squared scores indicate a better fit of the model to the data.","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"print(\"Linear Regression R^2 Score:\", lin_model.score(X_test, y_test))\nprint(\"Gradient Boosted R^2 Score:\", lgb_model.score(X_test, y_test))","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:17:49.704445Z","iopub.execute_input":"2024-05-03T05:17:49.704908Z","iopub.status.idle":"2024-05-03T05:17:49.724745Z","shell.execute_reply.started":"2024-05-03T05:17:49.704865Z","shell.execute_reply":"2024-05-03T05:17:49.723755Z"},"trusted":true},"execution_count":61,"outputs":[{"name":"stdout","text":"Linear Regression R^2 Score: -1.3682711338193156e+28\nGradient Boosted R^2 Score: 0.03228653313268315\n","output_type":"stream"}]},{"cell_type":"code","source":"EVALUATION OF THE MODEL","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import matplotlib.pyplot as plt\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.model_selection import train_test_split\nimport pandas as pd\n\n# Assuming you have a DataFrame 'data' with columns 'kms_driven' and 'price'\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(data[['year_of_manufacture']], data['cname_Audi A3 35 TDI Premium + Sunroof'], test_size=0.2, random_state=42)\n\n# Train a linear regression model\nmodel = LinearRegression()\nmodel.fit(X_train, y_train)\n\n# Make predictions on the testing set\ny_pred = model.predict(X_test)\n\n# Visualize the results using a scatter plot\nplt.figure(figsize=(8, 6))\nplt.scatter(X_test, y_test, color='blue', label='Actual')\nplt.scatter(X_test, y_pred, color='red', label='Predicted')\nplt.xlabel('year_of_manufacture')\nplt.ylabel('cname_Audi A3 35 TDI Premium + Sunroof')\nplt.title('Actual vs Predicted Price')\nplt.legend()\nplt.show()\n","metadata":{"execution":{"iopub.status.busy":"2024-05-03T05:32:11.305537Z","iopub.execute_input":"2024-05-03T05:32:11.306139Z","iopub.status.idle":"2024-05-03T05:32:11.600866Z","shell.execute_reply.started":"2024-05-03T05:32:11.306100Z","shell.execute_reply":"2024-05-03T05:32:11.599508Z"},"trusted":true},"execution_count":80,"outputs":[{"output_type":"display_data","data":{"text/plain":"<Figure size 576x432 with 1 Axes>","image/png":"\n"},"metadata":{"needs_background":"light"}}]}]}