import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
import random
from sklearn import datasets, linear_model
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, train_test_split, KFold
from sklearn.preprocessing import StandardScaler
%matplotlib inline
Importamos el .csv resultante del TP1
# Read the CABA listings CSV into a DataFrame.
data_location = "CABA_properatti_amenities.csv"
proper_CABA = pd.read_csv(filepath_or_buffer=data_location)
# Preview the first ten rows.
proper_CABA.head(n=10)
Unnamed: 0 | property_type | place_name | state_name | lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | salon | laundry | pileta | cochera | estado_nuevo | estado_a_reciclar | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | PH | Mataderos | Capital Federal | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
1 | 2 | apartment | Mataderos | Capital Federal | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
2 | 7 | apartment | Belgrano | Capital Federal | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 |
3 | 8 | apartment | Belgrano | Capital Federal | -34.559873 | -58.443362 | 195000.0 | 65.0 | 60.0 | 3000.000000 | 3.0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 |
4 | 13 | apartment | Palermo Soho | Capital Federal | -34.580504 | -58.405874 | 111700.0 | 50.0 | 30.0 | 2234.000000 | 1.0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
5 | 14 | apartment | Palermo Soho | Capital Federal | -34.580504 | -58.405874 | 147900.0 | 42.0 | 31.0 | 3521.428571 | 1.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
6 | 16 | PH | Mataderos | Capital Federal | -34.652356 | -58.501624 | 239000.0 | 140.0 | 98.0 | 1707.142857 | 4.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
7 | 19 | apartment | Palermo | Capital Federal | -34.580504 | -58.405874 | 350000.0 | 104.0 | 96.0 | 3365.384615 | 3.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
8 | 21 | apartment | Palermo | Capital Federal | -34.590926 | -58.411665 | 270500.0 | 118.0 | 73.0 | 2292.372881 | 4.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
9 | 29 | apartment | Flores | Capital Federal | -34.635118 | -58.473964 | 75000.0 | 43.0 | 43.0 | 1744.186047 | 2.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Borramos las columnas que no nos van a servir para el análisis porque tienen datos irrelevantes
# Drop the "Unnamed: 0" column from the loaded frame.
columnas_borrar = ["Unnamed: 0"]
CABA = proper_CABA.drop(columns=columnas_borrar)
# Summary of dtypes and non-null counts for the remaining columns.
CABA.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 28762 entries, 0 to 28761 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 property_type 28762 non-null object 1 place_name 28762 non-null object 2 state_name 28762 non-null object 3 lat 28762 non-null float64 4 lon 28762 non-null float64 5 price_aprox_usd 28762 non-null float64 6 surface_total_in_m2 28762 non-null float64 7 surface_covered_in_m2 28762 non-null float64 8 price_per_m2 28762 non-null float64 9 rooms 28762 non-null float64 10 terraza 28762 non-null int64 11 patio 28762 non-null int64 12 parrilla 28762 non-null int64 13 salon 28762 non-null int64 14 laundry 28762 non-null int64 15 pileta 28762 non-null int64 16 cochera 28762 non-null int64 17 estado_nuevo 28762 non-null int64 18 estado_a_reciclar 28762 non-null int64 dtypes: float64(7), int64(9), object(3) memory usage: 4.2+ MB
# Number of missing values in each column.
cantidad_nulos = CABA.isna().sum()
cantidad_nulos
property_type 0 place_name 0 state_name 0 lat 0 lon 0 price_aprox_usd 0 surface_total_in_m2 0 surface_covered_in_m2 0 price_per_m2 0 rooms 0 terraza 0 patio 0 parrilla 0 salon 0 laundry 0 pileta 0 cochera 0 estado_nuevo 0 estado_a_reciclar 0 dtype: int64
# Number of listings for each property type.
CABA["property_type"].value_counts()
apartment 23996 house 1858 PH 1751 store 1157 Name: property_type, dtype: int64
# Number of listings for each value of `rooms`.
CABA["rooms"].value_counts()
2.0 7456 3.0 7406 1.0 6592 4.0 4950 5.0 1580 6.0 424 7.0 186 8.0 80 9.0 32 10.0 29 12.0 11 11.0 9 17.0 3 22.0 1 15.0 1 14.0 1 13.0 1 Name: rooms, dtype: int64
# Number of listings for each value of `state_name`.
CABA["state_name"].value_counts()
Capital Federal 28762 Name: state_name, dtype: int64
# Mean and median of the listing price in USD.
precio_usd = CABA["price_aprox_usd"]
print('Mean -> ' + str(precio_usd.mean()))
print('Median -> ' + str(precio_usd.median()))
Mean -> 270334.0809748974 Median -> 158000.0
Graficamos para validar que los datos sean coherentes
# Horizontal box plot of the listing price in USD.
px.box(CABA.price_aprox_usd, orientation='h')
# Quartiles of the USD price, the inter-quartile range and the fences placed
# 1.5 * IQR beyond Q1 and Q3.
precio_usd = CABA["price_aprox_usd"]
q1, q2, q3, q4 = (precio_usd.quantile(nivel) for nivel in (0.25, 0.5, 0.75, 1))
inter_range = q3 - q1
margen = 1.5 * inter_range
outlier_high = q3 + margen
outlier_low = q1 - margen

# Print each statistic with its label (the lower fence is computed but not
# printed).
for prefijo, valor in (
    ('q1 -> ', q1),
    ('q2 -> ', q2),
    ('q3 -> ', q3),
    ('q4 -> ', q4),
    ('inter_range -> ', inter_range),
    ('upper fence -> ', outlier_high),
):
    print(prefijo + str(valor))
q1 -> 105000.0 q2 -> 158000.0 q3 -> 285000.0 q4 -> 46545445.0 inter_range -> 180000.0 upper fence -> 555000.0
# Horizontal box plot of the total surface column.
px.box(CABA.surface_total_in_m2, orientation='h')
Vemos que hay casos donde la superficie en m2 es cero y también siguen existiendo outliers
# Keep only rows whose total surface is strictly positive and at most 12000.
superficie_total = CABA["surface_total_in_m2"]
CABA_sup_no_out_mask = (superficie_total <= 12000) & (superficie_total > 0)
CABA = CABA.loc[CABA_sup_no_out_mask]
# Re-draw the box plot on the filtered rows.
px.box(CABA.surface_total_in_m2, orientation='h')
# Re-count missing values per column after the surface filter.
cantidad_nulos = CABA.isna().sum()
cantidad_nulos
property_type 0 place_name 0 state_name 0 lat 0 lon 0 price_aprox_usd 0 surface_total_in_m2 0 surface_covered_in_m2 0 price_per_m2 0 rooms 0 terraza 0 patio 0 parrilla 0 salon 0 laundry 0 pileta 0 cochera 0 estado_nuevo 0 estado_a_reciclar 0 dtype: int64
Ahora analizamos el precio por m2
# Horizontal box plot of the price per m2.
px.box(CABA.price_per_m2, orientation='h')
# Quartiles of the price per m2, the inter-quartile range and the fences
# placed 1.5 * IQR beyond Q1 and Q3.
precio_m2 = CABA["price_per_m2"]
qp1, qp2, qp3, qp4 = (precio_m2.quantile(nivel) for nivel in (0.25, 0.5, 0.75, 1))
inter_range_p = qp3 - qp1
margen_p = 1.5 * inter_range_p
outlier_high_p = qp3 + margen_p
outlier_low_p = qp1 - margen_p
# NOTE(review): bare expression that is not the last statement of the cell;
# it produces no output and can probably be removed.
outlier_high_p

# Print each statistic with its label (the lower fence is computed but not
# printed).
for prefijo, valor in (
    ('qp1 -> ', qp1),
    ('qp2 -> ', qp2),
    ('qp3 -> ', qp3),
    ('qp4 -> ', qp4),
    ('inter_range_p -> ', inter_range_p),
    ('upper fence -> ', outlier_high_p),
):
    print(prefijo + str(valor))
qp1 -> 1960.5263157894735 qp2 -> 2452.830188679245 qp3 -> 3063.508403361345 qp4 -> 2600000.0 inter_range_p -> 1102.9820875718713 upper fence -> 4717.981534719152
# Mean and median of the price per m2.
precio_m2 = CABA["price_per_m2"]
print('Mean -> ' + str(precio_m2.mean()))
print('Median -> ' + str(precio_m2.median()))
Mean -> 3433.135492209381 Median -> 2452.830188679245
# Descriptive statistics of the price per m2.
CABA.price_per_m2.describe()
count 2.874700e+04 mean 3.433135e+03 std 1.758346e+04 min 6.730000e+00 25% 1.960526e+03 50% 2.452830e+03 75% 3.063508e+03 max 2.600000e+06 Name: price_per_m2, dtype: float64
# (rows, columns) of the frame after the surface filter.
CABA.shape
(28747, 19)
En referencia a lo observado en el gráfico y en los estadísticos descriptivos, decidimos no tener en cuenta propiedades cuyo precio por m2 sea inferior a USD 1126 o superior a USD 5422
# Inclusive bounds on price per m2; rows outside this range are treated as
# outliers and discarded.
precio_m2_minimo = 1126.6666666666667
precio_m2_maximo = 5422.193749999995
precio_m2 = CABA["price_per_m2"]
CABA_no_out_mask = (precio_m2 >= precio_m2_minimo) & (precio_m2 <= precio_m2_maximo)
CABA_no_out = CABA.loc[CABA_no_out_mask]
CABA_no_out.shape
(25498, 19)
# Report how many rows the outlier filter removed.
filas_perdidas = len(CABA) - len(CABA_no_out)
print('Perdemos', filas_perdidas, 'pero consideramos que esto permitirá mejorar el modelo')
Perdemos 3249 pero consideramos que esto permitirá mejorar el modelo
# Descriptive statistics of the price per m2 after removing outliers.
CABA_no_out.price_per_m2.describe()
count 25498.000000 mean 2547.964293 std 803.538220 min 1126.666667 25% 2000.000000 50% 2435.897436 75% 2941.812016 max 5421.875000 Name: price_per_m2, dtype: float64
Vemos en el dataset anterior cuántos valores toma place_name
# Distinct values of place_name, in order of first appearance.
CABA_no_out.place_name.unique()
array(['Mataderos', 'Belgrano', 'Palermo Soho', 'Palermo', 'Flores', 'Boedo', 'Las Cañitas', 'Balvanera', 'Caballito', 'Nuñez', 'Almagro', 'Capital Federal', 'Colegiales', 'Barrio Norte', 'Recoleta', 'Congreso', 'Villa Crespo', 'Chacarita', 'Puerto Madero', 'Villa Urquiza', 'Palermo Hollywood', 'Saavedra', 'Parque Chas', 'Barracas', 'Paternal', 'Agronomía', 'Villa Pueyrredón', 'Coghlan', 'Parque Centenario', 'San Telmo', 'Monserrat', 'Floresta', 'Villa Devoto', 'Boca', 'San Cristobal', 'Abasto', 'Versalles', 'Villa del Parque', 'Monte Castro', 'Retiro', 'Parque Patricios', 'Palermo Chico', 'Liniers', 'Centro / Microcentro', 'Once', 'Tribunales', 'Velez Sarsfield', 'Catalinas', 'San Nicolás', 'Parque Chacabuco', 'Parque Avellaneda', 'Constitución', 'Palermo Viejo', 'Villa Lugano', 'Villa Luro', 'Villa General Mitre', 'Villa Ortuzar', 'Villa Santa Rita', 'Pompeya', 'Villa Soldati', 'Villa Real', 'Villa Riachuelo'], dtype=object)
# Select the rows whose place_name is the generic "Capital Federal" label
# instead of a neighbourhood name, and count them. Nothing is removed here.
capital_0_mask = CABA_no_out["place_name"] == 'Capital Federal'
capital_0 = CABA_no_out.loc[capital_0_mask]
capital_0["place_name"].value_counts()
Capital Federal 567 Name: place_name, dtype: int64
Corresponden a publicaciones cuyo lugar fue descripto como "Capital Federal" sin hacer referencia a un barrio. Decidimos borrarlos
# Keep only the rows that name a neighbourhood, i.e. every row whose
# place_name is not "Capital Federal".
CABA_ok_mask = CABA_no_out["place_name"] != 'Capital Federal'
CABA_ok = CABA_no_out.loc[CABA_ok_mask]
CABA_ok.shape
(24931, 19)
CABA_ok.place_name.value_counts() ## 61 neighbourhoods, because the rows labelled "Capital Federal" were removed
Belgrano 2437 Palermo 2315 Caballito 2010 Recoleta 1336 Villa Urquiza 1322 ... Velez Sarsfield 28 Pompeya 21 Villa Soldati 5 Catalinas 3 Villa Riachuelo 3 Name: place_name, Length: 61, dtype: int64
# Preview the first three rows of the cleaned frame.
CABA_ok.head(3)
property_type | place_name | state_name | lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | salon | laundry | pileta | cochera | estado_nuevo | estado_a_reciclar | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | PH | Mataderos | Capital Federal | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
1 | apartment | Mataderos | Capital Federal | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
2 | apartment | Belgrano | Capital Federal | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 |
# Columns kept for the correlation analysis.
columnas_para_correlacion = [
    'property_type',
    'place_name',
    'price_aprox_usd',
    'price_per_m2',
    'lat',
    'lon',
    'surface_total_in_m2',
    'surface_covered_in_m2',
    'rooms',
]
# Independent copy restricted to those columns.
CABA_para_correlacion = CABA_ok.loc[:, columnas_para_correlacion].copy()
CABA_para_correlacion.head(3)
property_type | place_name | price_aprox_usd | price_per_m2 | lat | lon | surface_total_in_m2 | surface_covered_in_m2 | rooms | |
---|---|---|---|---|---|---|---|---|---|
0 | PH | Mataderos | 62000.0 | 1127.272727 | -34.661824 | -58.508839 | 55.0 | 40.0 | 2.0 |
1 | apartment | Mataderos | 72000.0 | 1309.090909 | -34.652262 | -58.522982 | 55.0 | 55.0 | 2.0 |
2 | apartment | Belgrano | 138000.0 | 3066.666667 | -34.559873 | -58.443362 | 45.0 | 40.0 | 1.0 |
# Draw the correlation matrix as a seaborn heatmap.
# 'property_type' and 'place_name' hold strings, so only the numeric columns
# are passed to corr(): older pandas skipped non-numeric columns silently,
# while pandas >= 2.0 raises on them.
correlaciones = CABA_para_correlacion.select_dtypes(include="number").corr()
sns.heatmap(correlaciones, vmin=-1, vmax=1, center=0, cmap="YlGnBu");
Generamos variables dummies para poder trabajar con variables categóricas
# One-hot encode the property type. drop_first=True omits the first category
# ('PH') to avoid multicollinearity among the dummy columns.
dummy_prop_type = pd.get_dummies(CABA_ok.property_type, drop_first=True)
dummy_prop_type.head()
apartment | house | store | |
---|---|---|---|
0 | 0 | 0 | 0 |
1 | 1 | 0 | 0 |
2 | 1 | 0 | 0 |
3 | 1 | 0 | 0 |
4 | 1 | 0 | 0 |
# Distinct neighbourhood names in sorted order.
CABA_ok.place_name.sort_values().unique()
array(['Abasto', 'Agronomía', 'Almagro', 'Balvanera', 'Barracas', 'Barrio Norte', 'Belgrano', 'Boca', 'Boedo', 'Caballito', 'Catalinas', 'Centro / Microcentro', 'Chacarita', 'Coghlan', 'Colegiales', 'Congreso', 'Constitución', 'Flores', 'Floresta', 'Las Cañitas', 'Liniers', 'Mataderos', 'Monserrat', 'Monte Castro', 'Nuñez', 'Once', 'Palermo', 'Palermo Chico', 'Palermo Hollywood', 'Palermo Soho', 'Palermo Viejo', 'Parque Avellaneda', 'Parque Centenario', 'Parque Chacabuco', 'Parque Chas', 'Parque Patricios', 'Paternal', 'Pompeya', 'Puerto Madero', 'Recoleta', 'Retiro', 'Saavedra', 'San Cristobal', 'San Nicolás', 'San Telmo', 'Tribunales', 'Velez Sarsfield', 'Versalles', 'Villa Crespo', 'Villa Devoto', 'Villa General Mitre', 'Villa Lugano', 'Villa Luro', 'Villa Ortuzar', 'Villa Pueyrredón', 'Villa Real', 'Villa Riachuelo', 'Villa Santa Rita', 'Villa Soldati', 'Villa Urquiza', 'Villa del Parque'], dtype=object)
# One-hot encode the neighbourhood. drop_first=True omits the first category
# ('Abasto') to avoid multicollinearity among the dummy columns.
dummy_barrio = pd.get_dummies(CABA_ok.place_name, drop_first=True)
dummy_barrio.head()
Agronomía | Almagro | Balvanera | Barracas | Barrio Norte | Belgrano | Boca | Boedo | Caballito | Catalinas | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 60 columns
# Append both sets of dummy columns to the cleaned frame, side by side.
partes = [CABA_ok, dummy_prop_type, dummy_barrio]
CABA_con_dummies = pd.concat(partes, axis="columns")
CABA_con_dummies.head(10)
property_type | place_name | state_name | lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | PH | Mataderos | Capital Federal | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | apartment | Mataderos | Capital Federal | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | apartment | Belgrano | Capital Federal | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | apartment | Belgrano | Capital Federal | -34.559873 | -58.443362 | 195000.0 | 65.0 | 60.0 | 3000.000000 | 3.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | apartment | Palermo Soho | Capital Federal | -34.580504 | -58.405874 | 111700.0 | 50.0 | 30.0 | 2234.000000 | 1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 | apartment | Palermo Soho | Capital Federal | -34.580504 | -58.405874 | 147900.0 | 42.0 | 31.0 | 3521.428571 | 1.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | PH | Mataderos | Capital Federal | -34.652356 | -58.501624 | 239000.0 | 140.0 | 98.0 | 1707.142857 | 4.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
7 | apartment | Palermo | Capital Federal | -34.580504 | -58.405874 | 350000.0 | 104.0 | 96.0 | 3365.384615 | 3.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
8 | apartment | Palermo | Capital Federal | -34.590926 | -58.411665 | 270500.0 | 118.0 | 73.0 | 2292.372881 | 4.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
9 | apartment | Flores | Capital Federal | -34.635118 | -58.473964 | 75000.0 | 43.0 | 43.0 | 1744.186047 | 2.0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
10 rows × 82 columns
# Drop the string-valued columns, which cannot be used in the regression.
columnas_borrar = ["property_type", "place_name", "state_name"]
CABA_para_regresion = CABA_con_dummies.drop(columns=columnas_borrar)
CABA_para_regresion.shape
(24931, 79)
# Preview the first four rows of the regression frame.
CABA_para_regresion.head(4)
lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | -34.559873 | -58.443362 | 195000.0 | 65.0 | 60.0 | 3000.000000 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 rows × 79 columns
# Check whether any value in the regression frame is missing.
CABA_para_regresion.isna().values.any()
False
# Summary of dtypes and non-null counts for the regression frame.
CABA_para_regresion.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 24931 entries, 0 to 28760 Data columns (total 79 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 lat 24931 non-null float64 1 lon 24931 non-null float64 2 price_aprox_usd 24931 non-null float64 3 surface_total_in_m2 24931 non-null float64 4 surface_covered_in_m2 24931 non-null float64 5 price_per_m2 24931 non-null float64 6 rooms 24931 non-null float64 7 terraza 24931 non-null int64 8 patio 24931 non-null int64 9 parrilla 24931 non-null int64 10 salon 24931 non-null int64 11 laundry 24931 non-null int64 12 pileta 24931 non-null int64 13 cochera 24931 non-null int64 14 estado_nuevo 24931 non-null int64 15 estado_a_reciclar 24931 non-null int64 16 apartment 24931 non-null uint8 17 house 24931 non-null uint8 18 store 24931 non-null uint8 19 Agronomía 24931 non-null uint8 20 Almagro 24931 non-null uint8 21 Balvanera 24931 non-null uint8 22 Barracas 24931 non-null uint8 23 Barrio Norte 24931 non-null uint8 24 Belgrano 24931 non-null uint8 25 Boca 24931 non-null uint8 26 Boedo 24931 non-null uint8 27 Caballito 24931 non-null uint8 28 Catalinas 24931 non-null uint8 29 Centro / Microcentro 24931 non-null uint8 30 Chacarita 24931 non-null uint8 31 Coghlan 24931 non-null uint8 32 Colegiales 24931 non-null uint8 33 Congreso 24931 non-null uint8 34 Constitución 24931 non-null uint8 35 Flores 24931 non-null uint8 36 Floresta 24931 non-null uint8 37 Las Cañitas 24931 non-null uint8 38 Liniers 24931 non-null uint8 39 Mataderos 24931 non-null uint8 40 Monserrat 24931 non-null uint8 41 Monte Castro 24931 non-null uint8 42 Nuñez 24931 non-null uint8 43 Once 24931 non-null uint8 44 Palermo 24931 non-null uint8 45 Palermo Chico 24931 non-null uint8 46 Palermo Hollywood 24931 non-null uint8 47 Palermo Soho 24931 non-null uint8 48 Palermo Viejo 24931 non-null uint8 49 Parque Avellaneda 24931 non-null uint8 50 Parque Centenario 24931 non-null uint8 51 Parque Chacabuco 24931 non-null uint8 52 Parque Chas 24931 
non-null uint8 53 Parque Patricios 24931 non-null uint8 54 Paternal 24931 non-null uint8 55 Pompeya 24931 non-null uint8 56 Puerto Madero 24931 non-null uint8 57 Recoleta 24931 non-null uint8 58 Retiro 24931 non-null uint8 59 Saavedra 24931 non-null uint8 60 San Cristobal 24931 non-null uint8 61 San Nicolás 24931 non-null uint8 62 San Telmo 24931 non-null uint8 63 Tribunales 24931 non-null uint8 64 Velez Sarsfield 24931 non-null uint8 65 Versalles 24931 non-null uint8 66 Villa Crespo 24931 non-null uint8 67 Villa Devoto 24931 non-null uint8 68 Villa General Mitre 24931 non-null uint8 69 Villa Lugano 24931 non-null uint8 70 Villa Luro 24931 non-null uint8 71 Villa Ortuzar 24931 non-null uint8 72 Villa Pueyrredón 24931 non-null uint8 73 Villa Real 24931 non-null uint8 74 Villa Riachuelo 24931 non-null uint8 75 Villa Santa Rita 24931 non-null uint8 76 Villa Soldati 24931 non-null uint8 77 Villa Urquiza 24931 non-null uint8 78 Villa del Parque 24931 non-null uint8 dtypes: float64(7), int64(9), uint8(63) memory usage: 4.7 MB
# Collect every column name of the regression frame into a plain list.
columnas = list(CABA_para_regresion.columns)
print(columnas)
['lat', 'lon', 'price_aprox_usd', 'surface_total_in_m2', 'surface_covered_in_m2', 'price_per_m2', 'rooms', 'terraza', 'patio', 'parrilla', 'salon', 'laundry', 'pileta', 'cochera', 'estado_nuevo', 'estado_a_reciclar', 'apartment', 'house', 'store', 'Agronomía', 'Almagro', 'Balvanera', 'Barracas', 'Barrio Norte', 'Belgrano', 'Boca', 'Boedo', 'Caballito', 'Catalinas', 'Centro / Microcentro', 'Chacarita', 'Coghlan', 'Colegiales', 'Congreso', 'Constitución', 'Flores', 'Floresta', 'Las Cañitas', 'Liniers', 'Mataderos', 'Monserrat', 'Monte Castro', 'Nuñez', 'Once', 'Palermo', 'Palermo Chico', 'Palermo Hollywood', 'Palermo Soho', 'Palermo Viejo', 'Parque Avellaneda', 'Parque Centenario', 'Parque Chacabuco', 'Parque Chas', 'Parque Patricios', 'Paternal', 'Pompeya', 'Puerto Madero', 'Recoleta', 'Retiro', 'Saavedra', 'San Cristobal', 'San Nicolás', 'San Telmo', 'Tribunales', 'Velez Sarsfield', 'Versalles', 'Villa Crespo', 'Villa Devoto', 'Villa General Mitre', 'Villa Lugano', 'Villa Luro', 'Villa Ortuzar', 'Villa Pueyrredón', 'Villa Real', 'Villa Riachuelo', 'Villa Santa Rita', 'Villa Soldati', 'Villa Urquiza', 'Villa del Parque']
# Remove 'price_per_m2' so that `columnas` no longer contains it.
# NOTE: list.remove mutates the list in place and raises ValueError when the
# name is absent, e.g. if this statement is executed a second time.
columnas.remove('price_per_m2')
print(columnas)
['lat', 'lon', 'price_aprox_usd', 'surface_total_in_m2', 'surface_covered_in_m2', 'rooms', 'terraza', 'patio', 'parrilla', 'salon', 'laundry', 'pileta', 'cochera', 'estado_nuevo', 'estado_a_reciclar', 'apartment', 'house', 'store', 'Agronomía', 'Almagro', 'Balvanera', 'Barracas', 'Barrio Norte', 'Belgrano', 'Boca', 'Boedo', 'Caballito', 'Catalinas', 'Centro / Microcentro', 'Chacarita', 'Coghlan', 'Colegiales', 'Congreso', 'Constitución', 'Flores', 'Floresta', 'Las Cañitas', 'Liniers', 'Mataderos', 'Monserrat', 'Monte Castro', 'Nuñez', 'Once', 'Palermo', 'Palermo Chico', 'Palermo Hollywood', 'Palermo Soho', 'Palermo Viejo', 'Parque Avellaneda', 'Parque Centenario', 'Parque Chacabuco', 'Parque Chas', 'Parque Patricios', 'Paternal', 'Pompeya', 'Puerto Madero', 'Recoleta', 'Retiro', 'Saavedra', 'San Cristobal', 'San Nicolás', 'San Telmo', 'Tribunales', 'Velez Sarsfield', 'Versalles', 'Villa Crespo', 'Villa Devoto', 'Villa General Mitre', 'Villa Lugano', 'Villa Luro', 'Villa Ortuzar', 'Villa Pueyrredón', 'Villa Real', 'Villa Riachuelo', 'Villa Santa Rita', 'Villa Soldati', 'Villa Urquiza', 'Villa del Parque']
# Display the regression frame.
CABA_para_regresion
lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | -34.559873 | -58.443362 | 195000.0 | 65.0 | 60.0 | 3000.000000 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | -34.580504 | -58.405874 | 111700.0 | 50.0 | 30.0 | 2234.000000 | 1.0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
28755 | -34.559873 | -58.443362 | 128000.0 | 38.0 | 35.0 | 3368.421053 | 3.0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28756 | -34.587425 | -58.397372 | 165000.0 | 44.0 | 39.0 | 3750.000000 | 1.0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28757 | -34.563685 | -58.442683 | 410000.0 | 157.0 | 157.0 | 2611.464968 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28758 | -34.563685 | -58.442683 | 410000.0 | 157.0 | 157.0 | 2611.464968 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28760 | -34.570639 | -58.475596 | 131500.0 | 46.0 | 39.0 | 2858.695652 | 1.0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
24931 rows × 79 columns
# Set aside a reproducible random sample of 100 rows (fixed seed) and remove
# those rows from the frame used in the following steps.
CABA_100 = CABA_para_regresion.sample(n=100, random_state=123)
eliminar = CABA_100.index
CABA_para_regresion = CABA_para_regresion.drop(labels=eliminar, axis="index").copy()
CABA_para_regresion
lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -34.661824 | -58.508839 | 62000.0 | 55.0 | 40.0 | 1127.272727 | 2.0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | -34.652262 | -58.522982 | 72000.0 | 55.0 | 55.0 | 1309.090909 | 2.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | -34.559873 | -58.443362 | 138000.0 | 45.0 | 40.0 | 3066.666667 | 1.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | -34.559873 | -58.443362 | 195000.0 | 65.0 | 60.0 | 3000.000000 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | -34.580504 | -58.405874 | 111700.0 | 50.0 | 30.0 | 2234.000000 | 1.0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
28755 | -34.559873 | -58.443362 | 128000.0 | 38.0 | 35.0 | 3368.421053 | 3.0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28756 | -34.587425 | -58.397372 | 165000.0 | 44.0 | 39.0 | 3750.000000 | 1.0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28757 | -34.563685 | -58.442683 | 410000.0 | 157.0 | 157.0 | 2611.464968 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28758 | -34.563685 | -58.442683 | 410000.0 | 157.0 | 157.0 | 2611.464968 | 3.0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28760 | -34.570639 | -58.475596 | 131500.0 | 46.0 | 39.0 | 2858.695652 | 1.0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
24831 rows × 79 columns
# Predictors are the columns listed in `columnas`; the target is the price
# per m2.
# NOTE(review): the predictors still include price_aprox_usd and
# surface_total_in_m2, and in the rows displayed in this notebook
# price_per_m2 equals their ratio, so the target can be derived from the
# inputs. Confirm this is intended.
feature_cols = columnas
y = CABA_para_regresion["price_per_m2"]
X = CABA_para_regresion[feature_cols]
# Reproducible split into train and test sets (default proportions).
CABA_Xtrain, CABA_Xtest, CABA_ytrain, CABA_ytest = train_test_split(
    X, y, random_state=123
)
# From here on X and y refer to the training split only.
X, y = CABA_Xtrain, CABA_ytrain

# Create the linear regression and fit it on the training data.
linreg = LinearRegression()
model = linreg.fit(X, y)

# Fitted intercept, followed by the coefficient vector.
print(linreg.intercept_)
print(linreg.coef_)
217196.3821238267 [ 1.02520984e+03 3.07249682e+03 3.19019848e-03 -8.21742636e+00 1.02615651e+00 -4.54176250e+01 -6.09039917e+01 -1.66820254e+02 2.61586628e+01 4.41905602e+01 -3.41471580e+01 2.24830647e+02 1.27879843e+02 3.55994728e+01 -2.47481763e+02 5.60148062e+00 2.20716823e+00 2.51580423e+02 2.16707724e+02 1.22687482e+02 -6.20010777e+01 -1.19880131e+02 5.15327802e+02 6.58563829e+02 -3.58954457e+02 -2.56029994e+02 3.04972122e+02 -1.59060623e+01 9.83944646e+01 9.54493713e+01 3.91168483e+02 4.19238624e+02 -9.85808386e+01 -3.73616513e+02 1.33380994e+02 1.42040682e+02 8.03255723e+02 2.93606645e+02 2.89604845e+02 -8.47438875e+01 2.70968335e+02 6.54761171e+02 -1.06151432e+02 6.53885306e+02 8.58386292e+02 6.83984588e+02 7.23118762e+02 5.67378270e+02 -2.97325902e+01 2.84407441e+02 2.68687329e+01 2.03213634e+02 -9.34817494e+01 1.27724505e+02 -4.03142403e+01 1.38711167e+03 6.42091597e+02 3.54576871e+02 3.12520854e+02 -2.89593334e+01 2.07291690e+01 -9.08469342e+00 -4.20840893e+01 1.91438853e+02 4.19774306e+02 3.80407900e+02 4.56834492e+02 1.97157105e+02 4.24506898e+01 2.63259509e+02 5.99002679e+02 4.02594486e+02 3.45102994e+02 -1.54090138e+01 2.13649109e+02 -2.67991359e+02 4.48548134e+02 3.40922351e+02]
# Print the goodness-of-fit metric of the model on the training data (R2).
print ('R2_train=', ' ', model.score(X, y)) ### previously recorded value: R2_train= 0.5773183568859408 (WITH COMUNAS)
R2_train= 0.669454735944299
# Now score the model on the test split.
Xt=CABA_Xtest
yt=CABA_ytest
print ('R2_test=', ' ', model.score(Xt, yt)) # previously recorded value: R2_test= 0.5319143306475157 (WITH COMUNAS)
R2_test= 0.6695211106514514
## To read the model more easily, pair each feature name with its coefficient.
list(zip(feature_cols, linreg.coef_))
[('lat', 1025.209837371704), ('lon', 3072.496819612948), ('price_aprox_usd', 0.0031901984768509806), ('surface_total_in_m2', -8.217426363745773), ('surface_covered_in_m2', 1.0261565080286346), ('rooms', -45.41762499038754), ('terraza', -60.903991667226194), ('patio', -166.82025434712338), ('parrilla', 26.158662753751884), ('salon', 44.19056018840561), ('laundry', -34.14715804334), ('pileta', 224.830646822042), ('cochera', 127.87984265979185), ('estado_nuevo', 35.59947282933214), ('estado_a_reciclar', -247.48176339469396), ('apartment', 5.601480616483286), ('house', 2.2071682323957162), ('store', 251.58042252846744), ('Agronomía', 216.7077235211785), ('Almagro', 122.68748174356624), ('Balvanera', -62.00107773514246), ('Barracas', -119.88013083964427), ('Barrio Norte', 515.3278021934572), ('Belgrano', 658.5638286456942), ('Boca', -358.95445739148687), ('Boedo', -256.02999429034344), ('Caballito', 304.97212223470706), ('Catalinas', -15.906062313626267), ('Centro / Microcentro', 98.39446458149374), ('Chacarita', 95.44937134993178), ('Coghlan', 391.1684833891707), ('Colegiales', 419.2386238645372), ('Congreso', -98.58083861776933), ('Constitución', -373.6165130290824), ('Flores', 133.38099381148209), ('Floresta', 142.04068157035982), ('Las Cañitas', 803.255722574287), ('Liniers', 293.60664477855124), ('Mataderos', 289.6048454025032), ('Monserrat', -84.74388748279918), ('Monte Castro', 270.96833510740885), ('Nuñez', 654.7611710722172), ('Once', -106.15143170703455), ('Palermo', 653.8853059388196), ('Palermo Chico', 858.386291634913), ('Palermo Hollywood', 683.9845879730714), ('Palermo Soho', 723.118761990414), ('Palermo Viejo', 567.3782697964153), ('Parque Avellaneda', -29.73259019853296), ('Parque Centenario', 284.4074405743081), ('Parque Chacabuco', 26.86873290552856), ('Parque Chas', 203.21363351067185), ('Parque Patricios', -93.48174944380685), ('Paternal', 127.72450537132985), ('Pompeya', -40.3142403307216), ('Puerto Madero', 1387.111666198594), ('Recoleta', 
642.091597192173), ('Retiro', 354.57687080383016), ('Saavedra', 312.5208537664762), ('San Cristobal', -28.95933344689655), ('San Nicolás', 20.72916898931026), ('San Telmo', -9.08469342270223), ('Tribunales', -42.084089340918595), ('Velez Sarsfield', 191.43885325045528), ('Versalles', 419.77430649660835), ('Villa Crespo', 380.4078998775279), ('Villa Devoto', 456.8344922746144), ('Villa General Mitre', 197.15710496664894), ('Villa Lugano', 42.4506897883104), ('Villa Luro', 263.2595090829028), ('Villa Ortuzar', 599.0026792612895), ('Villa Pueyrredón', 402.5944855970647), ('Villa Real', 345.1029942050745), ('Villa Riachuelo', -15.409013770501906), ('Villa Santa Rita', 213.6491093537947), ('Villa Soldati', -267.9913586394433), ('Villa Urquiza', 448.54813404860795), ('Villa del Parque', 340.92235054920934)]
# Min-max normalise the continuous variables so they can be plotted on a
# common scale.


def _min_max(serie):
    """Return ``(minimum, maximum, scaled_values)`` for a numeric Series.

    ``scaled_values`` is a plain list holding
    ``(x - minimum) / (maximum - minimum)`` for every element, in the
    Series' own order. The arithmetic is vectorised rather than looping
    over the values one by one in Python.
    """
    minimo = np.min(serie)
    maximo = np.max(serie)
    return minimo, maximo, ((serie - minimo) / (maximo - minimo)).tolist()


rooms_min, rooms_max, rooms_norm = _min_max(CABA_para_regresion["rooms"])
lat_min, lat_max, lat_norm = _min_max(CABA_para_regresion["lat"])
lon_min, lon_max, lon_norm = _min_max(CABA_para_regresion["lon"])
price_aprox_usd_min, price_aprox_usd_max, price_aprox_usd_norm = _min_max(
    CABA_para_regresion["price_aprox_usd"]
)
surface_total_in_m2_min, surface_total_in_m2_max, surface_total_in_m2_norm = _min_max(
    CABA_para_regresion["surface_total_in_m2"]
)
surface_covered_in_m2_min, surface_covered_in_m2_max, surface_covered_in_m2_norm = _min_max(
    CABA_para_regresion["surface_covered_in_m2"]
)
price_per_m2_min, price_per_m2_max, price_per_m2_norm = _min_max(
    CABA_para_regresion["price_per_m2"]
)

# Build a DataFrame with the normalised values. It is built from plain lists,
# so it gets a fresh 0..n-1 index instead of the original row labels.
CABA_NORM = pd.DataFrame({
    'lat': lat_norm,
    'lon': lon_norm,
    'price_aprox_usd': price_aprox_usd_norm,
    'surface_total_in_m2': surface_total_in_m2_norm,
    'surface_covered_in_m2': surface_covered_in_m2_norm,
    'price_per_m2': price_per_m2_norm,
    'rooms': rooms_norm,
})
# Predictors to plot against the price per square metre
feature_cols_norm = [
    "lat",
    "lon",
    "surface_total_in_m2",
    "price_aprox_usd",
    "surface_covered_in_m2",
    "rooms",
]
# One regression panel per predictor, all with price_per_m2 on the y axis
sns.pairplot(data=CABA_NORM, kind='reg', y_vars='price_per_m2', x_vars=feature_cols_norm);
# Correlation matrix of the regression frame (values range from -1 to 1)
CABA_para_regresion.corr()
lat | lon | price_aprox_usd | surface_total_in_m2 | surface_covered_in_m2 | price_per_m2 | rooms | terraza | patio | parrilla | ... | Villa Lugano | Villa Luro | Villa Ortuzar | Villa Pueyrredón | Villa Real | Villa Riachuelo | Villa Santa Rita | Villa Soldati | Villa Urquiza | Villa del Parque | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
lat | 1.000000 | -0.250400 | 0.123326 | 0.044971 | 0.036446 | 0.258045 | 0.028010 | 0.042470 | -0.032329 | 0.020605 | ... | -0.112299 | -0.119924 | 0.025149 | 0.030233 | -0.018255 | -0.026251 | -0.017620 | -0.022443 | 0.167541 | -0.029801 |
lon | -0.250400 | 1.000000 | 0.084947 | 0.015080 | 0.030348 | 0.153849 | -0.015349 | -0.138851 | -0.117395 | -0.098623 | ... | -0.064157 | -0.205648 | -0.047801 | -0.139124 | -0.076916 | -0.009368 | -0.047923 | -0.008279 | -0.298199 | -0.163599 |
price_aprox_usd | 0.123326 | 0.084947 | 1.000000 | 0.860070 | 0.819020 | 0.312747 | 0.469777 | 0.107696 | 0.065623 | 0.014915 | ... | -0.022108 | -0.031635 | 0.029713 | -0.014195 | -0.005928 | -0.003451 | -0.008605 | -0.007258 | -0.050904 | -0.026118 |
surface_total_in_m2 | 0.044971 | 0.015080 | 0.860070 | 1.000000 | 0.937360 | -0.058111 | 0.494506 | 0.131901 | 0.165543 | 0.023312 | ... | 0.000861 | -0.013642 | 0.055917 | 0.002628 | 0.007758 | 0.002570 | 0.005335 | -0.001579 | -0.042118 | -0.008089 |
surface_covered_in_m2 | 0.036446 | 0.030348 | 0.819020 | 0.937360 | 1.000000 | -0.029964 | 0.456769 | 0.086894 | 0.127717 | -0.003818 | ... | 0.003908 | -0.014553 | 0.057019 | -0.001780 | 0.003973 | 0.001454 | -0.003259 | -0.000322 | -0.042593 | -0.010052 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Villa Riachuelo | -0.026251 | -0.009368 | -0.003451 | 0.002570 | 0.001454 | -0.016824 | 0.005810 | 0.010058 | 0.006422 | -0.006653 | ... | -0.000670 | -0.001322 | -0.000617 | -0.000880 | -0.000376 | 1.000000 | -0.000474 | -0.000156 | -0.002600 | -0.001437 |
Villa Santa Rita | -0.017620 | -0.047923 | -0.008605 | 0.005335 | -0.003259 | -0.030268 | -0.001865 | 0.008242 | 0.032497 | 0.003522 | ... | -0.002627 | -0.005181 | -0.002418 | -0.003447 | -0.001473 | -0.000474 | 1.000000 | -0.000611 | -0.010192 | -0.005630 |
Villa Soldati | -0.022443 | -0.008279 | -0.007258 | -0.001579 | -0.000322 | -0.022017 | 0.008205 | 0.004396 | 0.011067 | -0.008590 | ... | -0.000865 | -0.001707 | -0.000797 | -0.001136 | -0.000485 | -0.000156 | -0.000611 | 1.000000 | -0.003357 | -0.001855 |
Villa Urquiza | 0.167541 | -0.298199 | -0.050904 | -0.042118 | -0.042593 | -0.016778 | -0.029891 | 0.082089 | 0.014456 | 0.090954 | ... | -0.014426 | -0.028450 | -0.013280 | -0.018931 | -0.008089 | -0.002600 | -0.010192 | -0.003357 | 1.000000 | -0.030917 |
Villa del Parque | -0.029801 | -0.163599 | -0.026118 | -0.008089 | -0.010052 | -0.050128 | 0.039210 | -0.011309 | 0.022108 | -0.017554 | ... | -0.007970 | -0.015717 | -0.007336 | -0.010458 | -0.004469 | -0.001437 | -0.005630 | -0.001855 | -0.030917 | 1.000000 |
79 rows × 79 columns
# Draw the correlation matrix as a seaborn heatmap, colour scale pinned to [-1, 1]
sns.heatmap(
    CABA_para_regresion.corr(),
    cmap="YlGnBu",
    center=0,
    vmin=-1,
    vmax=1,
);
# 10-fold splitter with shuffling. NOTE(review): no random_state is given, so
# the fold assignment is not reproducible between runs - confirm this is intended.
cv = KFold(n_splits=10, shuffle=True)
# NOTE(review): `normalize=` was removed from scikit-learn's linear models in
# release 1.2 - confirm the scikit-learn version this notebook is pinned to.
lassocv = linear_model.LassoCV(normalize=True)
lassocv.fit(CABA_Xtrain, CABA_ytrain)
# NOTE(review): `scores` is assigned here but never read in this section.
scores = cross_val_score(lassocv, CABA_Xtrain, CABA_ytrain, scoring='r2', cv=cv)
# R^2 of the fitted model on the training split, and the alpha LassoCV selected
print("r^2:", lassocv.score(CABA_Xtrain, CABA_ytrain))
print("alpha:", lassocv.alpha_)
r^2: 0.6691990257988953 alpha: 0.003199590044391265
# Repeat the cross-validation on the CABA_Xtest / CABA_ytest split.
# NOTE(review): `scores` is overwritten here and never read in this section.
scores = cross_val_score(lassocv, CABA_Xtest, CABA_ytest, scoring='r2', cv=cv)
# R^2 of the already-fitted LassoCV on the test split, followed by its alpha
print("r^2:", lassocv.score(CABA_Xtest, CABA_ytest))
print("alpha:", lassocv.alpha_)
r^2: 0.6694694617747674 alpha: 0.003199590044391265
# Lasso with a fixed regularization strength.
# NOTE(review): this alpha is hard-coded and differs from the lassocv.alpha_
# value in the recorded output (0.003199590044391265); presumably it was
# copied from an earlier run - confirm.
lasso = linear_model.Lasso(normalize=True, alpha=0.002811914108344165)
# Fit again, this time with regularization
X, y = CABA_Xtrain, CABA_ytrain
lasso_model = lasso.fit(X, y)
predictions = lasso_model.predict(X)
# Earlier result with alpha = 0.5 --> r^2: 0.5098385825168807 (with comuna) and 0.4904763180070667 (without comuna)
print("r^2:", lasso_model.score(X, y))
r^2: 0.6692369809467804
# Score the fitted Lasso model on the test split
Xt, yt = CABA_Xtest, CABA_ytest
# Earlier result with alpha = 0.5 --> r^2: 0.4875681932460285 (with comuna) and 0.5092279239407673 (without comuna)
print("r^2:", lasso_model.score(Xt, yt))
r^2: 0.6694835898509823
# Ridge regression with a fixed, very small regularization strength
rlm = linear_model.Ridge(normalize=True, alpha=0.0001)
# Fit again, this time with regularization
X, y = CABA_Xtrain, CABA_ytrain
ridge_model = rlm.fit(X, y)
predictions = ridge_model.predict(X)
# Earlier result with alpha = 0.5 --> r^2: 0.496124980682607 (with comuna) and 0.4893773534731626 (without comuna)
print("r^2:", ridge_model.score(X, y))
r^2: 0.6694540257063422
# Score the fitted Ridge model on the test split
Xt, yt = CABA_Xtest, CABA_ytest
# Earlier result with alpha = 0.5 --> r^2: 0.47811073280496075 (with comuna) and 0.4729377985558373 (without comuna)
print("r^2:", ridge_model.score(Xt, yt))
r^2: 0.6694969474454349
# Instantiate a Ridge model that picks alpha by 5-fold cross-validation (scored
# by R^2) over 1000 evenly spaced candidates between 0.0001 and 0.01
rlmcv = linear_model.RidgeCV(
    alphas=np.linspace(0.0001, 0.01, num=1000),
    scoring='r2',
    normalize=True,
    cv=5,
)
# Fit the model again, this time with RidgeCV
X, y = CABA_Xtrain, CABA_ytrain
rlmcv.fit(X, y)
predictions = rlmcv.predict(X)
# Earlier result: r^2: 0.5592333948827667 (WITH COMUNA)
print("r^2:", rlmcv.score(X, y))
r^2: 0.6678952031855221
# Display the regularization strength selected by the fitted RidgeCV model
rlmcv.alpha_
0.007591891891891892
# Score the fitted RidgeCV model on the test split
Xt, yt = CABA_Xtest, CABA_ytest
# Earlier result: r^2: 0.5240608432715783 (WITH COMUNA)
print("r^2:", rlmcv.score(Xt, yt))
r^2: 0.6661970535458268
# Print the full grid of candidate alphas passed to RidgeCV.
# NOTE(review): the label says "alpha:" but this prints every candidate, not the
# selected value (`rlmcv.alpha_`) - confirm which one was intended.
print ("alpha:", rlmcv.alphas)
alpha: [0.0001 0.00010991 0.00011982 0.00012973 0.00013964 0.00014955 0.00015946 0.00016937 0.00017928 0.00018919 0.0001991 0.00020901 0.00021892 0.00022883 0.00023874 0.00024865 0.00025856 0.00026847 0.00027838 0.00028829 0.0002982 0.00030811 0.00031802 0.00032793 0.00033784 0.00034775 0.00035766 0.00036757 0.00037748 0.00038739 0.0003973 0.00040721 0.00041712 0.00042703 0.00043694 0.00044685 0.00045676 0.00046667 0.00047658 0.00048649 0.0004964 0.00050631 0.00051622 0.00052613 0.00053604 0.00054595 0.00055586 0.00056577 0.00057568 0.00058559 0.0005955 0.00060541 0.00061532 0.00062523 0.00063514 0.00064505 0.00065495 0.00066486 0.00067477 0.00068468 0.00069459 0.0007045 0.00071441 0.00072432 0.00073423 0.00074414 0.00075405 0.00076396 0.00077387 0.00078378 0.00079369 0.0008036 0.00081351 0.00082342 0.00083333 0.00084324 0.00085315 0.00086306 0.00087297 0.00088288 0.00089279 0.0009027 0.00091261 0.00092252 0.00093243 0.00094234 0.00095225 0.00096216 0.00097207 0.00098198 0.00099189 0.0010018 0.00101171 0.00102162 0.00103153 0.00104144 0.00105135 0.00106126 0.00107117 0.00108108 0.00109099 0.0011009 0.00111081 0.00112072 0.00113063 0.00114054 0.00115045 0.00116036 0.00117027 0.00118018 0.00119009 0.0012 0.00120991 0.00121982 0.00122973 0.00123964 0.00124955 0.00125946 0.00126937 0.00127928 0.00128919 0.0012991 0.00130901 0.00131892 0.00132883 0.00133874 0.00134865 0.00135856 0.00136847 0.00137838 0.00138829 0.0013982 0.00140811 0.00141802 0.00142793 0.00143784 0.00144775 0.00145766 0.00146757 0.00147748 0.00148739 0.0014973 0.00150721 0.00151712 0.00152703 0.00153694 0.00154685 0.00155676 0.00156667 0.00157658 0.00158649 0.0015964 0.00160631 0.00161622 0.00162613 0.00163604 0.00164595 0.00165586 0.00166577 0.00167568 0.00168559 0.0016955 0.00170541 0.00171532 0.00172523 0.00173514 0.00174505 0.00175495 0.00176486 0.00177477 0.00178468 0.00179459 0.0018045 0.00181441 0.00182432 0.00183423 0.00184414 0.00185405 0.00186396 0.00187387 0.00188378 0.00189369 0.0019036 
0.00191351 0.00192342 0.00193333 0.00194324 0.00195315 0.00196306 0.00197297 0.00198288 0.00199279 0.0020027 0.00201261 0.00202252 0.00203243 0.00204234 0.00205225 0.00206216 0.00207207 0.00208198 0.00209189 0.0021018 0.00211171 0.00212162 0.00213153 0.00214144 0.00215135 0.00216126 0.00217117 0.00218108 0.00219099 0.0022009 0.00221081 0.00222072 0.00223063 0.00224054 0.00225045 0.00226036 0.00227027 0.00228018 0.00229009 0.0023 0.00230991 0.00231982 0.00232973 0.00233964 0.00234955 0.00235946 0.00236937 0.00237928 0.00238919 0.0023991 0.00240901 0.00241892 0.00242883 0.00243874 0.00244865 0.00245856 0.00246847 0.00247838 0.00248829 0.0024982 0.00250811 0.00251802 0.00252793 0.00253784 0.00254775 0.00255766 0.00256757 0.00257748 0.00258739 0.0025973 0.00260721 0.00261712 0.00262703 0.00263694 0.00264685 0.00265676 0.00266667 0.00267658 0.00268649 0.0026964 0.00270631 0.00271622 0.00272613 0.00273604 0.00274595 0.00275586 0.00276577 0.00277568 0.00278559 0.0027955 0.00280541 0.00281532 0.00282523 0.00283514 0.00284505 0.00285495 0.00286486 0.00287477 0.00288468 0.00289459 0.0029045 0.00291441 0.00292432 0.00293423 0.00294414 0.00295405 0.00296396 0.00297387 0.00298378 0.00299369 0.0030036 0.00301351 0.00302342 0.00303333 0.00304324 0.00305315 0.00306306 0.00307297 0.00308288 0.00309279 0.0031027 0.00311261 0.00312252 0.00313243 0.00314234 0.00315225 0.00316216 0.00317207 0.00318198 0.00319189 0.0032018 0.00321171 0.00322162 0.00323153 0.00324144 0.00325135 0.00326126 0.00327117 0.00328108 0.00329099 0.0033009 0.00331081 0.00332072 0.00333063 0.00334054 0.00335045 0.00336036 0.00337027 0.00338018 0.00339009 0.0034 0.00340991 0.00341982 0.00342973 0.00343964 0.00344955 0.00345946 0.00346937 0.00347928 0.00348919 0.0034991 0.00350901 0.00351892 0.00352883 0.00353874 0.00354865 0.00355856 0.00356847 0.00357838 0.00358829 0.0035982 0.00360811 0.00361802 0.00362793 0.00363784 0.00364775 0.00365766 0.00366757 0.00367748 0.00368739 0.0036973 0.00370721 0.00371712 0.00372703 
0.00373694 0.00374685 0.00375676 0.00376667 0.00377658 0.00378649 0.0037964 0.00380631 0.00381622 0.00382613 0.00383604 0.00384595 0.00385586 0.00386577 0.00387568 0.00388559 0.0038955 0.00390541 0.00391532 0.00392523 0.00393514 0.00394505 0.00395495 0.00396486 0.00397477 0.00398468 0.00399459 0.0040045 0.00401441 0.00402432 0.00403423 0.00404414 0.00405405 0.00406396 0.00407387 0.00408378 0.00409369 0.0041036 0.00411351 0.00412342 0.00413333 0.00414324 0.00415315 0.00416306 0.00417297 0.00418288 0.00419279 0.0042027 0.00421261 0.00422252 0.00423243 0.00424234 0.00425225 0.00426216 0.00427207 0.00428198 0.00429189 0.0043018 0.00431171 0.00432162 0.00433153 0.00434144 0.00435135 0.00436126 0.00437117 0.00438108 0.00439099 0.0044009 0.00441081 0.00442072 0.00443063 0.00444054 0.00445045 0.00446036 0.00447027 0.00448018 0.00449009 0.0045 0.00450991 0.00451982 0.00452973 0.00453964 0.00454955 0.00455946 0.00456937 0.00457928 0.00458919 0.0045991 0.00460901 0.00461892 0.00462883 0.00463874 0.00464865 0.00465856 0.00466847 0.00467838 0.00468829 0.0046982 0.00470811 0.00471802 0.00472793 0.00473784 0.00474775 0.00475766 0.00476757 0.00477748 0.00478739 0.0047973 0.00480721 0.00481712 0.00482703 0.00483694 0.00484685 0.00485676 0.00486667 0.00487658 0.00488649 0.0048964 0.00490631 0.00491622 0.00492613 0.00493604 0.00494595 0.00495586 0.00496577 0.00497568 0.00498559 0.0049955 0.00500541 0.00501532 0.00502523 0.00503514 0.00504505 0.00505495 0.00506486 0.00507477 0.00508468 0.00509459 0.0051045 0.00511441 0.00512432 0.00513423 0.00514414 0.00515405 0.00516396 0.00517387 0.00518378 0.00519369 0.0052036 0.00521351 0.00522342 0.00523333 0.00524324 0.00525315 0.00526306 0.00527297 0.00528288 0.00529279 0.0053027 0.00531261 0.00532252 0.00533243 0.00534234 0.00535225 0.00536216 0.00537207 0.00538198 0.00539189 0.0054018 0.00541171 0.00542162 0.00543153 0.00544144 0.00545135 0.00546126 0.00547117 0.00548108 0.00549099 0.0055009 0.00551081 0.00552072 0.00553063 0.00554054 
0.00555045 0.00556036 0.00557027 0.00558018 0.00559009 0.0056 0.00560991 0.00561982 0.00562973 0.00563964 0.00564955 0.00565946 0.00566937 0.00567928 0.00568919 0.0056991 0.00570901 0.00571892 0.00572883 0.00573874 0.00574865 0.00575856 0.00576847 0.00577838 0.00578829 0.0057982 0.00580811 0.00581802 0.00582793 0.00583784 0.00584775 0.00585766 0.00586757 0.00587748 0.00588739 0.0058973 0.00590721 0.00591712 0.00592703 0.00593694 0.00594685 0.00595676 0.00596667 0.00597658 0.00598649 0.0059964 0.00600631 0.00601622 0.00602613 0.00603604 0.00604595 0.00605586 0.00606577 0.00607568 0.00608559 0.0060955 0.00610541 0.00611532 0.00612523 0.00613514 0.00614505 0.00615495 0.00616486 0.00617477 0.00618468 0.00619459 0.0062045 0.00621441 0.00622432 0.00623423 0.00624414 0.00625405 0.00626396 0.00627387 0.00628378 0.00629369 0.0063036 0.00631351 0.00632342 0.00633333 0.00634324 0.00635315 0.00636306 0.00637297 0.00638288 0.00639279 0.0064027 0.00641261 0.00642252 0.00643243 0.00644234 0.00645225 0.00646216 0.00647207 0.00648198 0.00649189 0.0065018 0.00651171 0.00652162 0.00653153 0.00654144 0.00655135 0.00656126 0.00657117 0.00658108 0.00659099 0.0066009 0.00661081 0.00662072 0.00663063 0.00664054 0.00665045 0.00666036 0.00667027 0.00668018 0.00669009 0.0067 0.00670991 0.00671982 0.00672973 0.00673964 0.00674955 0.00675946 0.00676937 0.00677928 0.00678919 0.0067991 0.00680901 0.00681892 0.00682883 0.00683874 0.00684865 0.00685856 0.00686847 0.00687838 0.00688829 0.0068982 0.00690811 0.00691802 0.00692793 0.00693784 0.00694775 0.00695766 0.00696757 0.00697748 0.00698739 0.0069973 0.00700721 0.00701712 0.00702703 0.00703694 0.00704685 0.00705676 0.00706667 0.00707658 0.00708649 0.0070964 0.00710631 0.00711622 0.00712613 0.00713604 0.00714595 0.00715586 0.00716577 0.00717568 0.00718559 0.0071955 0.00720541 0.00721532 0.00722523 0.00723514 0.00724505 0.00725495 0.00726486 0.00727477 0.00728468 0.00729459 0.0073045 0.00731441 0.00732432 0.00733423 0.00734414 0.00735405 0.00736396 
0.00737387 0.00738378 0.00739369 0.0074036 0.00741351 0.00742342 0.00743333 0.00744324 0.00745315 0.00746306 0.00747297 0.00748288 0.00749279 0.0075027 0.00751261 0.00752252 0.00753243 0.00754234 0.00755225 0.00756216 0.00757207 0.00758198 0.00759189 0.0076018 0.00761171 0.00762162 0.00763153 0.00764144 0.00765135 0.00766126 0.00767117 0.00768108 0.00769099 0.0077009 0.00771081 0.00772072 0.00773063 0.00774054 0.00775045 0.00776036 0.00777027 0.00778018 0.00779009 0.0078 0.00780991 0.00781982 0.00782973 0.00783964 0.00784955 0.00785946 0.00786937 0.00787928 0.00788919 0.0078991 0.00790901 0.00791892 0.00792883 0.00793874 0.00794865 0.00795856 0.00796847 0.00797838 0.00798829 0.0079982 0.00800811 0.00801802 0.00802793 0.00803784 0.00804775 0.00805766 0.00806757 0.00807748 0.00808739 0.0080973 0.00810721 0.00811712 0.00812703 0.00813694 0.00814685 0.00815676 0.00816667 0.00817658 0.00818649 0.0081964 0.00820631 0.00821622 0.00822613 0.00823604 0.00824595 0.00825586 0.00826577 0.00827568 0.00828559 0.0082955 0.00830541 0.00831532 0.00832523 0.00833514 0.00834505 0.00835495 0.00836486 0.00837477 0.00838468 0.00839459 0.0084045 0.00841441 0.00842432 0.00843423 0.00844414 0.00845405 0.00846396 0.00847387 0.00848378 0.00849369 0.0085036 0.00851351 0.00852342 0.00853333 0.00854324 0.00855315 0.00856306 0.00857297 0.00858288 0.00859279 0.0086027 0.00861261 0.00862252 0.00863243 0.00864234 0.00865225 0.00866216 0.00867207 0.00868198 0.00869189 0.0087018 0.00871171 0.00872162 0.00873153 0.00874144 0.00875135 0.00876126 0.00877117 0.00878108 0.00879099 0.0088009 0.00881081 0.00882072 0.00883063 0.00884054 0.00885045 0.00886036 0.00887027 0.00888018 0.00889009 0.0089 0.00890991 0.00891982 0.00892973 0.00893964 0.00894955 0.00895946 0.00896937 0.00897928 0.00898919 0.0089991 0.00900901 0.00901892 0.00902883 0.00903874 0.00904865 0.00905856 0.00906847 0.00907838 0.00908829 0.0090982 0.00910811 0.00911802 0.00912793 0.00913784 0.00914775 0.00915766 0.00916757 0.00917748 0.00918739 
0.0091973 0.00920721 0.00921712 0.00922703 0.00923694 0.00924685 0.00925676 0.00926667 0.00927658 0.00928649 0.0092964 0.00930631 0.00931622 0.00932613 0.00933604 0.00934595 0.00935586 0.00936577 0.00937568 0.00938559 0.0093955 0.00940541 0.00941532 0.00942523 0.00943514 0.00944505 0.00945495 0.00946486 0.00947477 0.00948468 0.00949459 0.0095045 0.00951441 0.00952432 0.00953423 0.00954414 0.00955405 0.00956396 0.00957387 0.00958378 0.00959369 0.0096036 0.00961351 0.00962342 0.00963333 0.00964324 0.00965315 0.00966306 0.00967297 0.00968288 0.00969279 0.0097027 0.00971261 0.00972252 0.00973243 0.00974234 0.00975225 0.00976216 0.00977207 0.00978198 0.00979189 0.0098018 0.00981171 0.00982162 0.00983153 0.00984144 0.00985135 0.00986126 0.00987117 0.00988108 0.00989099 0.0099009 0.00991081 0.00992072 0.00993063 0.00994054 0.00995045 0.00996036 0.00997027 0.00998018 0.00999009 0.01 ]
# Feature matrix and target taken from the CABA_100 frame
X_100 = CABA_100[feature_cols]
y_100 = CABA_100["price_per_m2"]
# Price per m2 predicted by the fitted LassoCV model, next to the listed value
precio_m2_predecido = lassocv.predict(X_100)
precio_m2_real = CABA_100["price_per_m2"]
# Table of real vs. predicted price per m2, with coordinates kept for mapping
precio_real_vs_prediccion = pd.DataFrame(
    {
        "precio_m2_real": precio_m2_real,
        "precio_m2_prediccion": precio_m2_predecido,
        "lat": CABA_100["lat"],
        "lon": CABA_100["lon"],
    }
)
precio_real_vs_prediccion
precio_m2_real | precio_m2_prediccion | lat | lon | |
---|---|---|---|---|
16763 | 2941.176471 | 3365.752207 | -34.580504 | -58.405874 |
11681 | 2260.344828 | 3146.229585 | -34.598737 | -58.426602 |
4417 | 2093.017442 | 1961.334965 | -34.625386 | -58.461752 |
3279 | 4279.069767 | 3484.253832 | -34.594305 | -58.421268 |
27010 | 2611.940299 | 2778.372715 | -34.596020 | -58.388387 |
... | ... | ... | ... | ... |
12312 | 2307.692308 | 2806.461122 | -34.556037 | -58.446535 |
1660 | 3888.888889 | 3789.600970 | -34.592188 | -58.379074 |
5719 | 1932.000000 | 2178.070797 | -34.621769 | -58.422108 |
14224 | 1795.454545 | 2049.965511 | -34.606950 | -58.409365 |
2214 | 2244.897959 | 2098.702279 | -34.631323 | -58.466145 |
100 rows × 4 columns
# Relative gap between real and predicted price: (real - predicted) / real
precio_real_vs_prediccion["Diferencia"] = (
    precio_real_vs_prediccion["precio_m2_real"]
    .sub(precio_real_vs_prediccion["precio_m2_prediccion"])
    .div(precio_real_vs_prediccion["precio_m2_real"])
)
# WHEN THE DIFFERENCE IS NEGATIVE, THE PREDICTED PRICE IS HIGHER THAN THE REAL ONE
precio_real_vs_prediccion
precio_m2_real | precio_m2_prediccion | lat | lon | Diferencia | |
---|---|---|---|---|---|
16763 | 2941.176471 | 3365.752207 | -34.580504 | -58.405874 | -0.144356 |
11681 | 2260.344828 | 3146.229585 | -34.598737 | -58.426602 | -0.391925 |
4417 | 2093.017442 | 1961.334965 | -34.625386 | -58.461752 | 0.062915 |
3279 | 4279.069767 | 3484.253832 | -34.594305 | -58.421268 | 0.185745 |
27010 | 2611.940299 | 2778.372715 | -34.596020 | -58.388387 | -0.063720 |
... | ... | ... | ... | ... | ... |
12312 | 2307.692308 | 2806.461122 | -34.556037 | -58.446535 | -0.216133 |
1660 | 3888.888889 | 3789.600970 | -34.592188 | -58.379074 | 0.025531 |
5719 | 1932.000000 | 2178.070797 | -34.621769 | -58.422108 | -0.127366 |
14224 | 1795.454545 | 2049.965511 | -34.606950 | -58.409365 | -0.141753 |
2214 | 2244.897959 | 2098.702279 | -34.631323 | -58.466145 | 0.065124 |
100 rows × 5 columns
# Most negative relative difference: THE OPPORTUNITY TO BUY CHEAP AND SELL HIGH
precio_real_vs_prediccion["Diferencia"].min()
-0.3919246067186869
# X positions (0, 1, 2, ...) for the line chart, one per row of the comparison
# table. Derived from the table length instead of a hard-coded 100, so the plot
# keeps working if the sample size changes. dtype=float keeps the values equal
# to the previous np.linspace(0, 99, 100) result when the table has 100 rows.
PROPS = np.arange(len(precio_real_vs_prediccion), dtype=float)
PROPS
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98., 99.])
# Create the figure and its single axes in one call
fig, ax = plt.subplots(figsize=(12, 8))
ax.set(title="Precio real Vs. Precio predicho", xlabel="Número de observaciones", ylabel="Usd / m2")
# First series: the real price per m2
ax.plot(PROPS, precio_real_vs_prediccion["precio_m2_real"], label='precio real');
# Second series on the same axes: the predicted price per m2
ax.plot(PROPS, precio_real_vs_prediccion["precio_m2_prediccion"], label='precio predicción');
# The legend entries are given explicitly, in the order the lines were drawn
etiquetas = ['Precio real', 'Precio predicción']
ax.legend(etiquetas, frameon=True, loc='upper right');
# Optional export of the comparison table (left disabled):
#precio_real_vs_prediccion.to_csv('../Desafio2Grupal/real_vs_prediccion.csv')
# Keep only the rows whose predicted price is above the real one (negative Diferencia)
Prop_para_vender = precio_real_vs_prediccion.loc[precio_real_vs_prediccion["Diferencia"] < 0]
Prop_para_vender.shape
(46, 5)
# Map the selected properties, coloured by their relative price difference
fig = px.scatter_mapbox(
    Prop_para_vender,
    lat="lat",
    lon="lon",
    color='Diferencia',
    color_continuous_scale=["red", "green", "blue"],
    zoom=9,
    height=600,
)
# Base map style and zero margins, applied in a single layout update
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()