import pandas as pd
import matplotlib.pyplot as plt

# Turn on inline figures only when running under IPython/Jupyter. The bare
# "%matplotlib inline" magic is a syntax error in a plain Python interpreter,
# so it is issued through get_ipython(), which only exists inside IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Load the advertising data; the code below reads the columns
# 'TV', 'radio', 'newspaper' and 'sales' from it.
data = pd.read_csv("advertising.csv")
print(data.head())

# One scatter panel per advertising channel against sales, sharing the y axis
# so the three panels are directly comparable.
fig, axs = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
for ax, channel in zip(axs, ["TV", "radio", "newspaper"]):
    data.plot(kind="scatter", x=channel, y="sales", ax=ax)
plt.show()
import statsmodels.api as sm

# The array interface of OLS does not add an intercept on its own, so an
# explicit column of ones is appended to the data.
data["const"] = 1

# Simple regression of sales on TV spend; rows with missing values are dropped.
lm_1 = sm.OLS(
    endog=data["sales"],
    exog=data[["const", "TV"]],
    missing="drop",
).fit()
print(lm_1.summary())

# Predict sales for TV = 50. The new exog must carry the same columns, in the
# same order, as the design matrix used for fitting -- including 'const' --
# otherwise the dot product with the two fitted parameters fails.
new_exog = pd.DataFrame({"const": [1], "TV": [50]})
print(lm_1.predict(new_exog))

# Report the main fit statistics, each under its heading and followed by a
# blank separator.
report = [
    ("regression parameters:\n", lm_1.params),
    ("confidence intervals:\n", lm_1.conf_int()),
    ("p-values:\n", lm_1.pvalues),
    ("R-squared value\n", lm_1.rsquared),
]
for heading, value in report:
    print(heading)
    print(value)
    print("\n")
from statsmodels.iolib.summary2 import summary_col
import statsmodels.formula.api as smf

# Progressively richer specifications fitted through the formula interface,
# which adds an intercept term labelled 'Intercept'.
lm_2 = smf.ols(formula="sales ~ TV + radio", data=data).fit()
lm_3 = smf.ols(formula="sales ~ TV + radio + newspaper", data=data).fit()

# lm_1 was fitted through the array interface with an explicit 'const' column,
# so its intercept is labelled 'const' rather than 'Intercept'. summary_col
# aligns rows by label, which would put Model 1's intercept on a separate row.
# Refit the same specification (sales on TV plus an intercept) through the
# formula interface so all three intercepts share the 'Intercept' row.
lm_1_for_table = smf.ols(formula="sales ~ TV", data=data).fit()

# Extra rows appended at the bottom of the table, computed per model.
info_dict = {
    "R-squared": lambda res: f"{res.rsquared:.2f}",
    "No. observations": lambda res: f"{int(res.nobs):d}",
}

# Side-by-side comparison of the three models, with significance stars.
results_table = summary_col(
    results=[lm_1_for_table, lm_2, lm_3],
    float_format="%0.2f",
    stars=True,
    model_names=["Model 1", "Model 2", "Model 3"],
    info_dict=info_dict,
    regressor_order=["Intercept", "TV", "radio", "newspaper"],
)
results_table.add_title("Table 1 - OLS Regressions")
print(results_table)
import statsmodels.api as sm

# Reload the data so this section does not depend on columns added earlier.
advertising = pd.read_csv("advertising.csv")
y = advertising.sales
x = advertising.TV

# add_constant prepends a 'const' column so the model has an intercept.
X = sm.add_constant(x)
model = sm.OLS(y, X).fit()
print(model.summary())

# Fitted values computed by hand from the estimated coefficients. params is
# indexed by the design-matrix column labels, so look them up by name rather
# than by integer position.
y_manual = model.params["const"] + model.params["TV"] * advertising.TV

# predict() needs the full design matrix (constant included), not the raw
# regressor, to match the two fitted parameters.
y_hat = model.predict(X)

# Observed points with the fitted regression line on top.
plt.scatter(x, y, alpha=0.4)
plt.xlabel("TV")
plt.ylabel("Sales")
plt.plot(x, y_hat, "r", alpha=0.8)
plt.show()
# Multiple regression of sales on all three channels via the array interface.
x1 = advertising[["TV", "radio", "newspaper"]]
y1 = advertising["sales"]
X = sm.add_constant(x1)  # adds the intercept column
model2 = sm.OLS(y1, X).fit()
print(model2.summary())

import statsmodels.formula.api as smf

# 'TV * radio' expands to TV + radio + TV:radio, i.e. both main effects plus
# their interaction.
model3 = smf.ols("sales ~ TV * radio + newspaper", data=advertising).fit()

# In a formula, '**' is an interaction-power operator, not arithmetic, so a
# squared regressor has to be wrapped in I(...) to be evaluated as plain
# Python. NOTE(review): assumes a quadratic radio term was intended -- confirm.
model4 = smf.ols("sales ~ TV + I(radio**2) + newspaper", data=advertising).fit()