#!/usr/bin/env python
"""
Module for stats and fitting classes.
Created June 2015
Copyright (C) Damien Farrell
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
from __future__ import absolute_import, division, print_function
try:
from tkinter import *
from tkinter.ttk import *
except:
from Tkinter import *
from ttk import *
import types
import numpy as np
import pandas as pd
import pylab as plt
from collections import OrderedDict
import operator
from .dialogs import *
try:
import statsmodels
import statsmodels.formula.api as smf
import statsmodels.api as sm
from patsy import dmatrices
except:
print('statsmodel not installed')
[docs]class StatsViewer(Frame):
"""Provides a frame for model viewing interaction"""
def __init__(self, table, parent=None):
self.parent = parent
self.table = table
self.table.sv = self
self.fit = None
self.model = None
if self.parent != None:
Frame.__init__(self, parent)
self.main = self.master
else:
self.main = Toplevel()
self.master = self.main
self.main.title('Model Fitting App')
self.main.protocol("WM_DELETE_WINDOW", self.quit)
self.setupGUI()
self.pf = pf = self.table.pf
return
[docs] def setupGUI(self):
"""Add GUI elements"""
df = self.table.getSelectedDataFrame()
formula = self.guessFormula()
self.formulavar = StringVar()
self.formulavar.set(formula)
ef = Frame(self.main, padding=2)
ef.pack(side=TOP,fill=BOTH,expand=1)
Label(ef,text='formula:').pack(side=LEFT)
e = Entry(ef, textvariable=self.formulavar, font="Courier 13 bold")
e.pack(side=LEFT,fill=BOTH,expand=1)
f = Frame(self.main, padding=2)
f.pack(side=TOP,fill=BOTH)
Label(f,text='estimator:').pack(side=LEFT,padx=2)
self.modelvar = StringVar()
self.modelvar.set('ols')
c = Combobox(f, values=['ols','gls','logit'], width=4,
textvariable=self.modelvar)
c.pack(side=LEFT,fill=BOTH,expand=1)
b = Button(f, text="Fit", command=self.doFit)
b.pack(side=LEFT,fill=X,expand=1)
b = Button(f, text="Summary", command=self.summary)
b.pack(side=LEFT,fill=X,expand=1)
b = Button(f, text="Close", command=self.quit)
b.pack(side=LEFT,fill=X,expand=1)
f = LabelFrame(self.main, text='plots', padding=2)
f.pack(side=TOP,fill=BOTH)
Label(f,text='plot type:').pack(side=LEFT)
self.plotvar = StringVar()
self.plotvar.set('default')
c = Combobox(f, values=['default','predicted vs test',
'fit line','regression plots','qqplot',
'all regressors','leverage','influence'], width=15,
textvariable=self.plotvar)
c.pack(side=LEFT,fill=BOTH)
Label(f,text='plot indep. variable:').pack(side=LEFT,padx=2)
self.indvar = StringVar()
self.indvarwidget = c = Combobox(f, values=list(df.columns), width=8,
textvariable=self.indvar)
c.pack(side=LEFT,fill=BOTH,expand=1)
b = Button(f, text="Plot", command=self.showPlot)
b.pack(side=LEFT,fill=X,expand=1)
self.updateData()
return
[docs] def getModel(self, formula, data, est='ols'):
"""Select model to use"""
s = self.table.multiplerowlist
if len(s) == 0:
sub = data.index
else:
sub = data.index[s]
self.sub = sub
y,X = dmatrices(formula, data=data, return_type='dataframe')
self.X = X
self.y = y
Xf = X.iloc[sub]
yf = y.iloc[sub]
if est == 'ols':
#model = smf.ols(formula=formula, data=s)
model = sm.OLS(yf, Xf)
elif est == 'gls':
model = sm.GLS(y, X)
elif est == 'logit':
model = sm.Logit(y, X)
return model
[docs] def doFit(self):
"""Do model fit on selected subset of rows. Will only use
the currently selected rows for fitting."""
data = self.table.model.df
if len(data) == 0 or len(data.columns)<1:
return
self.formula = formula = self.formulavar.get()
est = self.modelvar.get()
try:
self.model = mod = self.getModel(formula, data, est)
except Exception as e:
self.pf.showWarning(e)
return
self.fit = fit = mod.fit()
self.summary()
self.updateData()
return
[docs] def showPlot(self):
"""Do plots"""
pf = self.pf
fit = self.fit
if fit == None:
pf.showWarning('no fitted model')
return
df = self.table.model.df
s = self.sub
fig = pf.fig
fig.clear()
ax = fig.add_subplot(111)
#plotframe options
pf.mplopts.applyOptions()
kwds = pf.mplopts.kwds
kind = self.plotvar.get()
indvar = self.indvar.get()
if indvar == '':
indvar = self.model.exog_names[1]
if kind == 'default':
if isinstance(self.model, sm.OLS) or isinstance(self.model, sm.GLS):
self.plotRegression(fit, indvar, ax=ax, **kwds)
elif isinstance(self.model, sm.Logit):
self.plotLogit(fit, indvar, ax)
elif kind == 'predicted vs test':
self.plotPrediction(fit, ax)
elif kind == 'fit line':
try:
sm.graphics.plot_fit(fit, indvar, ax=ax)
except ValueError:
pf.showWarning('%s is not an independent variable' %indvar,ax=ax)
elif kind == 'regression plots':
fig.clear()
sm.graphics.plot_regress_exog(fit, indvar, fig=fig)
elif kind == 'influence':
sm.graphics.influence_plot(fit, ax=ax, criterion="cooks")
elif kind == 'leverage':
from statsmodels.graphics.regressionplots import plot_leverage_resid2
plot_leverage_resid2(fit, ax=ax)
elif kind =='qqplot':
sm.graphics.qqplot(fit.resid, line='r', ax=ax)
elif kind == 'all regressors':
fig.clear()
sm.graphics.plot_partregress_grid(fit, fig=fig)
fig.tight_layout()
fig.canvas.draw()
return
[docs] def plotPrediction(self, fit, ax):
"""Plot predicted vs. test"""
sub = self.sub
if len(sub) == 0:
sub = X.index
Xout = self.X.iloc[~self.X.index.isin(sub)]
yout = self.y.iloc[~self.y.index.isin(sub)]
ypred = fit.predict(Xout)
ax.scatter(yout, ypred, alpha=0.6, edgecolor='black',
color='blue', lw=0.5, label='fit')
ax.plot(ax.get_xlim(), ax.get_xlim(), ls="--", lw=2, c=".2")
ax.set_xlabel('test')
ax.set_ylabel('predicted')
ax.set_title('predicted vs test data')
import statsmodels.tools.eval_measures as em
yt = yout.squeeze().values
rmse = em.rmse(yt, ypred)
ax.text(0.9,0.1,'rmse: '+ str(round(rmse,3)),ha='right',
va='top', transform=ax.transAxes)
return
[docs] def plotRegression(self, fit, indvar, ax, **kwds):
"""Plot custom statsmodels fit result for linear regression"""
depvar = self.model.endog_names
if indvar == '':
indvar = self.model.exog_names[1]
params = list(self.model.exog_names)
if indvar not in params:
self.pf.showWarning('chosen col is not a parameter',ax=ax)
return
#predict out of sample
sub = self.sub
if len(sub) == 0:
sub = X.index
Xout = self.X.iloc[~self.X.index.isin(sub)]
yout = self.y.iloc[~self.y.index.isin(sub)]
yfit = fit.predict(Xout)
x = Xout[indvar]
#fitted data
Xin = self.X.iloc[sub]
yin = self.y.iloc[sub]
marker=kwds['marker']
if marker == '':
marker='o'
#s=kwds['s']
s = 10
cmap = plt.cm.get_cmap(kwds['colormap'])
ax.scatter(Xin[indvar], yin, alpha=0.6, color=cmap(.2), label='fitted data',
marker=marker,s=s)
ax.scatter(x, yout, alpha=0.3, color='gray', label='out of sample',
marker=marker,s=s)
ax.scatter(x, yfit, alpha=0.6, edgecolor='black',
color='red', s=s, lw=0.5, label='fit (out sample)')
i=0.05
for k,p in fit.params.iteritems():
ax.text(0.9,i, k+': '+ str(round(p,3)), ha='right',
va='top', transform=ax.transAxes)
i+=.05
ax.text(0.1,0.05, 'R2: '+ str(fit.rsquared), ha='left',
va='top', transform=ax.transAxes)
ax.legend()
ax.set_title('fitted versus %s' %indvar)
ax.set_xlabel(indvar)
ax.set_ylabel(depvar)
#confidence intervals
#from statsmodels.sandbox.regression.predstd import wls_prediction_std
#prstd, iv_l, iv_u = wls_prediction_std(fit)
#ax.plot(x1, iv_u, 'r--')
#ax.plot(x1, iv_l, 'r--')
#plt.tight_layout()
return
[docs] def plotLogit(self, fit, indvar, ax, **kwds):
"""Plot Logit results"""
X = self.X
y = self.y
#predict out of sample
ypred = fit.predict(X)
#ax.plot(X.index, ypred, 'bo', X.index, y, 'mo', alpha=.25)
ax.plot(ypred, X[indvar], 'o')
ax.set_xlabel("Predicted")
ax.set_ylabel(indvar)
print (fit.pred_table(0.5))
return
[docs] def summary(self):
"""Fit summary"""
s = self.fit.summary()
from .dialogs import SimpleEditor
if not hasattr(self, 'fitinfo') or self.fitinfo == None:
self.w = w = Toplevel(self.parent)
def deletewin():
self.fitinfo = None
self.w.destroy()
w.protocol("WM_DELETE_WINDOW", deletewin)
self.fitinfo = SimpleEditor(w, height=25, width=85)
self.fitinfo.pack(in_=w, fill=BOTH, expand=Y)
self.fitinfo.text.insert(END, s)
self.fitinfo.text.see(END)
return
@classmethod
def _doimport(self):
"""Try to import statsmodels. If not installed return false"""
try:
import statsmodels
return 1
except:
print('statsmodels not installed')
return 0
def _checkNumeric(self, df):
x = df.convert_objects()._get_numeric_data()
if x.empty==True:
return False
[docs] def updateData(self):
"""Update data widgets"""
df = self.table.model.df
if self.model is not None:
self.indvarwidget['values'] = list(self.model.exog_names)
return
[docs] def quit(self):
#self.main.withdraw()
self.table.sv = None
self.main.destroy()
return