@@ -49,13 +49,26 @@ def fit_glm(Y, X, A=None, family='gaussian', disp_family='poisson',
4949 impute : bool
5050 whether to impute potential outcomes and get predicted values
5151 offset : bool
52- whether to use log of sum of Y as offset
52+ whether to use log of sum of Y as offset
53+
54+ Returns
55+ -------
56+ B : array
57+ d x p matrix of coefficients
58+ Yhat : array
59+ n x p x a matrix of predicted values
60+ disp_glm : array
61+ p x 1 vector of dispersion parameters
62+ offsets : array
63+ n x 1 vector of offsets
64+ resid_deviance : array
65+ n x p matrix of deviance residuals
5366 '''
5467 np .random .seed (random_state )
5568
5669 if family not in ['gaussian' , 'poisson' , 'nb' ]:
5770 raise ValueError ('Family not recognized' )
58-
71+
5972 d = X .shape [1 ]
6073
6174 if A is None :
@@ -70,8 +83,6 @@ def fit_glm(Y, X, A=None, family='gaussian', disp_family='poisson',
7083 X_test = X
7184 X_test = np .c_ [X ,np .zeros_like (A )]
7285 X = np .c_ [X ,A ]
73- # X_test = X.copy()
74-
7586 a = A .shape [1 ]
7687
7788 if offset is not None and offset is not False :
@@ -90,7 +101,7 @@ def fit_glm(Y, X, A=None, family='gaussian', disp_family='poisson',
90101 pprint .pprint ('Fitting {} GLM{}...' .format (family , '' if offsets is None else ' with offset' ))
91102 is_constant = np .all (X == X [0 , :], axis = 0 )
92103 alpha [is_constant ] = 0
93- # alpha[:-a] = 0
104+
94105
95106 families = {
96107 'gaussian' : lambda disp : sm .families .Gaussian (),
@@ -122,11 +133,11 @@ def fit_model(j, Y, X, offsets, family, disp, impute, alpha):
122133 Yhat_0 = np .zeros ((Y .shape [0 ], a ))
123134 Yhat_1 = np .zeros ((Y .shape [0 ], a ))
124135 if impute is not False :
125- for j in range (a ):
136+ for k in range (a ):
126137 X_test_copy = X_test .copy ()
127- Yhat_0 [:,j ] = mod .predict (X_test_copy , offset = offsets )
128- X_test_copy [:, d + j ] = 1 # Update the j-th column with all ones
129- Yhat_1 [:,j ] = mod .predict (X_test_copy , offset = offsets )
138+ Yhat_0 [:,k ] = mod .predict (X_test_copy , offset = offsets )
139+ X_test_copy [:, d + k ] = 1
140+ Yhat_1 [:,k ] = mod .predict (X_test_copy , offset = offsets )
130141 else :
131142 Yhat_0 [:,:] = Yhat_1 [:,:] = mod .predict (X , offset = offsets ).reshape (- 1 , a )
132143
@@ -146,9 +157,11 @@ def fit_model(j, Y, X, offsets, family, disp, impute, alpha):
146157
147158 results = Parallel (n_jobs = n_jobs )(delayed (fit_model )(
148159 j , Y , X , offsets , family , disp_glm , impute , alpha ) for j in tqdm (range (Y .shape [1 ])))
160+ pprint .pprint ('Fitting GLM done.' )
161+ if verbose : pprint .pprint ('Fitting GLM done.' )
149162
150163 B , Yhat_0 , Yhat_1 , resid_deviance = zip (* results )
151- B = np .array (B )
164+ B = np .array (B )
152165 Yhat_0 = np .array (Yhat_0 ).transpose (1 , 0 , 2 )
153166 Yhat_1 = np .array (Yhat_1 ).transpose (1 , 0 , 2 )
154167 resid_deviance = np .array (resid_deviance ).T
@@ -171,7 +184,6 @@ def estimate_disp(Y, X=None, A=None, Y_hat=None, disp_family='gaussian', offset=
171184 else :
172185 offsets = None
173186 sf = 1.
174-
175187
176188 if Y_hat is None :
177189 if verbose :
@@ -183,8 +195,7 @@ def estimate_disp(Y, X=None, A=None, Y_hat=None, disp_family='gaussian', offset=
183195 if disp_family == 'gaussian' :
184196 Y_norm = Y / sf
185197 reg = LinearRegression (fit_intercept = False ).fit (X , Y_norm )
186- Y_hat = reg .predict (X )
187- # Y_hat = np.clip(Y_hat, 0, 1) * sf
198+ Y_hat = reg .predict (X )
188199 elif disp_family == 'poisson' :
189200 Y_hat = fit_glm (Y , X , None , offset = offsets , family = 'poisson' , impute = False , ** kwargs )[1 ]
190201 Y_hat /= sf
0 commit comments