My question is about the code below, from chapter 8/03_ml4t_with_zipline.ipynb.
The returns are shifted forward, which makes sense to me, since we are trying to anticipate the next day's returns with prior data.
But why are the features shifted back? Doesn't shifting the features back allow look-ahead bias to creep in?
# Number of rows by which the outcome rows are offset from the feature rows.
shift_by = N_FORWARD_DAYS + 1
# Drop the FIRST `shift_by` rows of `returns`, then flatten what remains into
# a 1-D array; the kept rows start at original row index `shift_by`.
outcome = returns[shift_by:].flatten()
# Stack the input arrays depth-wise (np.dstack) and drop the LAST `shift_by`
# rows, so both slices keep the same number of rows and kept feature row i
# lines up with original returns row i + shift_by.
# NOTE(review): assumes axis 0 is time in ascending order -- confirm. Under
# that assumption each feature row is paired with a LATER return row, so
# trimming the tail of the features discards rows that have no outcome yet
# rather than exposing future data to the model.
features = np.dstack(inputs)[:-shift_by]
class LinearModel(CustomFactor):
"""Obtain model predictions"""
train_on_weekday = [0, 2, 4]
def __init__(self, *args, **kwargs):
    """Create the scaler, the regressor, and the "not trained yet" flag.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    # Zero-argument super() already binds the instance; passing `self`
    # again would hand it to the parent as an extra positional argument.
    super().__init__(*args, **kwargs)
    self._scaler = StandardScaler()
    # scikit-learn documents the penalty name in lowercase ('l2'); the
    # uppercase spelling is not a documented value and may be rejected.
    self._model = SGDRegressor(penalty='l2')
    # Flipped elsewhere once the model has been fitted at least once.
    self._trained = False
def _train_model(self, today, returns, inputs):
scaler = self._scaler
model = self._model
shift_by = N_FORWARD_DAYS + 1
outcome = returns[shift_by:].flatten()
features = np.dstack(inputs)[:-shift_by]