Hello,
I saw a previous post under the same name, but I didn’t find it to answer my question. I am trying to do multi-gpu computing on a remote server. I am training an XGBoost model using dask (obviously).
When I run my python file, the file runs for a few seconds before crashing. Here is my output (not the full thing because it’s super long):
Traceback (most recent call last):
File "/home/dskinne3/NHL_Win_Predictor/xgboost_train.py", line 224, in <module>
res = get_xgboost_and_pickle(team, df_train, df_test)
File "/home/dskinne3/NHL_Win_Predictor/xgboost_train.py", line 172, in get_xgboost_and_pickle
grid_search.fit(X_train, y_train)
File "/home/dskinne3/.conda/envs/nhl_pred/lib/python3.10/site-packages/dask_ml/model_selection/_search.py", line 1266, in fit
futures = scheduler(
File "/home/dskinne3/.conda/envs/nhl_pred/lib/python3.10/site-packages/distributed/client.py", line 3247, in get
futures = self._graph_to_futures(
File "/home/dskinne3/.conda/envs/nhl_pred/lib/python3.10/site-packages/distributed/client.py", line 3146, in _graph_to_futures
header, frames = serialize(ToPickle(dsk), on_error="raise")
File "/home/dskinne3/.conda/envs/nhl_pred/lib/python3.10/site-packages/distributed/protocol/serialize.py", line 374, in serialize
raise TypeError(msg, str(x)[:10000]) from exc
TypeError: ('Could not serialize object of type HighLevelGraph', '<ToPickle: HighLevelGraph with 1 layers.\n<dask.highlevelgraph.HighLevelGraph object at 0x7fe3901eb280>\n 0. 140615353003200\n>')
Here are my imports
from dask import dataframe as dd
from dask_ml.model_selection import GridSearchCV
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import pickle
from sklearn.model_selection import train_test_split, StratifiedKFold
from xgboost import XGBClassifier
Here is my code (this is not the full function or proper function arguments, but the code I removed is not the problem because it’s just basic pandas stuff that worked before using a GPU):
def get_xgboost_and_pickle(X_train, y_train, X_test, y_test):
# Turn into dask
X_train = dd.from_pandas(X_train, npartitions=1)
y_train = dd.from_pandas(y_train, npartitions=1)
X_test = dd.from_pandas(X_test, npartitions=1)
y_test = dd.from_pandas(y_test, npartitions=1)
# Get cuda stuff
cluster = LocalCUDACluster()
client = Client(cluster)
# Train the model
params = {
'min_child_weight': [1, 5, 10],
'gamma': [0.5, 1, 5],
'subsample': [0.6, 1.0],
'colsample_bytree': [0.6, 1.0],
'max_depth': [3, 5, 10],
'eta': [0.3, 0.1, 0.05],
'tree_method': ['hist'],
'lambda': [0.98],
'eval_metric': ['logloss'],
'device': ['cuda'],
'client': [client]
}
# Define cross-validation strategy
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
# Define the model
# model = dxgb.XGBClassifier(use_label_encoder=False)
model = XGBClassifier(use_label_encoder=False)
# Perform GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=params, cv=cv, scoring='accuracy', n_jobs=-1, refit=True)
grid_search.fit(X_train, y_train)
test_accuracy = grid_search.score(X_test, y_test)
# Get the best model
model = grid_search.best_estimator_
# Pickle the model
with open(f'team_xgboost_files/{team_one}.pkl', 'wb') as f:
pickle.dump(model, f)
return test_accuracy
and my initial call
if __name__ == '__main__':
...
# Iterate through each team
pbar = tqdm(total=len(df_last_two['home_name'].unique()))
for team in df_last_two['home_name'].unique():
if team == 'American All-Stars':
continue
res = get_xgboost_and_pickle(team, df_train, df_test)
pbar.update(1)
pbar.set_description(f'{team} - {res}')
Any idea why this might be happening? I am super new to dask and am probably doing a bunch of things wrong.