Hi All,
I have a dask DF and trying to fit a RandomForest model using GridsearchCV.
The RF model is cuml because I am trying to train it on a multi-gpu environment.
I am able to train a RF model on gpus easily but when trying to use gridsearchcv to tune some of the hyper params. I get an error.
Below is the code which I am using
import cudf,cuml
from dask.distributed import Client,wait
import dask_cudf
from dask_cuda import LocalCUDACluster
import dask_cudf
import cudf
import cuml
from cuml.dask.ensemble import RandomForestClassifier
from dask_ml.model_selection import GridSearchCV
from sklearn.metrics import log_loss
cluster = None # (Optional) Specify existing scheduler port
if cluster is None:
cluster = LocalCUDACluster(n_workers=2,threads_per_worker=2
#,protocol=protocol,
)
#print(capacity * device_spill_frac)
# Create the distributed client
client = Client(cluster)
client
# Define the random forest classifier and hyperparameters to tune
rfc = RandomForestClassifier()
X = df.drop(['target'], axis=1).astype('float32')
y = df["target"].astype('int32')
X_train, X_valid, y_train, y_valid = train_test_split(
X, y, test_size=0.2, random_state=42
)
params = {'max_depth': [15, 20], 'n_estimators': [100, 200]}
# Use GridSearchCV to tune hyperparameters with cross-validation
grid_search = GridSearchCV(rfc, params, cv=3, scoring='neg_log_loss')
grid_search.fit(X_train, y_train)
# Create a new RandomForestClassifier object with the best hyperparameters
best_rfc = RandomForestClassifier(n_estimators=grid_search.best_params_['n_estimators'],
max_depth=grid_search.best_params_['max_depth'],
client=client)
# Fit the new model object on the entire training dataset
best_rfc.fit(X_train, y_train) # both X_train and y_train are dask cudf datasets
# Use the fitted model to predict on the test set
y_pred = best_rfc.predict(X_test)
# Calculate the predicted probabilities on the test set
y_pred_proba = best_rfc.predict_proba(X_test)
# Evaluate the log loss of the predictions
logloss = log_loss(y_test.to_array(), y_pred_proba.to_array())
print("Log Loss:", logloss)
below is the error message:
2023-04-05 21:00:47,104 - distributed.protocol.core - CRITICAL - Failed to Serialize
Traceback (most recent call last):
** File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 109, in dumps**
** frames[0] = msgpack.dumps(msg, default=_encode_default, use_bin_type=True)**
** File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/msgpack/init.py”, line 38, in packb**
return Packer(**kwargs).pack(o)
File “msgpack/_packer.pyx”, line 294, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 300, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 297, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 285, in msgpack._cmsgpack.Packer._pack
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 100, in _encode_default
frames.extend(create_serialized_sub_frames(obj))
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 60, in create_serialized_sub_frames
sub_header, sub_frames = serialize_and_split(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 444, in serialize_and_split
header, frames = serialize(x, serializers, on_error, context)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 266, in serialize
return serialize(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 366, in serialize
raise TypeError(msg, str(x)[:10000])
TypeError: (‘Could not serialize object of type RandomForestClassifier’, ‘<cuml.dask.ensemble.randomforestclassifier.RandomForestClassifier object at 0x7f5247bbd180>’)
2023-04-05 21:00:47,140 - distributed.comm.utils - ERROR - (‘Could not serialize object of type RandomForestClassifier’, ‘<cuml.dask.ensemble.randomforestclassifier.RandomForestClassifier object at 0x7f5247bbd180>’)
Traceback (most recent call last):
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/comm/utils.py”, line 55, in _to_frames
return list(protocol.dumps(msg, **kwargs))
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 109, in dumps
frames[0] = msgpack.dumps(msg, default=_encode_default, use_bin_type=True)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/msgpack/init.py”, line 38, in packb
return Packer(**kwargs).pack(o)
File “msgpack/_packer.pyx”, line 294, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 300, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 297, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 285, in msgpack._cmsgpack.Packer._pack
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 100, in _encode_default
frames.extend(create_serialized_sub_frames(obj))
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 60, in create_serialized_sub_frames
sub_header, sub_frames = serialize_and_split(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 444, in serialize_and_split
header, frames = serialize(x, serializers, on_error, context)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 266, in serialize
return serialize(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 366, in serialize
raise TypeError(msg, str(x)[:10000])
TypeError: (‘Could not serialize object of type RandomForestClassifier’, ‘<cuml.dask.ensemble.randomforestclassifier.RandomForestClassifier object at 0x7f5247bbd180>’)
2023-04-05 21:00:47,177 - distributed.batched - ERROR - Error in batched write
Traceback (most recent call last):
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/batched.py”, line 115, in _background_send
nbytes = yield coro
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/tornado/gen.py”, line 769, in run
value = future.result()
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/comm/tcp.py”, line 271, in write
frames = await to_frames(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/comm/utils.py”, line 70, in to_frames
return await offload(_to_frames)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/utils.py”, line 1417, in offload
return await loop.run_in_executor(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/concurrent/futures/thread.py”, line 58, in run
result = self.fn(*self.args, **self.kwargs)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/utils.py”, line 1418, in
_offload_executor, lambda: context.run(fn, *args, **kwargs)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/comm/utils.py”, line 55, in _to_frames
return list(protocol.dumps(msg, **kwargs))
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 109, in dumps
frames[0] = msgpack.dumps(msg, default=_encode_default, use_bin_type=True)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/msgpack/init.py”, line 38, in packb
return Packer(**kwargs).pack(o)
File “msgpack/_packer.pyx”, line 294, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 300, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 297, in msgpack._cmsgpack.Packer.pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 264, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 231, in msgpack._cmsgpack.Packer._pack
File “msgpack/_packer.pyx”, line 285, in msgpack._cmsgpack.Packer._pack
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 100, in _encode_default
frames.extend(create_serialized_sub_frames(obj))
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/core.py”, line 60, in create_serialized_sub_frames
sub_header, sub_frames = serialize_and_split(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 444, in serialize_and_split
header, frames = serialize(x, serializers, on_error, context)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 266, in serialize
return serialize(
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/protocol/serialize.py”, line 366, in serialize
raise TypeError(msg, str(x)[:10000])
TypeError: (‘Could not serialize object of type RandomForestClassifier’, ‘<cuml.dask.ensemble.randomforestclassifier.RandomForestClassifier object at 0x7f5247bbd180>’)
(‘randomforestclassifier-fit-score-b98a22c018ca550dd087139172aad0d4’, 3, 1) has failed… retrying
2023-04-05 21:00:47,216 - distributed.core - ERROR - Exception while handling op retry
Traceback (most recent call last):
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/core.py”, line 818, in _handle_comm
result = handler(**msg)
File “/opt/conda/envs/rapids-23.02-new2/lib/python3.10/site-packages/distributed/scheduler.py”, line 4726, in stimulus_retry
ts = self.tasks[key]
KeyError: “(‘randomforestclassifier-fit-score-b98a22c018ca550dd087139172aad0d4’, 3, 1)”
Any idea what could be wrong in this?