I am trying to process a table row-wise (applying the same pre-processing for each row). I am able to get it working with apply function but when trying to run with map partitions, I am getting errors and not able to get why the error is coming up. Can’t map partitions do processing row-wise? This is the minimal code for the same
import numpy as np
import pandas as pd
import dask.dataframe as dd
def inc10(x):
return x + 10
def inc100(x):
return x + 100
def process_row(row):
proc_x = inc10(row['x'])
proc_y = inc100(row['y'])
return [proc_x, proc_y]
df = pd.DataFrame({'x': range(150), 'y': range(150)})
ddf = dd.from_pandas(df, npartitions=1)
# this works
proc_df = ddf.apply(lambda row: process_row(row), axis=1, meta=('proc_df', object))
proc_df.compute()
# this gives an error (KeyError: 'x')
proc_df_mp = ddf.map_partitions(lambda part : part.apply(lambda row: process_row(row)), meta=('proc_df_mp', object))
proc_df_mp.compute()