I am trying to read a database table using Dask's `read_sql_table`. The table has an integer column in which some rows are NULL, and this causes the error shown below.
ddf = dd.read_sql_table(table_name='customers',
npartitions=1,
con=con_url,
index_col='customerNumber')
print(len(ddf))
File "/mnt/d/Project/rebiz/backend/db/views/large_table_load_test.py", line 96, in dask_fun
print(len(ddf))
File "/home/raj/wizbi/lib/python3.10/site-packages/dask/dataframe/core.py", line 5018, in __len__
return len(s)
File "/home/raj/wizbi/lib/python3.10/site-packages/dask/dataframe/core.py", line 994, in __len__
).compute()
File "/home/raj/wizbi/lib/python3.10/site-packages/dask/base.py", line 342, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/home/raj/wizbi/lib/python3.10/site-packages/dask/base.py", line 628, in compute
results = schedule(dsk, keys, **kwargs)
File "/home/raj/wizbi/lib/python3.10/site-packages/dask/dataframe/io/sql.py", line 412, in _read_sql_chunk
return df.astype(meta.dtypes.to_dict(), copy=False)
File "/home/raj/wizbi/lib/python3.10/site-packages/pandas/core/dtypes/astype.py", line 182, in _astype_float_to_int_nansafe
raise IntCastingNaNError(
pandas.errors.IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer