I tried to create a dask dataframe with read_parquet() and got "TypeError: '<' not supported between instances of 'NoneType' and 'str'". The same line worked a couple of days ago, but does not work now. What is causing the problem?
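The call is essentially the following, where "/path/to/dataset" is a placeholder for the real location of my hive-partitioned parquet directory:

import dask.dataframe as dd

# "/path/to/dataset" is a placeholder for the real hive-partitioned
# parquet directory; the engine matches the ArrowDatasetEngine that
# appears in the traceback below
ddf = dd.read_parquet("/path/to/dataset", engine="pyarrow")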
Below is the full traceback.
File ~/parts_consumption_sk/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py:326, in read_parquet(path, columns, filters, categories, index, storage_options, engine, gather_statistics, ignore_metadata_file, metadata_task_size, split_row_groups, chunksize, aggregate_files, **kwargs)
323 raise ValueError("read_parquet options require gather_statistics=True")
324 gather_statistics = True
--> 326 read_metadata_result = engine.read_metadata(
327 fs,
328 paths,
329 categories=categories,
330 index=index,
331 gather_statistics=gather_statistics,
332 filters=filters,
333 split_row_groups=split_row_groups,
334 chunksize=chunksize,
335 aggregate_files=aggregate_files,
336 ignore_metadata_file=ignore_metadata_file,
337 metadata_task_size=metadata_task_size,
338 **kwargs,
339 )
341 # In the future, we may want to give the engine the
342 # option to return a dedicated element for `common_kwargs`.
343 # However, to avoid breaking the API, we just embed this
344 # data in the first element of `parts` for now.
345 # The logic below is inteded to handle backward and forward
346 # compatibility with a user-defined engine.
347 meta, statistics, parts, index = read_metadata_result[:4]
File ~/parts_consumption_sk/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py:319, in ArrowDatasetEngine.read_metadata(cls, fs, paths, categories, index, gather_statistics, filters, split_row_groups, chunksize, aggregate_files, ignore_metadata_file, metadata_task_size, **kwargs)
301 @classmethod
302 def read_metadata(
303 cls,
(...)
317
318 # Stage 1: Collect general dataset information
--> 319 dataset_info = cls._collect_dataset_info(
320 paths,
321 fs,
322 categories,
323 index,
324 gather_statistics,
325 filters,
326 split_row_groups,
327 chunksize,
328 aggregate_files,
329 ignore_metadata_file,
330 metadata_task_size,
331 **kwargs.get("dataset", {}),
332 )
334 # Stage 2: Generate output `meta`
335 meta = cls._create_dd_meta(dataset_info)
File ~/parts_consumption_sk/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py:915, in ArrowDatasetEngine._collect_dataset_info(cls, paths, fs, categories, index, gather_statistics, filters, split_row_groups, chunksize, aggregate_files, ignore_metadata_file, metadata_task_size, **dataset_kwargs)
913 partition_names = list(hive_categories)
914 for name in partition_names:
--> 915 partition_obj.append(PartitionObj(name, hive_categories[name]))
917 # Check the `aggregate_files` setting
918 aggregation_depth = _get_aggregation_depth(aggregate_files, partition_names)
File ~/parts_consumption_sk/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py:152, in PartitionObj.__init__(self, name, keys)
150 def __init__(self, name, keys):
151 self.name = name
--> 152 self.keys = sorted(keys)
TypeError: '<' not supported between instances of 'NoneType' and 'str'
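The last frame shows what actually fails: PartitionObj.__init__ calls sorted() on the set of values discovered for one hive partition column, and in Python 3 None cannot be ordered against str. Two lines reproduce the exact error:

sorted([None, "2022-01-01"])
# TypeError: '<' not supported between instances of 'NoneType' and 'str'

So at least one of my partition columns now has a null among its keys. If I understand pyarrow's hive partitioning correctly, a null partition value is written out under a directory named <column>=__HIVE_DEFAULT_PARTITION__ and decoded back to None on read, so my guess is that a file like that landed in the dataset in the last couple of days. A quick way to check, again with a placeholder path:

import glob

# Look for a null-partition directory such as date=__HIVE_DEFAULT_PARTITION__
hits = [p for p in glob.glob("/path/to/dataset/*/") if "__HIVE_DEFAULT_PARTITION__" in p]
print(hits)

If a directory like that shows up, it would explain why the unchanged line suddenly started failing.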