I have encountered an issue, but I would like an informal opinion before filing it on GitHub (maybe it’s expected behavior). It has to do with percentiles and masked arrays. Here is the gist: https://gist.github.com/Campostrini/fbed9e033ab26b90fb4c885c10419dcb (you need Python 3.8+ for the self-documenting f-strings).
import dask.array as da
import numpy as np
# Reproduction script: compares da.median and da.percentile(..., 50) on plain
# dask arrays and on their masked counterparts.
# NOTE: the "{name=}" f-string form used in the prints needs Python 3.8+.


def _median(arr):
    """Return the computed median of *arr* along axis 0."""
    return da.median(arr, axis=0).compute()


def _p50(arr):
    """Return the computed 50th percentile of *arr*."""
    return da.percentile(arr, 50).compute()


# Inputs: the integers 0..8, and the integers 0..7 followed by a NaN.
r1 = [*range(9)]
r2 = [*range(8), np.nan]
dar1, dar2 = da.from_array(r1), da.from_array(r2)

# Mask the last element of each array: the value above 7 and the NaN.
above_seven = dar1 > 7
masked_dar1 = da.ma.masked_where(above_seven, dar1)
nan_positions = da.isnan(dar2)
masked_dar2 = da.ma.masked_where(nan_positions, dar2)

# Medians of the plain arrays, then of the masked arrays.
median1, median2 = _median(dar1), _median(dar2)
median1_masked, median2_masked = _median(masked_dar1), _median(masked_dar2)

# The same four quantities through the 50th percentile.
median1_p50, median2_p50 = _p50(dar1), _p50(dar2)
median1_p50_masked, median2_p50_masked = _p50(masked_dar1), _p50(masked_dar2)

print(f"{dar1.compute()=} \n{dar2.compute()=} \n{masked_dar1.compute()=} \n{masked_dar2.compute()=}")
print(f"{median1=} {median2=} {median1_masked=} {median2_masked=} {median1_p50=} {median2_p50=}")
print(f"{median1_p50_masked=} {median2_p50_masked=}")

# Expected behaviour: the masked results should match those of an array that
# simply lacks the masked element, i.e. the integers 0..7.
r3 = [*range(8)]
dar3 = da.from_array(r3)
expected_median, expected_median_p50 = _median(dar3), _p50(dar3)
print(f"Expectations with masks: {expected_median=} {expected_median_p50=}")

# Workaround: select only the unmasked entries by boolean indexing before
# taking the median.
unmasked = ~da.ma.getmaskarray(masked_dar1)
alternative_median = _median(dar1[unmasked])
print(f"Pseudo fix: {alternative_median=}")