# Bin prices into fixed, predefined ranges
def price_cut_bins(s):
    """Bucket a price Series into fixed ranges labelled by their upper bound.

    A Series with 10 or fewer distinct values is returned unchanged (same
    object). Otherwise every -1 is replaced with NaN (presumably -1 is a
    missing-value marker -- confirm with callers) and the values are cut
    into right-closed intervals over the edges
    0, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 99999999.
    The value 0 itself is counted in the first bin.

    Each bin is labelled with its upper edge, except the last one, which is
    labelled 99999. Values outside the edges, and NaN, stay NaN; they are
    not filled back with -1.

    Args:
        s: pandas Series of prices.

    Returns:
        ``s`` itself for low-cardinality input, otherwise a numeric Series
        of bin labels with the same index.
    """
    # Low-cardinality columns are passed through untouched.
    if len(s.unique()) <= 10:
        return s

    edges = [0, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 99999999]
    upper_labels = [100, 200, 300, 500, 700, 1000, 1500, 2000, 2500, 99999]

    cleaned = s.replace(-1, np.nan)
    binned = pd.cut(
        cleaned,
        bins=edges,
        labels=upper_labels,
        include_lowest=True,
    )
    # pd.cut yields a categorical; convert the labels back to plain numbers.
    return pd.to_numeric(binned)
# Bin by quantiles
def perc_cut_bins(s):
    """Bucket a Series into quantile-based bins labelled by their lower edge.

    A Series with 21 or fewer distinct values is returned unchanged (same
    object). Otherwise every -1 is replaced with NaN (presumably -1 is a
    missing-value marker -- confirm with callers), a fixed list of quantiles
    between 2.5% and 100% is computed, and the values are cut into
    right-closed intervals whose edges are those quantiles (rounded to six
    decimals and de-duplicated) plus a final edge of 999999999.

    Each bin is labelled with its lower edge. NaN inputs stay NaN; they are
    not filled back with -1.

    NOTE(review): values strictly below the 2.5% quantile fall outside every
    bin and therefore also come back as NaN -- confirm this is intended.

    Args:
        s: pandas Series of numeric values.

    Returns:
        ``s`` itself for low-cardinality input, otherwise a numeric Series
        of bin labels with the same index.
    """
    # Low-cardinality columns are passed through untouched.
    if len(s.unique()) <= 21:
        return s

    s = s.replace(-1, np.nan)
    q = [0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2, 0.25, 0.3, 0.35,
         0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.95, 1]

    # Rounding can collapse neighbouring quantiles onto the same value, and
    # pd.cut rejects duplicate edges, so de-duplicate through a set.
    # 999999999 is the closing edge that lets the top quantile act as a label.
    edges = sorted({round(v, 6) for v in s.quantile(q)} | {999999999})
    labels = edges[:-1]

    binned = pd.cut(s, bins=edges, labels=labels, include_lowest=True)
    # pd.cut yields a categorical; convert the labels back to plain numbers.
    return pd.to_numeric(binned)