import math
import pandas as pd
import csv
from tqdm import tqdm
def mask(x):
    """Overwrite a row's ``cls1``/``cls2`` with the labels stored in ``cls4``.

    Intended for use with ``DataFrame.apply(mask, axis=1)``.

    Args:
        x: A row-like object exposing ``cls1``, ``cls2`` and ``cls4`` as
            attributes (e.g. a ``pandas.Series``). It is modified in place.

    Returns:
        The same object ``x``:

        * unchanged when ``cls1`` is falsy (e.g. an empty string) or when
          ``cls4`` is missing according to ``pd.isna``;
        * with ``cls1`` set to ``cls4`` when ``cls4`` contains no space;
        * otherwise with ``cls1`` set to the first whitespace-separated word
          of ``cls4`` and ``cls2`` set to the second word, if there is one.
    """
    # NOTE(review): a missing cls1 loaded as float NaN is truthy, so it does
    # not trigger this guard -- confirm that this is the intended behaviour.
    if not x.cls1:
        return x
    if pd.isna(x.cls4):
        return x

    label = x.cls4
    if ' ' not in label:
        x.cls1 = label
        return x

    # A space is present, but it may be leading/trailing padding only, so the
    # split can yield fewer than two words. Guard each index instead of
    # raising IndexError on values such as "foo " or " ".
    words = label.split()
    if words:
        x.cls1 = words[0]
    if len(words) > 1:
        x.cls2 = words[1]
    return x
# Options shared by every tab-separated file this script reads or writes:
# tab delimiter, UTF-8 text, and no quote handling at all.
_TSV_OPTIONS = {
    "sep": "\t",
    "encoding": "utf8",
    "quoting": csv.QUOTE_NONE,
}

# Load both inputs with every column read as a string.
df = pd.read_csv("./2000qV15.txt", dtype=str, **_TSV_OPTIONS)
# Disabled experiments, kept for reference:
# df=df.dropna(subset=['url'], how='any').reset_index(drop=True)
# df=df[df['obj_url'].isin(["-"])]['obj_url'].count()
df1 = pd.read_csv("./2000qV15.res.hangye", dtype=str, **_TSV_OPTIONS)

# Left-join the second table onto the first by "query". Columns that exist in
# both tables keep their name on the left side and get "_DROP" on the right.
# NOTE(review): the "_DROP" columns are never removed before writing -- confirm
# whether they are meant to appear in the output file.
df = pd.merge(
    df,
    df1,
    how="left",
    on=["query"],
    suffixes=("", "_DROP"),
)

# Rewrite cls1/cls2 row by row from cls4 (see mask).
df = df.apply(mask, axis=1)

# Write the result without the index column.
df.to_csv("./titi.txt", index=False, **_TSV_OPTIONS)
# Disabled: drop the helper column after export.
# df=df.drop(['cls4'], axis=1)
print(df)