FuzzyWuzzy in data science
import numpy as np
import pandas as pd
from fuzzywuzzy import process
import fuzzywuzzy
# Minimum similarity score for a name to be treated as a spelling variant of
# the target name; it is compared against the score fuzzywuzzy reports for
# each candidate.
MIN_MATCH_SCORE = 90


def select_close_matches(matches, min_score=MIN_MATCH_SCORE):
    """Return the matched strings whose score is at least *min_score*.

    Args:
        matches: Iterable of tuples shaped like the results of
            ``fuzzywuzzy.process.extract``: the matched string at index 0 and
            its score at index 1 (any further elements are ignored).
        min_score: Inclusive lower bound on the score.

    Returns:
        A list of the matched strings, in the order they were given.
    """
    # ``>=`` rather than ``==``: a threshold must also accept better scores,
    # otherwise a variant scoring 91-100 would be silently skipped.
    return [match[0] for match in matches if match[1] >= min_score]


def replace_matches(df, column, close_matches, canonical):
    """Overwrite values of ``df[column]`` found in *close_matches*.

    The frame is modified in place; nothing is returned.

    Args:
        df: DataFrame to update.
        column: Label of the column holding the strings to normalise.
        close_matches: Collection of spellings to be replaced.
        canonical: The single spelling that replaces all of them.
    """
    row_matches = df[column].isin(close_matches)
    # Assign a scalar, not a one-element list: a scalar is applied to every
    # selected row, whereas a list must match the number of selected rows.
    df.loc[row_matches, column] = canonical


def main():
    """Build the sample frame, unify the spellings of "alice_sahu", print it."""
    df = pd.DataFrame({
        "name": [
            "alice_sahu",
            "alice sahu",
            "banika_pradhan",
            "banika pradhan,",
            "banika pradhan",
            "swapita_maheswari",
        ],
        "age": [20, 22, 35, 21, 19, 30],
    })

    # Strip stray commas *before* matching so that the fuzzy scores and the
    # later isin() lookup are computed on the same strings.
    df['name'] = df['name'].str.strip(',')

    # Score each distinct name once instead of every row.
    unique_names = df['name'].unique()
    match_value = fuzzywuzzy.process.extract(
        "alice_sahu",
        unique_names,
        limit=10,
        scorer=fuzzywuzzy.fuzz.token_sort_ratio,
    )

    close_matches = select_close_matches(match_value)
    print(close_matches)

    replace_matches(df, 'name', close_matches, "alice_sahu")
    print(df)


if __name__ == "__main__":
    main()
Comments
Post a Comment