FuzzyWuzzy in data science
import numpy as np
import pandas as pd
from fuzzywuzzy import process
import fuzzywuzzy

# Canonical spelling that near-duplicate entries are rewritten to.
TARGET_NAME = "alice_sahu"
# Lowest similarity score at which a name counts as a variant of TARGET_NAME.
MIN_SCORE = 90

df = pd.DataFrame({
    "name": [
        "alice_sahu",
        "alice sahu",
        "banika_pradhan",
        "banika pradhan,",
        "banika pradhan",
        "swapita_maheswari",
    ],
    "age": [20, 22, 35, 21, 19, 30],
})

# Strip stray leading/trailing commas *before* matching, so the strings that
# get scored are the same strings later looked up with `isin`.
df['name'] = df['name'].str.strip(',')

name = df['name']
unique_array = name.unique()  # distinct spellings, useful when inspecting the data

# Score the names against the target; every result tuple starts with
# (matched name, score).
match_value = process.extract(
    TARGET_NAME,
    name,
    limit=10,
    scorer=fuzzywuzzy.fuzz.token_sort_ratio,
)

# Keep everything at or above the threshold. Testing for exactly 90 would
# silently skip variants that score between 91 and 100.
close_matches = [match[0] for match in match_value if match[1] >= MIN_SCORE]
print(close_matches)

# Rewrite every matching row to the canonical spelling. A scalar is assigned
# so it broadcasts to however many rows matched (including none).
row_matches = df['name'].isin(close_matches)
df.loc[row_matches, 'name'] = TARGET_NAME
print(df)