import pandas as pd
# Load the data
data = pd.read_csv('data.csv')
# Remove duplicate rows. drop_duplicates() returns a new DataFrame by default,
# so the result is assigned back; a bare call would leave `data` unchanged.
data = data.drop_duplicates()

import pandas as pd
# Load the data
data = pd.read_csv('data.csv')
# Handle missing values. fillna() returns a new DataFrame by default, so the
# result is assigned back; a bare call would leave `data` unchanged.
data = data.fillna(0)

import numpy as np
# Load the data
data = np.loadtxt('data.csv', delimiter=',')
# Handle outliers: keep only values strictly between
# Q1 - 1.5 * (Q3 - Q1) and Q3 + 1.5 * (Q3 - Q1), where the quartiles are
# computed over all values in the array.
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
upper_bound = p2 + 1.5 * (p2 - p1)
lower_bound = p1 - 1.5 * (p2 - p1)
# Boolean-mask indexing returns a new array, so the result must be assigned
# for the filtering to take effect.
# NOTE(review): if the file has several columns, the mask selection yields a
# flattened 1-D array — confirm that is acceptable.
data = data[(data > lower_bound) & (data < upper_bound)]

import pandas as pd
data = pd.read_csv('data.csv')
# Drop rows whose (col1, col2) pair repeats an earlier row, keeping the first
# occurrence; inplace=True modifies `data` directly.
data.drop_duplicates(subset=['col1', 'col2'], keep='first', inplace=True)

import pandas as pd
data = pd.read_csv('data.csv')
# Replace every missing value with 0; inplace=True modifies `data` directly.
data.fillna(value=0, inplace=True)

import numpy as np
data = np.loadtxt('data.csv', delimiter=',')
# Quartiles are computed over all values in the array; values outside
# (Q1 - 1.5 * (Q3 - Q1), Q3 + 1.5 * (Q3 - Q1)) are treated as outliers.
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
upper_bound = p2 + 1.5 * (p2 - p1)
lower_bound = p1 - 1.5 * (p2 - p1)
# Boolean-mask indexing returns a new array, so the result must be assigned
# for the filtering to take effect.
# NOTE(review): if the file has several columns, the mask selection yields a
# flattened 1-D array — confirm that is acceptable.
data = data[(data > lower_bound) & (data < upper_bound)]

import pandas as pd
# numpy is used below for the percentile computation; this snippet only had
# pandas imported, so bring numpy into scope here.
import numpy as np

# Load the data
data = pd.read_csv('data.csv')
# Remove duplicates: drop rows whose (col1, col2) pair repeats an earlier row,
# keeping the first occurrence; inplace=True modifies `data` directly.
data.drop_duplicates(subset=['col1', 'col2'], keep='first', inplace=True)
# Handle missing values: replace every missing value with 0 in place.
data.fillna(value=0, inplace=True)
# Handle outliers: keep only values strictly between
# Q1 - 1.5 * (Q3 - Q1) and Q3 + 1.5 * (Q3 - Q1).
# NOTE(review): np.percentile over the whole DataFrame assumes every column is
# numeric — confirm against the actual CSV.
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
upper_bound = p2 + 1.5 * (p2 - p1)
lower_bound = p1 - 1.5 * (p2 - p1)
# The selection returns a new object, so it must be assigned to take effect.
# NOTE(review): indexing a DataFrame with a boolean DataFrame keeps the shape
# and sets non-matching cells to NaN rather than dropping rows — confirm this
# is the intended outlier handling.
data = data[(data > lower_bound) & (data < upper_bound)]

# This article is an original work by 翻滚的胖子. It may be reposted without
# contacting the author, but please credit 猿教程 (iskeys.com).
