import pandas as pd

# Read the data.
data = pd.read_csv('data.csv')

# Remove duplicate rows. drop_duplicates() returns a new DataFrame and leaves
# the original untouched, so the result has to be assigned back.
data = data.drop_duplicates()
import pandas as pd

# Read the data.
data = pd.read_csv('data.csv')

# Handle missing values: replace every NaN with 0. fillna() returns a new
# DataFrame by default, so the result has to be assigned back.
data = data.fillna(0)
import numpy as np

# Read the data.
data = np.loadtxt('data.csv', delimiter=',')

# Handle outliers with the 1.5 * IQR rule. p1 and p2 are the 25th and 75th
# percentiles; with no axis given they are computed over the flattened array.
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
iqr = p2 - p1
upper_bound = p2 + 1.5 * iqr
lower_bound = p1 - 1.5 * iqr

# Boolean indexing builds a new array instead of modifying `data`, so the
# result is stored under its own name rather than discarded. The comparisons
# are strict, so values exactly equal to a bound are dropped. For a 2-D input
# the result is a flat 1-D array of the surviving values.
filtered_data = data[(data > lower_bound) & (data < upper_bound)]
import pandas as pd

data = pd.read_csv('data.csv')

# Two rows are duplicates when they agree on both 'col1' and 'col2'; the
# first occurrence of each such group is the one that survives.
data = data.drop_duplicates(subset=['col1', 'col2'], keep='first')
import pandas as pd

data = pd.read_csv('data.csv')

# Replace every missing value with 0 and keep working on the filled frame.
data = data.fillna(value=0)
import numpy as np

data = np.loadtxt('data.csv', delimiter=',')

# 1.5 * IQR outlier rule: p1 / p2 are the 25th / 75th percentiles taken over
# the flattened array (no axis is passed to np.percentile).
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
iqr = p2 - p1
upper_bound = p2 + 1.5 * iqr
lower_bound = p1 - 1.5 * iqr

# Keep only the values strictly inside (lower_bound, upper_bound). The mask
# selection returns a new array, so it is bound to a name instead of being
# evaluated and thrown away; `data` itself is left unchanged.
filtered_data = data[(data > lower_bound) & (data < upper_bound)]
import numpy as np  # required by the np.percentile calls below
import pandas as pd

# Read the data.
data = pd.read_csv('data.csv')

# Remove duplicates: rows that agree on 'col1' and 'col2' are duplicates, and
# only the first occurrence is kept. inplace=True modifies `data` directly.
data.drop_duplicates(subset=['col1', 'col2'], keep='first', inplace=True)

# Handle missing values: replace every NaN with 0, again in place.
data.fillna(value=0, inplace=True)

# Handle outliers with the 1.5 * IQR rule. The percentiles are computed over
# all values of the frame at once.
# NOTE(review): assumes every column is numeric - confirm against data.csv.
p1 = np.percentile(data, 25)
p2 = np.percentile(data, 75)
iqr = p2 - p1
upper_bound = p2 + 1.5 * iqr
lower_bound = p1 - 1.5 * iqr

# Indexing a DataFrame with a boolean DataFrame returns a new frame of the
# same shape in which out-of-range cells become NaN (rows are not dropped).
# The result is stored under its own name so it is not discarded; `data`
# keeps the de-duplicated, filled values.
filtered_data = data[(data > lower_bound) & (data < upper_bound)]
本文为翻滚的胖子原创文章,转载无需和我联系,但请注明来自猿教程iskeys.com