使用anaconda
python 導入csv文件
from pandas import read_csv;
df =read_csv('E://1.csv',encoding ='UTF-8')
encoding 指定編碼utf-8
python 導入文本文件
from pandas import read_table;
df =read_table('E://2.txt',encoding ='UTF-8',names=['age','name'],sep=',')
names 參數用于加入列名
導入 excel
from pandas import read_excel;
df = read_excel('E://3.xls')
python 導出文本文件
from pandas import DataFrame;
df =DataFrame({
'age':[21,22,23],
'name':['ken','john','zhangsa']
});
df.to_csv('E://df.csv');
df.to_csv('E://df.csv',index=False); 導出時候不帶序號
處理重複值 == 數據清洗
drop_duplicates()
from pandas import read_csv;
df =read_csv('E://shuju/data.csv',encoding='utf-8')
netOF = df.drop_duplicates();
缺失值的處理
處理方式
數據補齊;刪除對應缺失行
去除數據結構中值為空的數據
使用 dropna()
from pandas import read_csv;
df =read_csv('E://shuju/1.csv',encoding='utf-8')
newdf= df.dropna();
空格值數據的處理
strip()使用
from pandas import read_csv;
df =read_csv('E://shuju/data.csv',encoding='utf-8')
nameName = df['name'].str.strip();
df['name'] =nameName
字段的抽取
使用slice(start,stop)
from pandas import read_csv;
df =read_csv('E://shuju/data.csv',encoding='utf-8')
df['tel'] =df['tel'].astype(str); astype 轉換成字符串
bands =df['tel'].str.slice(0,3)
字段拆分split(sep,n,expand=False)
sep 用于分割的字符串
n 最多分割的次數(n=1 時得到兩列)
expand 是否展開為數據框(DataFrame)
expand 為True 返回DataFrame,為False 返回Series
from pandas import read_csv;
df =read_csv('E://shuju/data.csv',encoding='utf-8');
newDF =df['name'].str.split(' ',n=1,expand=True);
newDF.columns=['band','name'];
記錄抽取
對(duì)指根據(jù)一定的條件對(duì)數(shù)據(jù)進(jìn)行抽取
dataframe[condition]
condition 過濾的條件 返回值是dataframe
記錄抽取 常用的條件類型
比較運算 > < >= <= 如 df[df.comments>1000];
范圍運(yùn)算
between(left,right)
df[df.comments.between(1000,10000)]
空值匹配
pandas.isnull(column)
df[pandas.isnull(df.title)]
字符串匹配
str.contains(pattern,na=False)
df[df.title.str.contains('臺電',na=False)]
邏輯運算
與& 或| 取反~
import pandas;
from pandas import read_csv
df[df.comments>10000];
隨機抽取函數
numpy.random.randint(start,end,number)
start 開始值(包含) end 結束值(不包含) number 抽取的個數
import numpy;
from pandas import read_csv;
df =read_csv('E://shuju/data.csv',encoding='utf-8');
r= numpy.random.randint(0,10,3);
df.loc[r,:];