Basic
import pandas as pd
# Report which pandas build the examples below run against.
banner = f" Using {pd.__name__},Version {pd.__version__}"
print(banner)
Using pandas,Version 0.23.0
创建空Dataframe
# Spell out the constructor defaults: no data, no index, no columns -> empty frame.
df = pd.DataFrame(data=None, index=None, columns=None)
print(df)
Empty DataFrame
Columns: []
Index: []
从Dict创建Dataframe
# Each key becomes a column label and each list holds that column's values.
# The mapping is named `person_data` so the built-in `dict` type is not shadowed.
person_data = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}
df = pd.DataFrame(person_data)
df
name
age
city
0
Tom
18
Beijing
1
Bob
30
ShangHai
2
Mary
25
GuangZhou
3
James
40
ShenZhen
# Build the table from row lists, with explicit column labels and a row index
# that carries the name 'person'.
cols = ['age', 'city']
index = pd.Index(["Tom", "Bob", "Mary", "James"], name='person')
data = [
    [18, 'Beijing'],
    [30, 'ShangHai'],
    [25, 'GuangZhou'],
    [40, 'ShenZhen'],
]
df = pd.DataFrame(data, index=index, columns=cols)
df
age
city
person
Tom
18
Beijing
Bob
30
ShangHai
Mary
25
GuangZhou
James
40
ShenZhen
对columns的基础操作
add column
# Recreate the starting frame for the column examples.
# The mapping is named `person_data` so the built-in `dict` type is not shadowed.
person_data = {
    'name': ["Tom", "Bob", "Mary", "James"],
    'age': [18, 30, 25, 40],
    'city': ["Beijing", "ShangHai", "GuangZhou", "ShenZhen"],
}
df = pd.DataFrame(person_data)
df
name
age
city
0
Tom
18
Beijing
1
Bob
30
ShangHai
2
Mary
25
GuangZhou
3
James
40
ShenZhen
# Assigning a scalar to a new column label creates the column and repeats the value on every row.
df['country'] = 'USA'
df
name
age
city
country
0
Tom
18
Beijing
USA
1
Bob
30
ShangHai
USA
2
Mary
25
GuangZhou
USA
3
James
40
ShenZhen
USA
# A new column can be created from an existing one; 'adress' starts out with the values of 'country'.
# NOTE: the label is spelled 'adress' (sic); later cells refer to it by this exact spelling.
df['adress'] = df['country']
df
name
age
city
country
adress
0
Tom
18
Beijing
USA
USA
1
Bob
30
ShangHai
USA
USA
2
Mary
25
GuangZhou
USA
USA
3
James
40
ShenZhen
USA
USA
Change column values
# Assigning to an existing label overwrites that column; 'adress' keeps the values it was given earlier.
df['country'] = 'China'
df
name
age
city
country
adress
0
Tom
18
Beijing
China
USA
1
Bob
30
ShangHai
China
USA
2
Mary
25
GuangZhou
China
USA
3
James
40
ShenZhen
China
USA
# Row-by-row string concatenation: "<city>,<country>" replaces the previous 'adress' values.
df['adress'] = df['city']+','+ df['country']
df
name
age
city
country
adress
0
Tom
18
Beijing
China
Beijing,China
1
Bob
30
ShangHai
China
ShangHai,China
2
Mary
25
GuangZhou
China
GuangZhou,China
3
James
40
ShenZhen
China
ShenZhen,China
Delete columns
# Two ways to remove a column in place: drop() along axis=1, or the `del` statement.
df.drop('country',axis=1, inplace=True)
del df['city']
df
name
age
adress
0
Tom
18
Beijing,China
1
Bob
30
ShangHai,China
2
Mary
25
GuangZhou,China
3
James
40
ShenZhen,China
Select columns
# Bracket access with a single label returns that column as a Series.
df['age']
0 18
1 30
2 25
3 40
Name: age, dtype: int64
# Attribute-style access to the 'name' column; yields the same kind of Series as df['name'].
df.name
0 Tom
1 Bob
2 Mary
3 James
Name: name, dtype: object
# A list of labels returns a DataFrame with the columns in the requested order.
df[['age','name']]
age
name
0
18
Tom
1
30
Bob
2
25
Mary
3
40
James
# The column labels, as an Index object.
df.columns
Index(['name', 'age', 'adress'], dtype='object')
Rename columns
# Rename columns in place through an explicit old-label -> new-label mapping.
# Do not pass `index=str` here: it would additionally convert the integer row
# labels to strings, which a column rename should not do.
df.rename(columns={'age': 'Age', 'name': 'Name', 'adress': 'Adress'}, inplace=True)
df
Name
Age
Adress
0
Tom
18
Beijing,China
1
Bob
30
ShangHai,China
2
Mary
25
GuangZhou,China
3
James
40
ShenZhen,China
# A callable can replace the mapping; with axis='columns' it is applied to every column label.
df.rename(str.lower, axis='columns',inplace =True)
df
name
age
adress
0
Tom
18
Beijing,China
1
Bob
30
ShangHai,China
2
Mary
25
GuangZhou,China
3
James
40
ShenZhen,China
# Same technique with str.capitalize: restores the 'Name' / 'Age' / 'Adress' labels.
df.rename(str.capitalize, axis='columns',inplace =True)
df
Name
Age
Adress
0
Tom
18
Beijing,China
1
Bob
30
ShangHai,China
2
Mary
25
GuangZhou,China
3
James
40
ShenZhen,China
Set column value with conditions
# Bucket people by age: Age <= 18 -> 'young', 18 < Age <= 30 -> 'middle_aged',
# everything else keeps the default 'elderly'.
age = df['Age']
is_young = age <= 18
is_middle_aged = (age > 18) & (age <= 30)

df['Group'] = 'elderly'  # default label, overwritten below wherever a mask matches
df.loc[is_young, 'Group'] = 'young'
df.loc[is_middle_aged, 'Group'] = 'middle_aged'
df
Name
Age
Adress
Group
0
Tom
18
Beijing,China
young
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
对rows的基础操作
loc函数查询
df
Name
Age
Adress
Group
0
Tom
18
Beijing,China
young
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
# A bare slice selects every row (and every column).
df.loc[:]
Name
Age
Adress
Group
0
Tom
18
Beijing,China
young
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
loc函数条件查询
# A boolean Series as the row selector keeps only the rows where the condition holds.
df.loc[df['Age']>20]
Name
Age
Adress
Group
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
loc函数条件行列查询
# Row condition plus a column label: the 'Name' values of the rows whose Group is 'middle_aged'.
df.loc[df['Group']=='middle_aged','Name']
1 Bob
2 Mary
Name: Name, dtype: object
Where 查询
# where() keeps the frame's shape: rows failing the condition are blanked to NaN
# rather than removed.
filter_adult = df['Age'].gt(25)
result = df.where(cond=filter_adult)
result
Name
Age
Adress
Group
0
NaN
NaN
NaN
NaN
1
Bob
30.0
ShangHai,China
middle_aged
2
NaN
NaN
NaN
NaN
3
James
40.0
ShenZhen,China
elderly
Query 筛选
df
Name
Age
Adress
Group
0
Tom
18
Beijing,China
young
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
# `'Group=="middle_aged"' and 'Age>30'` is a Python `and` between two non-empty
# strings, which evaluates to the second string only, so the Group condition
# never reached query(). Pass the single condition that is actually applied.
# To combine conditions, write the `and` inside one query string,
# e.g. df.query('Group == "elderly" and Age > 30').
df.query('Age>30')
Name
Age
Adress
Group
3
James
40
ShenZhen,China
elderly
Dataframe其他信息
# (number of rows, number of columns)
df.shape
(4, 4)
# Summary statistics; only the numeric 'Age' column appears in the result here.
df.describe()
Age
count
4.000000
mean
28.250000
std
9.251126
min
18.000000
25%
23.250000
50%
27.500000
75%
32.500000
max
40.000000
# head(3) / tail(3) return the first / last three rows.
# Only the tail(3) result is shown below, presumably because the cell displays just its last expression.
df.head(3)
df.tail(3)
Name
Age
Adress
Group
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
读写CSV
把df导出为CSV,不要index
# index=False (the documented boolean, rather than a falsy None) leaves the
# row labels out of the file, so only the four data columns are written.
df.to_csv('person.csv', index=False, sep=',')
读取CSV为dataframe
# Load the file written above into a new frame; a fresh 0..3 row index is generated on read.
person = pd.read_csv('person.csv')
person
Name
Age
Adress
Group
0
Tom
18
Beijing,China
young
1
Bob
30
ShangHai,China
middle_aged
2
Mary
25
GuangZhou,China
middle_aged
3
James
40
ShenZhen,China
elderly
Last updated
Was this helpful?