# 索引和遍历
import pandas as pd
import numpy as np
## 以字典的方式索引
# Build the 3x2 example frame used by every snippet below: random floats from
# np.random.rand, row labels 'a'-'c', columns 'first' and 'second'.
df = pd.DataFrame(np.random.rand(3, 2), columns=['first', 'second'], index=['a', 'b', 'c'])
df
| | first | second |
---|---|---|
a | 0.241914 | 0.936741 |
b | 0.721130 | 0.812072 |
c | 0.707245 | 0.819285 |
"""
可直接取某列
不能直接取某行,如df['a']会报错
"""
df['first']
a 0.241914
b 0.721130
c 0.707245
Name: first, dtype: float64
# Chained lookup: select the column first (a Series), then the index label within it.
# NOTE(review): fine for reading a value; for assignment, a single
# df.loc['b', 'first'] = ... is the documented form, since chained assignment
# may not update df.
df['first']['b']
0.7211298502376191
## loc与iloc
# Select one row by its index label with loc; the result is a Series whose
# entries are keyed by column name.
df.loc['a']
first 0.241914
second 0.936741
Name: a, dtype: float64
# With loc the row (index) selector comes first, then the column selector.
# Label slices include the end label: this keeps rows up to and including 'b'
# and columns up to and including 'first'.
df.loc[:'b', :'first']
| | first |
---|---|
a | 0.241914 |
b | 0.721130 |
# iloc indexes by integer position, the same way an array is indexed:
# here the first two rows and the first two columns.
df.iloc[:2, :2]
| | first | second |
---|---|---|
a | 0.241914 | 0.936741 |
b | 0.721130 | 0.812072 |
# Modify a single value by position: row 2, column 1 (row 'c', column 'second').
df.iloc[2, 1] = 100
df
| | first | second |
---|---|---|
a | 0.241914 | 0.936741 |
b | 0.721130 | 0.812072 |
c | 0.707245 | 100.000000 |
# Write straight into the array returned by df.values.
# NOTE(review): this updates df only when .values is a view of the frame's data
# (both columns here are float64). With pandas Copy-on-Write enabled (the default
# from pandas 3.0) the returned array is read-only and this line raises;
# df.iloc[0, 1] = 1 is the supported way to make the same assignment.
df.values[0, 1] = 1
df
| | first | second |
---|---|---|
a | 0.241914 | 1.000000 |
b | 0.721130 | 0.812072 |
c | 0.707245 | 100.000000 |
## 按条件索引
# Boolean-mask filtering: keep only the rows whose 'second' value is >= 0.5.
df[df['second'] >= 0.5]
| | first | second |
---|---|---|
a | 0.241914 | 1.000000 |
b | 0.721130 | 0.812072 |
c | 0.707245 | 100.000000 |
# Combine two conditions element-wise with &. Each comparison needs its own
# parentheses because & binds more tightly than >= and <=.
df[(df['first'] >= 0.5) & (df['first'] <= 1.0)]
| | first | second |
---|---|---|
b | 0.721130 | 0.812072 |
c | 0.707245 | 100.000000 |
## 遍历dataframe
# Iterate row by row: iterrows() yields (index label, row Series) pairs.
for x, y in df.iterrows():
    # index label of the current row
    print(x)
    # the current row as a Series keyed by column name
    print(y)
    # a single cell of the row, looked up by column name
    print(y['second'])
a
first 0.241914
second 1.000000
Name: a, dtype: float64
1.0
b
first 0.721130
second 0.812072
Name: b, dtype: float64
0.8120721605424106
c
first 0.707245
second 100.000000
Name: c, dtype: float64
100.0
# Iterate column by column: items() yields (column name, column Series) pairs.
for x, y in df.items():
    # column name
    print(x)
    # the column as a Series keyed by index label
    print(y)
first
a 0.241914
b 0.721130
c 0.707245
Name: first, dtype: float64
second
a 1.000000
b 0.812072
c 100.000000
Name: second, dtype: float64