# 创建pandas对象
import pandas as pd
import numpy as np
## Series
### 指定values和index来创建Series
# Build a Series by passing the values and their index labels explicitly.
data = pd.Series(
    data=[0.5, 1.0, 2.0],
    index=['a', 'b', 'c'],
)
data
a 0.5
b 1.0
c 2.0
dtype: float64
# Inspect the underlying values (displayed as a NumPy array)
data.values
array([0.5, 1. , 2. ])
# Inspect the index; it is a pd.Index object
data.index
Index(['a', 'b', 'c'], dtype='object')
### 字典转化为Series
# Example populations keyed by state name.
population_dict = {
    "California": 3833,
    "Texas": 2644,
    "New York": 1965,
}
# A dict converts directly: its keys become the index, its values the data.
population = pd.Series(data=population_dict)
population
California 3833
Texas 2644
New York 1965
dtype: int64
## DataFrame
### 指定values,index和columns来创建DataFrame
# Supply the data together with explicit row labels (index) and column labels.
# The values are random, so they differ on every run.
df = pd.DataFrame(
    data=np.random.rand(3, 2),
    index=['a', 'b', 'c'],
    columns=['first', 'second'],
)
df
| | first | second |
---|---|---|
a | 0.748481 | 0.989163 |
b | 0.033341 | 0.370059 |
c | 0.514828 | 0.872531 |
# Inspect the underlying values (displayed as a 2-D NumPy array)
df.values
array([[0.74848094, 0.98916317],
[0.03334097, 0.370059 ],
[0.51482786, 0.8725308 ]])
# Both index and columns are pd.Index objects
df.index, df.columns
(Index(['a', 'b', 'c'], dtype='object'),
Index(['first', 'second'], dtype='object'))
### 双重字典转化为DataFrame
# Areas are given for only two of the three states in population_dict.
area_dict = {
    'California': 423967,
    'Texas': 695662,
}
population_area_dict = {
    'population': population_dict,
    'area': area_dict,
}
# By default the outer keys become columns and the inner keys become the
# index; entries with no matching data are filled with NaN.
pd.DataFrame(data=population_area_dict)
| | population | area |
---|---|---|
California | 3833 | 423967.0 |
Texas | 2644 | 695662.0 |
New York | 1965 | NaN |
### 多个Series转化成DataFrame
# Convert the area dict into a Series, in the same way as the population dict
area = pd.Series(area_dict)
area
California 423967
Texas 695662
dtype: int64
# Each Series becomes a column, aligned on index labels; a label missing from
# one Series (here 'New York' in area) is filled with NaN.
pd.DataFrame({'population': population, 'area': area})
| | population | area |
---|---|---|
California | 3833 | 423967.0 |
New York | 1965 | NaN |
Texas | 2644 | 695662.0 |