创建pandas对象

import pandas as pd
import numpy as np

Series

指定values和index来创建Series

# Build a Series from explicit values paired with explicit index labels
data = pd.Series(
    data=[0.5, 1.0, 2.0],
    index=list('abc'),
)
data
a    0.5
b    1.0
c    2.0
dtype: float64
# Inspect the underlying values
data.values
array([0.5, 1. , 2. ])
# Inspect the index; it is a pd.Index object
data.index
Index(['a', 'b', 'c'], dtype='object')

字典转化为Series

# Mapping of state name -> population value, used as the source for the Series
population_dict = {
    "California": 3833,
    "Texas": 2644,
    "New York": 1965,
}
# A dict converts directly: its keys become the index, its values the data
population = pd.Series(data=population_dict)
population
California    3833
Texas         2644
New York      1965
dtype: int64

DataFrame

指定values,index和columns来创建DataFrame

# Build a DataFrame by passing data, index and columns explicitly;
# the data is a 3x2 array of random floats, so values differ on every run
df = pd.DataFrame(
    data=np.random.rand(3, 2),
    index=list('abc'),
    columns=['first', 'second'],
)
df
      first    second
a  0.748481  0.989163
b  0.033341  0.370059
c  0.514828  0.872531
# Inspect the underlying values
df.values
array([[0.74848094, 0.98916317],
       [0.03334097, 0.370059  ],
       [0.51482786, 0.8725308 ]])
# Both index and columns are pd.Index objects
df.index, df.columns
(Index(['a', 'b', 'c'], dtype='object'),
 Index(['first', 'second'], dtype='object'))

双重字典转化为DataFrame

# Area values keyed by state; note there is no entry for New York
area_dict = {
    'California': 423967,
    'Texas': 695662,
}
population_area_dict = dict(population=population_dict, area=area_dict)
# By default the outer keys become the columns and the inner keys become the
# index; cells with no corresponding data are filled with NaN
pd.DataFrame(data=population_area_dict)
            population      area
California        3833  423967.0
Texas             2644  695662.0
New York          1965       NaN

多个Series转化成DataFrame

# Convert the area dict into a Series; its keys become the index labels
area = pd.Series(area_dict)
area
California    423967
Texas         695662
dtype: int64
# Combine the two Series into a DataFrame: dict keys become the column names
# and rows are aligned on index labels; a label missing from one Series
# (New York has no area) yields NaN in that column
pd.DataFrame({'population': population, 'area': area})
            population      area
California        3833  423967.0
New York          1965       NaN
Texas             2644  695662.0