# 创建pandas对象

In [1]:
import pandas as pd
import numpy as np

## Series

### 指定values和index来创建Series

In [2]:
# Build a Series from explicit values and a matching list of index labels.
data = pd.Series(data=[0.5, 1.0, 2.0], index=list('abc'))
data

a    0.5
b    1.0
c    2.0
dtype: float64

In [3]:
# Inspect the underlying values of the Series (displayed as a NumPy array).
data.values

array([0.5, 1. , 2. ])

In [4]:
# Inspect the index of the Series; it is a pd.Index object.
data.index

Index(['a', 'b', 'c'], dtype='object')

### 字典转化为Series

In [5]:
# Example population figures keyed by state name.
population_dict = {
    "California": 3833,
    "Texas": 2644,
    "New York": 1965,
}

In [6]:
# Convert the dict into a Series: the dict keys become the index labels.
population = pd.Series(data=population_dict)
population

California    3833
Texas         2644
New York      1965
dtype: int64

## DataFrame

### 指定values、index和columns来创建DataFrame

In [7]:
# Build a DataFrame by specifying the data, the column labels and the index labels.
# The random values come from a Generator with a fixed seed so that
# "Restart Kernel -> Run All" reproduces exactly the same frame every time.
df = pd.DataFrame(
    np.random.default_rng(42).random((3, 2)),  # 3 rows x 2 columns, uniform in [0, 1)
    columns=['first', 'second'],
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,first,second
a,0.469889,0.63069
b,0.149367,0.849425
c,0.720319,0.209318


In [8]:
# Inspect the underlying values of the DataFrame (displayed as a 2-D NumPy array).
df.values

array([[0.46988868, 0.63068969],
       [0.14936727, 0.84942502],
       [0.7203189 , 0.20931765]])

In [9]:
# Both the row index and the columns are pd.Index objects.
df.index, df.columns

(Index(['a', 'b', 'c'], dtype='object'),
 Index(['first', 'second'], dtype='object'))

### 双重字典转化为DataFrame

In [10]:
# Area figures for a subset of the states; 'New York' is left out on purpose
# so that the resulting DataFrame contains a missing value.
area_dict = dict(California=423967, Texas=695662)
# Nested dict: outer keys name the quantities, inner keys name the states.
population_area_dict = dict(population=population_dict, area=area_dict)

In [11]:
# By default the outer keys become the columns and the inner keys become the index;
# positions with no corresponding data are filled with NaN.
pd.DataFrame(data=population_area_dict)

Unnamed: 0,population,area
California,3833,423967.0
Texas,2644,695662.0
New York,1965,


### 多个Series转化成DataFrame

In [12]:
# Turn the area dict into a Series as well, keyed by state name.
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
dtype: int64

In [13]:
# Combine the two Series into a DataFrame: each Series becomes a column, rows are
# matched on index label, and a label missing from one Series shows up as NaN.
pd.DataFrame(dict(population=population, area=area))

Unnamed: 0,population,area
California,3833,423967.0
New York,1965,
Texas,2644,695662.0
