{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 创建pandas对象"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Series"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 指定values和index来创建Series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 0.5\n",
"b 1.0\n",
"c 2.0\n",
"dtype: float64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.Series([0.5, 1.0, 2.0], index=['a', 'b', 'c'])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.5, 1. , 2. ])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 查看values\n",
"data.values"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c'], dtype='object')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 查看index,它是一个pd.Index对象\n",
"data.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 字典转化为Series"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"population_dict = {\"California\": 3833, \n",
" \"Texas\":2644, \n",
" \"New York\": 1965}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"California 3833\n",
"Texas 2644\n",
"New York 1965\n",
"dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 由字典转化\n",
"population = pd.Series(population_dict)\n",
"population"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 指定values,index和columns来创建DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" first | \n",
" second | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 0.469889 | \n",
" 0.630690 | \n",
"
\n",
" \n",
" b | \n",
" 0.149367 | \n",
" 0.849425 | \n",
"
\n",
" \n",
" c | \n",
" 0.720319 | \n",
" 0.209318 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" first second\n",
"a 0.469889 0.630690\n",
"b 0.149367 0.849425\n",
"c 0.720319 0.209318"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 指定 data, columns, index\n",
"df = pd.DataFrame(np.random.rand(3, 2), columns=['first', 'second'], index=['a', 'b', 'c'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.46988868, 0.63068969],\n",
" [0.14936727, 0.84942502],\n",
" [0.7203189 , 0.20931765]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 查看values\n",
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Index(['a', 'b', 'c'], dtype='object'),\n",
" Index(['first', 'second'], dtype='object'))"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# index和columns都是pd.Index对象\n",
"df.index, df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 双重字典转化为DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"area_dict = {'California': 423967, 'Texas': 695662}\n",
"population_area_dict = {'population': population_dict, 'area': area_dict}"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" population | \n",
" area | \n",
"
\n",
" \n",
" \n",
" \n",
" California | \n",
" 3833 | \n",
" 423967.0 | \n",
"
\n",
" \n",
" Texas | \n",
" 2644 | \n",
" 695662.0 | \n",
"
\n",
" \n",
" New York | \n",
" 1965 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" population area\n",
"California 3833 423967.0\n",
"Texas 2644 695662.0\n",
"New York 1965 NaN"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 默认外层是columns,内层是index,无对应数据则为NaN\n",
"pd.DataFrame(population_area_dict)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 多个Series转化成DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"California 423967\n",
"Texas 695662\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"area = pd.Series(area_dict)\n",
"area"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" population | \n",
" area | \n",
"
\n",
" \n",
" \n",
" \n",
" California | \n",
" 3833 | \n",
" 423967.0 | \n",
"
\n",
" \n",
" New York | \n",
" 1965 | \n",
" NaN | \n",
"
\n",
" \n",
" Texas | \n",
" 2644 | \n",
" 695662.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" population area\n",
"California 3833 423967.0\n",
"New York 1965 NaN\n",
"Texas 2644 695662.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'population': population, 'area': area})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}