{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 创建pandas对象" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Series" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 指定values和index来创建Series" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 0.5\n", "b 1.0\n", "c 2.0\n", "dtype: float64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.Series([0.5, 1.0, 2.0], index=['a', 'b', 'c'])\n", "data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.5, 1. , 2. ])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 查看values\n", "data.values" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['a', 'b', 'c'], dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 查看index,它是一个pd.Index对象\n", "data.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 字典转化为Series" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "population_dict = {\"California\": 3833, \n", " \"Texas\":2644, \n", " \"New York\": 1965}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "California 3833\n", "Texas 2644\n", "New York 1965\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 由字典转化\n", "population = pd.Series(population_dict)\n", "population" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 指定values,index和columns来创建DataFrame" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
firstsecond
a0.4698890.630690
b0.1493670.849425
c0.7203190.209318
\n", "
" ], "text/plain": [ " first second\n", "a 0.469889 0.630690\n", "b 0.149367 0.849425\n", "c 0.720319 0.209318" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 指定 data, columns, index\n", "df = pd.DataFrame(np.random.rand(3, 2), columns=['first', 'second'], index=['a', 'b', 'c'])\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.46988868, 0.63068969],\n", " [0.14936727, 0.84942502],\n", " [0.7203189 , 0.20931765]])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 查看values\n", "df.values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Index(['a', 'b', 'c'], dtype='object'),\n", " Index(['first', 'second'], dtype='object'))" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# index和columns都是pd.Index对象\n", "df.index, df.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 双重字典转化为DataFrame" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "area_dict = {'California': 423967, 'Texas': 695662}\n", "population_area_dict = {'population': population_dict, 'area': area_dict}" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
populationarea
California3833423967.0
Texas2644695662.0
New York1965NaN
\n", "
" ], "text/plain": [ " population area\n", "California 3833 423967.0\n", "Texas 2644 695662.0\n", "New York 1965 NaN" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 默认外层是columns,内层是index,无对应数据则为NaN\n", "pd.DataFrame(population_area_dict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 多个Series转化成DataFrame" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "California 423967\n", "Texas 695662\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "area = pd.Series(area_dict)\n", "area" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
populationarea
California3833423967.0
New York1965NaN
Texas2644695662.0
\n", "
" ], "text/plain": [ " population area\n", "California 3833 423967.0\n", "New York 1965 NaN\n", "Texas 2644 695662.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame({'population': population, 'area': area})" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }