保存与加载

Contents

保存与加载

import pandas as pd
import numpy as np

与 dict 转换

"""
使用指定 data、index、columns 的方式创建 DataFrame
下面使用了 ord 和 chr 来生成字母表的前 8 个字母（'a' 到 'h'）作为行索引
"""
# Column names of the sample table.
columns = ['duration', 'playtime', 'up', 'favorite', 'comment', 'share']
# Row labels: the eight consecutive letters starting at 'a'.
index = list(map(chr, range(ord('a'), ord('a') + 8)))
# Fill the table with random integers drawn from [1, 100),
# one row per label and one column per column name.
df = pd.DataFrame(
    data=np.random.randint(1, 100, size=(len(index), len(columns))),
    index=index,
    columns=columns,
)
df.values

array([[19, 52, 71,  7, 86, 90],
       [13, 39, 57, 62, 96, 91],
       [30, 92, 98, 53, 60, 29],
       [79, 85, 99, 93, 23, 93],
       [82, 27,  2, 81, 32, 96],
       [96, 80,  3, 89, 85, 38],
       [18, 30, 84, 17, 64, 81],
       [36, 96, 78, 33, 59, 65]])

# DataFrame -> dict.
# orient='index' maps every row label to a {column name: value} dict,
# i.e. the result has the shape {row_label: {column: value, ...}, ...}.
data_dict = df.to_dict(orient='index')
data_dict

{'a': {'duration': 19,
  'playtime': 52,
  'up': 71,
  'favorite': 7,
  'comment': 86,
  'share': 90},
 'b': {'duration': 13,
  'playtime': 39,
  'up': 57,
  'favorite': 62,
  'comment': 96,
  'share': 91},
 'c': {'duration': 30,
  'playtime': 92,
  'up': 98,
  'favorite': 53,
  'comment': 60,
  'share': 29},
 'd': {'duration': 79,
  'playtime': 85,
  'up': 99,
  'favorite': 93,
  'comment': 23,
  'share': 93},
 'e': {'duration': 82,
  'playtime': 27,
  'up': 2,
  'favorite': 81,
  'comment': 32,
  'share': 96},
 'f': {'duration': 96,
  'playtime': 80,
  'up': 3,
  'favorite': 89,
  'comment': 85,
  'share': 38},
 'g': {'duration': 18,
  'playtime': 30,
  'up': 84,
  'favorite': 17,
  'comment': 64,
  'share': 81},
 'h': {'duration': 36,
  'playtime': 96,
  'up': 78,
  'favorite': 33,
  'comment': 59,
  'share': 65}}

# dict -> DataFrame.
# orient='index' treats the outer dict keys as row labels and the
# inner dict keys as column names, reversing the conversion above.
df_dict = pd.DataFrame.from_dict(data_dict, orient='index')
df_dict

	duration	playtime	up	favorite	comment	share
a	19	52	71	7	86	90
b	13	39	57	62	96	91
c	30	92	98	53	60	29
d	79	85	99	93	23	93
e	82	27	2	81	32	96
f	96	80	3	89	85	38
g	18	30	84	17	64	81
h	36	96	78	33	59	65

csv 文件

# Save the DataFrame as a CSV file.
df.to_csv('sample.csv')

# index_col=0 must be passed so that the first CSV column is used as
# the row index instead of being loaded as an ordinary data column.
df_csv = pd.read_csv('sample.csv', index_col=0)
df_csv.values

array([[19, 52, 71,  7, 86, 90],
       [13, 39, 57, 62, 96, 91],
       [30, 92, 98, 53, 60, 29],
       [79, 85, 99, 93, 23, 93],
       [82, 27,  2, 81, 32, 96],
       [96, 80,  3, 89, 85, 38],
       [18, 30, 84, 17, 64, 81],
       [36, 96, 78, 33, 59, 65]])

excel 文件

# Save the DataFrame as an Excel file.
df.to_excel('sample.xlsx')

# Without index_col=0 the saved row labels come back as an extra
# *column* named 'Unnamed: 0', and a default 0..n-1 integer index is
# used instead (see the output below).
df_excel = pd.read_excel('sample.xlsx')
df_excel

	Unnamed: 0	duration	playtime	up	favorite	comment	share
0	a	19	52	71	7	86	90
1	b	13	39	57	62	96	91
2	c	30	92	98	53	60	29
3	d	79	85	99	93	23	93
4	e	82	27	2	81	32	96
5	f	96	80	3	89	85	38
6	g	18	30	84	17	64	81
7	h	36	96	78	33	59	65