保存与加载

import pandas as pd
import numpy as np

与dict转换

"""
Build a DataFrame by passing data, index and columns explicitly.
The row labels 'a'..'h' are generated from code points via ord and chr.
"""
# Eight consecutive lowercase letters starting at 'a' serve as row labels.
index = list(map(chr, range(ord('a'), ord('a') + 8)))
columns = [
    'duration',
    'playtime',
    'up',
    'favorite',
    'comment',
    'share',
]
# Random integers in [1, 100), one row per label and one column per name.
df = pd.DataFrame(
    data=np.random.randint(1, 100, size=(len(index), len(columns))),
    index=index,
    columns=columns,
)
df.values
array([[19, 52, 71,  7, 86, 90],
       [13, 39, 57, 62, 96, 91],
       [30, 92, 98, 53, 60, 29],
       [79, 85, 99, 93, 23, 93],
       [82, 27,  2, 81, 32, 96],
       [96, 80,  3, 89, 85, 38],
       [18, 30, 84, 17, 64, 81],
       [36, 96, 78, 33, 59, 65]])
# df -> dict: orient='index' yields {row label: {column name: value}}
data_dict = df.to_dict(orient='index')
data_dict
{'a': {'duration': 19,
  'playtime': 52,
  'up': 71,
  'favorite': 7,
  'comment': 86,
  'share': 90},
 'b': {'duration': 13,
  'playtime': 39,
  'up': 57,
  'favorite': 62,
  'comment': 96,
  'share': 91},
 'c': {'duration': 30,
  'playtime': 92,
  'up': 98,
  'favorite': 53,
  'comment': 60,
  'share': 29},
 'd': {'duration': 79,
  'playtime': 85,
  'up': 99,
  'favorite': 93,
  'comment': 23,
  'share': 93},
 'e': {'duration': 82,
  'playtime': 27,
  'up': 2,
  'favorite': 81,
  'comment': 32,
  'share': 96},
 'f': {'duration': 96,
  'playtime': 80,
  'up': 3,
  'favorite': 89,
  'comment': 85,
  'share': 38},
 'g': {'duration': 18,
  'playtime': 30,
  'up': 84,
  'favorite': 17,
  'comment': 64,
  'share': 81},
 'h': {'duration': 36,
  'playtime': 96,
  'up': 78,
  'favorite': 33,
  'comment': 59,
  'share': 65}}
# dict -> df: orient='index' turns each outer key into a row label
df_dict = pd.DataFrame.from_dict(data_dict, orient='index')
df_dict
duration playtime up favorite comment share
a 19 52 71 7 86 90
b 13 39 57 62 96 91
c 30 92 98 53 60 29
d 79 85 99 93 23 93
e 82 27 2 81 32 96
f 96 80 3 89 85 38
g 18 30 84 17 64 81
h 36 96 78 33 59 65

csv文件

# Save the DataFrame as a CSV file
df.to_csv('sample.csv')
# index_col=0 must be specified so the first column is read back as the index
df_csv = pd.read_csv('sample.csv', index_col=0)
df_csv.values
array([[19, 52, 71,  7, 86, 90],
       [13, 39, 57, 62, 96, 91],
       [30, 92, 98, 53, 60, 29],
       [79, 85, 99, 93, 23, 93],
       [82, 27,  2, 81, 32, 96],
       [96, 80,  3, 89, 85, 38],
       [18, 30, 84, 17, 64, 81],
       [36, 96, 78, 33, 59, 65]])

excel文件

# Save the DataFrame as an Excel file
df.to_excel('sample.xlsx')
# Without index_col=0 the result gains an extra column named 'Unnamed: 0'
# (the saved row labels), and a default integer index is used instead.
df_excel = pd.read_excel('sample.xlsx')
df_excel
Unnamed: 0 duration playtime up favorite comment share
0 a 19 52 71 7 86 90
1 b 13 39 57 62 96 91
2 c 30 92 98 53 60 29
3 d 79 85 99 93 23 93
4 e 82 27 2 81 32 96
5 f 96 80 3 89 85 38
6 g 18 30 84 17 64 81
7 h 36 96 78 33 59 65