py-excel处理
处理普通Excel
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
1. 读取指定 sheet表
2. 读取整表
3. 数据为空的话以 空格 替换
"""
import pandas as pd
from sys import argv, path
import os
import time
import json
# 读取Excel文件
#file_path = "~/Desktop/证书管理.xlsx"
scripts_dir = os.path.dirname(__file__)
file_path = scripts_dir + "/领域环境服务配置.xlsx"
# 读取所有表格名
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
# 处理指定表数据
sheet_name = "Sheet1"
# 新的DataFrame,用于存储分割后的数据
#new_rows = []
# 指定表数据获取
df = pd.read_excel(file_path, sheet_name=sheet_name)
# 数据为空的话以空格替换,防止在json导出后空数据为nan
df = df.fillna('')
# 添加新列
#df['purchase_channel'] = 'aliyun'
#df['region'] = ''
json_list = df.to_dict(orient='records')
# 打印JSON字符串
json_info = json.dumps(json_list, ensure_ascii=False, indent=4)
print(json_info)
#print(f"source: {file_path}")
#print(f"dest: {file_name}")
处理Excel中多sheet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
处理需求:
1. 多sheet表格
2. 处理指定行数据
3. 将每行数据中包含 换行的数据 合并为一行,并添加到新行,统一再导出为新的sheet内容
"""
import pandas as pd
from sys import argv, path
import os
import time
import json
# 读取Excel文件
#file_path = "~/Desktop/证书管理.xlsx"
scripts_dir = os.path.dirname(__file__)
file_path = scripts_dir + "/xt_certificate20240712_150810.xlsx"
# 读取所有表格名
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names
# 处理指定表数据
#sheet_name = "使用方梳理-用途维度"
# 处理指定列数据: 有多行信息
multi_line_column = "station_inst_id"
# 新的DataFrame,用于存储分割后的数据
new_rows = []
# 指定表数据获取
#df = pd.read_excel(file_path, sheet_name=sheet_name)
df = pd.read_excel(file_path)
# 数据为空的话以空格替换,防止在json导出后空数据为nan
df = df.fillna('')
# 添加新列
#df['purchase_channel'] = 'aliyun'
#df['region'] = ''
# 遍历每一行
for _, row in df.iterrows():
# 获取多行信息,并将其分割为单行
multi_line_data = str(row[multi_line_column]).split('\n')
# 遍历每一个分割后的单行信息
for line in multi_line_data:
new_row = row.copy() # 复制当前行的所有数据
new_row[multi_line_column] = line # 替换指定列的数据为单行信息
new_rows.append(new_row) # 将新行添加到新的DataFrame中
# 创建新的DataFrame
new_df = pd.DataFrame(new_rows)
# 如果需要,可以将新DataFrame保存到文件中
file_name = scripts_dir + '/xt_certificate-' + str(time.strftime("%Y%m%d_%H%M%S", time.localtime())) + '.xlsx'
new_df.to_excel(file_name, index=False)
json_list = new_df.to_dict(orient='records')
#purchase_channel = "aliyun"
for item in json_list:
if item['status'] == "" or item['status'] == "待处理":
item['status'] = "UP"
elif item['status'] == "处理中":
item['status'] = "IP"
elif item['status'] == "已处理":
item['status'] = "P"
else:
print(f"数据异常,请处理")
exit(1)
#if item['region']:
# item['region'] = [item['region']]
if item['station'] == "其他渠道":
item['station'] = "other"
elif item['station'] == "视频直播":
item['station'] = "video-live"
elif item['station'] == "视频点播":
item['station'] = "video-vod"
# 打印JSON字符串
json_info = json.dumps(json_list, ensure_ascii=False, indent=4)
print(json_info)
#print(f"source: {file_path}")
#print(f"dest: {file_name}")
本文由作者按照
CC BY 4.0
进行授权