文章

py-excel处理

处理普通Excel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
1. 读取指定 sheet表
2. 读取整表
3. 数据为空的话以 空格 替换
"""
import pandas as pd
from sys import argv, path
import os
import time
import json

# 读取Excel文件
#file_path = "~/Desktop/证书管理.xlsx"
scripts_dir = os.path.dirname(__file__)
file_path = scripts_dir + "/领域环境服务配置.xlsx"

# 读取所有表格名
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names

# 处理指定表数据
sheet_name = "Sheet1"

# 新的DataFrame,用于存储分割后的数据
#new_rows = []

# 指定表数据获取
df = pd.read_excel(file_path, sheet_name=sheet_name)

# 数据为空的话以空格替换,防止在json导出后空数据为nan
df = df.fillna('')

# 添加新列
#df['purchase_channel'] = 'aliyun'
#df['region'] = ''

json_list = df.to_dict(orient='records')


# 打印JSON字符串
json_info = json.dumps(json_list, ensure_ascii=False, indent=4)

print(json_info)
#print(f"source: {file_path}")
#print(f"dest: {file_name}")

处理Excel中多sheet

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
处理需求:
1. 多sheet表格
2. 处理指定行数据
3. 将每行数据中包含 换行的数据 合并为一行,并添加到新行,统一再导出为新的sheet内容
"""
import pandas as pd
from sys import argv, path
import os
import time
import json

# 读取Excel文件
#file_path = "~/Desktop/证书管理.xlsx"
scripts_dir = os.path.dirname(__file__)
file_path = scripts_dir + "/xt_certificate20240712_150810.xlsx"

# 读取所有表格名
excel_file = pd.ExcelFile(file_path)
sheet_names = excel_file.sheet_names

# 处理指定表数据
#sheet_name = "使用方梳理-用途维度"

# 处理指定列数据: 有多行信息
multi_line_column = "station_inst_id"

# 新的DataFrame,用于存储分割后的数据
new_rows = []

# 指定表数据获取
#df = pd.read_excel(file_path, sheet_name=sheet_name)
df = pd.read_excel(file_path)

# 数据为空的话以空格替换,防止在json导出后空数据为nan
df = df.fillna('')

# 添加新列
#df['purchase_channel'] = 'aliyun'
#df['region'] = ''

# 遍历每一行
for _, row in df.iterrows():
    # 获取多行信息,并将其分割为单行
    multi_line_data = str(row[multi_line_column]).split('\n')
    
    # 遍历每一个分割后的单行信息
    for line in multi_line_data:
        new_row = row.copy()  # 复制当前行的所有数据
        new_row[multi_line_column] = line  # 替换指定列的数据为单行信息
        new_rows.append(new_row)  # 将新行添加到新的DataFrame中

# 创建新的DataFrame
new_df = pd.DataFrame(new_rows)

# 如果需要,可以将新DataFrame保存到文件中
file_name = scripts_dir + '/xt_certificate-' + str(time.strftime("%Y%m%d_%H%M%S", time.localtime())) + '.xlsx'
new_df.to_excel(file_name, index=False)

json_list = new_df.to_dict(orient='records')

#purchase_channel = "aliyun"
for item in json_list:
    if item['status'] == "" or item['status'] == "待处理":
        item['status'] = "UP"
    elif item['status'] == "处理中":
        item['status'] = "IP"
    elif item['status'] == "已处理":
        item['status'] = "P"
    else:
        print(f"数据异常,请处理")
        exit(1)

    #if item['region']:
    #    item['region'] = [item['region']]

    if item['station'] == "其他渠道":
        item['station'] = "other"
    elif item['station'] == "视频直播":
        item['station'] = "video-live"
    elif item['station'] == "视频点播":
        item['station'] = "video-vod"

# 打印JSON字符串
json_info = json.dumps(json_list, ensure_ascii=False, indent=4)

print(json_info)
#print(f"source: {file_path}")
#print(f"dest: {file_name}")
本文由作者按照 CC BY 4.0 进行授权