6
pandas 操作 excel 备忘
source link: https://blog.kelu.org/tech/2022/11/12/python-pandas-excel.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
以下链接是我边搜边写 pandas 代码的过程,记录一下。写完手头需求之后基本也懂了。
总结性内容:
细节操作:
我的test代码:
import pandas as pd
import yaml
import datetime
class Project():
def __init__(self,name):
self.name = name
@staticmethod
def columns():
return {
'name':'项目名称',
'requestCount':'需求总数',
"requestDoneCount":'需求完成数',
'percentRequestCount':'需求完成率',
'requestKpiCount':"一个月前需求总数",
"requestKpiDoneCount":'一个月前需求完成数',
'percentKpiRequestCount':'一个月前需求完成率',
'director':'主责人',
}
def doneFlag():
return ("a","b","c","d","e")
def directorOverwrite():
return {
"xxx平台":"无",
}
def getProjectDict(df,now):
ft = df[df.主责中心 == "kelu.org中心"]
ftProjects = ft.groupby("blog")
datetimeNow=datetime.datetime.strptime(now,"%Y%m%d")
kpiTime=datetimeNow.replace(month=datetimeNow.month - 1).strftime("%Y-%m-%d")
projectDict = {}
for mainSys, sysReqs in ftProjects:
project = Project(mainSys)
project.requestCount = sysReqs.shape[0]
sysReqsDone = sysReqs[sysReqs['状态'].isin(Project.doneFlag())]
project.requestDoneCount = sysReqsDone.shape[0]
if(project.requestCount == 0):
project.percentRequestCount = 1
else:
project.percentRequestCount = round((float(project.requestDoneCount) / project.requestCount), 4)
sysReqs['需求创建时间'] = pd.to_datetime(sysReqs['需求创建时间'])
sysReqsKpi = sysReqs[sysReqs['需求创建时间'] < kpiTime]
project.requestKpiCount = sysReqsKpi.shape[0]
sysReqsKpiDone = sysReqsKpi[sysReqsKpi['状态'].isin(Project.doneFlag())]
project.requestKpiDoneCount = sysReqsKpiDone.shape[0]
if(project.requestKpiCount == 0):
project.percentKpiRequestCount = 1
else:
project.percentKpiRequestCount = round((float(project.requestKpiDoneCount) / project.requestKpiCount), 4)
if mainSys in Project.directorOverwrite():
project.director = Project.directorOverwrite()[mainSys]
else:
for _, reqData in sysReqs.iterrows():
project.director = reqData.主责人
break
project.reqs = sysReqs
projectDict[mainSys] = vars(project)
# print(type(projectDict[mainSys]))
return projectDict
def exportMainSheet(projectDict):
df=pd.DataFrame(projectDict).transpose()
newDf=df[[
'name',
'requestCount',
"requestDoneCount",
'percentRequestCount',
'requestKpiCount',
'requestKpiDoneCount',
'percentKpiRequestCount',
'director'
]].sort_values(by=['percentKpiRequestCount','requestKpiCount'],ascending=False)
# 处理输出内容
newDf.reset_index(drop=True, inplace=True)
newDf.index = newDf.index + 1
newDf.loc['合计'] = newDf[['requestCount',"requestDoneCount",'requestKpiCount','requestKpiDoneCount']].sum()
newDf.loc['合计','percentRequestCount'] = round((float(newDf.loc['合计','requestDoneCount']) / newDf.loc['合计','requestCount']), 4)
newDf.loc['合计','percentKpiRequestCount'] = round((float(newDf.loc['合计','requestKpiDoneCount']) / newDf.loc['合计','requestKpiCount']), 4)
newDf[u'percentRequestCount']= newDf['percentRequestCount'].apply(lambda x:format(x, '.2%'))
newDf[u'percentKpiRequestCount']= newDf['percentKpiRequestCount'].apply(lambda x:format(x, '.2%'))
newDf.rename(columns=Project.columns(),inplace=True)
return newDf
def main():
now='20221115'
inputFilename='input'+now+'.xlsx'
outputFilename='output'+now+'.xlsx'
df = pd.read_excel(inputFilename,sheet_name='需求列表')
projectDict = getProjectDict(df,now)
# print(yaml.dump(projectDict,allow_unicode=True))
with pd.ExcelWriter(outputFilename) as writer:
exportMainSheet(projectDict).to_excel(writer, sheet_name="汇总")
directorDict = {}
for projectItem in projectDict.values():
if(projectItem['percentRequestCount'] < 1):
sysReqs = projectItem['reqs']
reqsNotDone = sysReqs[~sysReqs['状态'].isin(Project.doneFlag())]
if projectItem['director'] in directorDict:
reqsNotDone = pd.concat([reqsNotDone,directorDict[projectItem['director']]])
directorDict[projectItem['director']] = reqsNotDone
for director, reqsNotDone in directorDict.items():
reqsNotDone.reset_index(drop=True, inplace=True)
reqsNotDone.index = reqsNotDone.index + 1
reqsNotDone.to_excel(writer, sheet_name=director)
if __name__=="__main__":
main()
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK