6

pandas 操作 excel 备忘

 1 year ago
source link: https://blog.kelu.org/tech/2022/11/12/python-pandas-excel.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

pandas 操作 excel 备忘

2022-11-12     tech python pandas
python.jpg

以下链接是我边搜边写 pandas 代码的过程,记录一下。写完手头需求之后基本也懂了。

总结性内容:

细节操作:

我的test代码:

import pandas as pd
import yaml
import datetime

class Project():
    def __init__(self,name):
        self.name = name

    @staticmethod
    def columns():
        return {
            'name':'项目名称',
            'requestCount':'需求总数',
            "requestDoneCount":'需求完成数', 
            'percentRequestCount':'需求完成率',
            'requestKpiCount':"一个月前需求总数",
            "requestKpiDoneCount":'一个月前需求完成数',
            'percentKpiRequestCount':'一个月前需求完成率',
            'director':'主责人',
        }

    def doneFlag():
        return ("a","b","c","d","e")
    def directorOverwrite():
        return {
            "xxx平台":"无",
        }

def getProjectDict(df,now):
    ft = df[df.主责中心 == "kelu.org中心"]
    ftProjects = ft.groupby("blog")
    datetimeNow=datetime.datetime.strptime(now,"%Y%m%d")
    kpiTime=datetimeNow.replace(month=datetimeNow.month - 1).strftime("%Y-%m-%d")

    projectDict = {}

    for mainSys, sysReqs in ftProjects:
        project = Project(mainSys)
        project.requestCount = sysReqs.shape[0]
        sysReqsDone = sysReqs[sysReqs['状态'].isin(Project.doneFlag())]
        project.requestDoneCount = sysReqsDone.shape[0]
        if(project.requestCount == 0):
            project.percentRequestCount = 1
        else:
            project.percentRequestCount = round((float(project.requestDoneCount) / project.requestCount), 4)

        sysReqs['需求创建时间'] = pd.to_datetime(sysReqs['需求创建时间'])
        sysReqsKpi = sysReqs[sysReqs['需求创建时间'] < kpiTime]
        project.requestKpiCount = sysReqsKpi.shape[0]
        sysReqsKpiDone = sysReqsKpi[sysReqsKpi['状态'].isin(Project.doneFlag())]
        project.requestKpiDoneCount = sysReqsKpiDone.shape[0]
        if(project.requestKpiCount == 0):
            project.percentKpiRequestCount = 1
        else:
            project.percentKpiRequestCount = round((float(project.requestKpiDoneCount) / project.requestKpiCount), 4)

        if mainSys in Project.directorOverwrite():
            project.director = Project.directorOverwrite()[mainSys]
        else:
            for _, reqData in sysReqs.iterrows():
                project.director = reqData.主责人
                break

        project.reqs = sysReqs

        projectDict[mainSys] = vars(project)
        # print(type(projectDict[mainSys]))

    return projectDict


def exportMainSheet(projectDict):
    df=pd.DataFrame(projectDict).transpose()
    newDf=df[[
        'name',
        'requestCount',
        "requestDoneCount",
        'percentRequestCount',
        'requestKpiCount',
        'requestKpiDoneCount',
        'percentKpiRequestCount',
        'director'
        ]].sort_values(by=['percentKpiRequestCount','requestKpiCount'],ascending=False)

    # 处理输出内容
    newDf.reset_index(drop=True, inplace=True)
    newDf.index = newDf.index + 1
    newDf.loc['合计'] = newDf[['requestCount',"requestDoneCount",'requestKpiCount','requestKpiDoneCount']].sum()
    newDf.loc['合计','percentRequestCount'] = round((float(newDf.loc['合计','requestDoneCount']) / newDf.loc['合计','requestCount']), 4)
    newDf.loc['合计','percentKpiRequestCount'] = round((float(newDf.loc['合计','requestKpiDoneCount']) / newDf.loc['合计','requestKpiCount']), 4)

    newDf[u'percentRequestCount']= newDf['percentRequestCount'].apply(lambda x:format(x, '.2%'))
    newDf[u'percentKpiRequestCount']= newDf['percentKpiRequestCount'].apply(lambda x:format(x, '.2%'))

    newDf.rename(columns=Project.columns(),inplace=True)

    return newDf

def main():
    now='20221115'
    inputFilename='input'+now+'.xlsx'
    outputFilename='output'+now+'.xlsx'

    df = pd.read_excel(inputFilename,sheet_name='需求列表')
    projectDict = getProjectDict(df,now)
    # print(yaml.dump(projectDict,allow_unicode=True))


    with pd.ExcelWriter(outputFilename) as writer:
        exportMainSheet(projectDict).to_excel(writer, sheet_name="汇总")
        directorDict = {}
        for projectItem in projectDict.values():
            if(projectItem['percentRequestCount'] < 1):
                sysReqs = projectItem['reqs']
                reqsNotDone = sysReqs[~sysReqs['状态'].isin(Project.doneFlag())]
                if projectItem['director'] in directorDict:
                    reqsNotDone = pd.concat([reqsNotDone,directorDict[projectItem['director']]])
                directorDict[projectItem['director']] = reqsNotDone

        for director, reqsNotDone in directorDict.items():
            reqsNotDone.reset_index(drop=True, inplace=True)
            reqsNotDone.index = reqsNotDone.index + 1
            reqsNotDone.to_excel(writer, sheet_name=director)



if __name__=="__main__":
    main()

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK