python处理csv数据的方法
本文实例讲述了python处理csv数据的方法。分享给大家供大家参考。具体如下:
第一段Python脚本:遍历原始订单生命周期CSV文件,跳过TPO订单,计算每笔订单Sent与Acked状态的延迟,并把结果连同交易所信息写入新的CSV文件。
代码如下:
#coding=utf-8
__author__ = 'dehua.li'
from datetime import *
import datetime
import csv
import sys
import time
import string
import os
import os.path
import pylab as plt
# Directory tree that is walked for the raw order life-cycle CSV files.
rootdir='/nethome/dehua.li/orderlifeCycleData/xingzheng'
# Directory that receives one converted CSV per input file, under the same file name.
writeFileDir="/nethome/dehua.li/orderlifeMyWork/xingzheng/csv"
# Contract table: its column 1 is matched against an order row's column 0 and
# its column 3 is copied to the output as the exchange.
heyueFile="/nethome/dehua.li/orderlifeCycleData/heyue_150128.csv"
# Initial placeholder values (a string and two empty lists).
ms_acked="1"
msg=[]
ex=[]
def getTheDate(date):
    """Return the trailing date token of an order-data file name.

    ``date`` is a file name made of three underscore-separated parts plus one
    extension, e.g. ``dongzheng_orderlifeCycleData_20150111.csv``; the third
    part (``20150111``) is returned.

    The argument itself is parsed. The previous implementation ignored it and
    silently read the module-level ``filename`` loop variable instead.

    Raises:
        ValueError: if the name does not contain exactly one ``.`` and the
            stem does not contain exactly two ``_`` separators.
    """
    stem, _extension = date.split('.')
    _prefix, _dataset, day = stem.split('_')
    return day
# ---------------------------------------------------------------------------
# Convert every raw order life-cycle CSV under ``rootdir`` into a latency CSV
# in ``writeFileDir``.
#
# This is a Python 2 script: csv files are opened in binary mode ('rb'/'wb'),
# which is what the Python 2 csv module expects.
#
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- confirm against the original script.
# ---------------------------------------------------------------------------

# Latencies above ten minutes are reported and dropped.
_MAX_LATENCY_MS = 10 * 60 * 1000
_STAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _parse_local(stamp):
    """Parse a 'YYYY-mm-dd HH:MM:SS' string as local time into a datetime."""
    return datetime.datetime.fromtimestamp(
        time.mktime(time.strptime(stamp, _STAMP_FORMAT)))


def _elapsed_ms(later_stamp, earlier_stamp):
    """Return ``later - earlier`` for two '<date> <time>.<fraction>' stamps.

    The whole-second difference is scaled by 1000 and the integer fractions
    are subtracted directly, so the result is in milliseconds provided the
    fraction is a millisecond count (presumably it is; verify against data).

    ``timedelta.seconds`` is used on purpose to keep the original results: it
    ignores the ``days`` component, so a ``later`` stamp that is actually
    earlier wraps around to almost one day and is then rejected by the
    ten-minute cap instead of producing a large negative value.

    Raises:
        ValueError: if a stamp does not contain exactly one '.' or its
            date/time part does not match 'YYYY-mm-dd HH:MM:SS'.
    """
    later_text, later_fraction = later_stamp.split('.')
    earlier_text, earlier_fraction = earlier_stamp.split('.')
    whole = _parse_local(later_text) - _parse_local(earlier_text)
    return int(later_fraction) - int(earlier_fraction) + int(whole.seconds) * 1000


def _load_exchange_by_code(path):
    """Map contract code (column 1) to exchange (column 3) from ``path``.

    The first row for a code wins, matching a top-to-bottom scan that stops
    at the first hit. Rows too short to hold both columns are ignored.
    """
    exchange_by_code = {}
    with open(path, 'rb') as handle:
        for contract in csv.reader(handle):
            if len(contract) > 3:
                exchange_by_code.setdefault(contract[1], contract[3])
    return exchange_by_code


def _convert_order_file(source_path, target_path, exchange_by_code):
    """Write the Sent/Acked latency rows of one order file to ``target_path``.

    For every non-TPO row whose status (column 12) is 'Sent', the latency
    between column 15 and column 19 is written, followed by one row for each
    'Acked' row with the same order id (column 1), whose latency is measured
    from the Sent row's column 15 to the Acked row's column 15. Rows are only
    written when the Sent row's code (column 0) is in ``exchange_by_code``.
    """
    with open(target_path, 'wb') as target:
        writer = csv.writer(target)
        writer.writerow(['FeedCode', 'OrderId', 'Status', 'LocalTime', 'Time', 'Exchange'])

        with open(source_path, 'rb') as source:
            rows = list(csv.reader(source))

        # Index the Acked rows once, in file order, instead of re-reading and
        # re-scanning the whole file for every Sent row.
        acked_by_order = {}
        for row in rows:
            if len(row) > 12 and row[12] == 'Acked':
                acked_by_order.setdefault(row[1], []).append(row)

        for sent in rows:
            if sent[3] == 'TPO':
                continue
            if sent[12] != 'Sent':
                continue

            sent_ms = _elapsed_ms(sent[15], sent[19])
            if sent_ms > _MAX_LATENCY_MS:
                print("ms_sent>600000")
                continue
            if sent_ms < 0:
                # Reported only; the row is still written.
                print('wrong1')

            exchange = exchange_by_code.get(sent[0])
            if exchange is not None:
                writer.writerow([sent[0], sent[1], sent[12], sent[15], str(sent_ms), exchange])

            for acked in acked_by_order.get(sent[1], ()):
                acked_ms = _elapsed_ms(acked[15], sent[15])
                if acked_ms > _MAX_LATENCY_MS:
                    print("MSacked>600000")
                    continue
                # The exchange is looked up by the Sent row's code, not the
                # Acked row's.
                if exchange is not None:
                    writer.writerow([acked[0], acked[1], acked[12], acked[15], str(acked_ms), exchange])


# The contract table does not change while the script runs, so load it once.
_exchange_by_code = _load_exchange_by_code(heyueFile)

for parent, dirname, filenames in os.walk(rootdir):
    for filename in filenames:
        fileNameWrite = os.path.join(writeFileDir, filename)
        print(fileNameWrite)
        fileName = os.path.join(parent, filename)
        print(fileName)
        _convert_order_file(fileName, fileNameWrite, _exchange_by_code)
第二段Python脚本:读取上一步生成的CSV文件,按交易所分别统计Sent与Acked延迟的均值和中位数,并绘制、保存折线图。
代码如下:
#coding=utf-8
#__author__ = 'dehua.li'
from datetime import *
import datetime
import csv
import sys
import time
import string
import os
import os.path
import pylab as plt
def median(lst):
    """Return the median of ``lst`` as a float.

    For an odd number of items this is the middle item of the sorted data;
    for an even number it is the average of the two middle items. An empty
    list yields ``0.0`` (an empty slice is summed).
    """
    # Number of middle items to average: 1 for odd lengths, 2 for even ones.
    width = 1 if len(lst) % 2 else 2
    # Floor division keeps the slice index an integer on every Python version.
    start = (len(lst) - 1) // 2
    ordered = sorted(lst)
    return sum(ordered[start:start + width]) / float(width)
def mean(lst):
    """Return the arithmetic mean of ``lst``, or ``0`` for an empty list.

    The sum is divided by a float so that integer samples are not truncated
    by Python 2 integer division.
    """
    if not lst:
        return 0
    return sum(lst) / float(len(lst))
# Time-of-day boundaries as zero-padded "HH:MM:SS" strings.
# NOTE(review): neither name is referenced in the visible code; afterOneMin
# compares against literal copies of the same two values.
nightLine="21:01:00"
morningLine="09:01:00"
def getTheDate(date):
    """Return the trailing date token of an order-data file name.

    ``date`` is a file name made of three underscore-separated parts plus one
    extension (``<part1>_<part2>_<part3>.<ext>``); ``<part3>`` is returned.

    The argument itself is parsed. The previous implementation ignored it and
    silently read the module-level ``filename`` loop variable instead.

    Raises:
        ValueError: if the name does not contain exactly one ``.`` and the
            stem does not contain exactly two ``_`` separators.
    """
    stem, _extension = date.split('.')
    _prefix, _dataset, day = stem.split('_')
    return day
def afterOneMin(time):
    """Return 1 if the stamp's time of day lies in an accepted window, else 0.

    ``time`` is a ``"<date> <HH:MM:SS>.<fraction>"`` string. Only the
    ``HH:MM:SS`` part is inspected, compared as a zero-padded string against
    three windows with exclusive bounds:

    * later than 21:01:00,
    * between 09:01:00 and 20:00:00,
    * between 00:00:00 and 05:00:00.

    The parameter keeps its original name for keyword callers even though it
    shadows the ``time`` module inside this function.

    Raises:
        ValueError: if the stamp does not contain exactly one '.' or its
            first part does not contain exactly one space.
    """
    stamp, _fraction = time.split('.')
    _day, clock = stamp.split(' ')
    accepted = (
        clock > "21:01:00"
        or "09:01:00" < clock < "20:00:00"
        or "00:00:00" < clock < "05:00:00"
    )
    return 1 if accepted else 0
# ---------------------------------------------------------------------------
# Per-exchange latency statistics and plots.
#
# Reads every latency CSV under ``rootdir`` (columns: FeedCode, OrderId,
# Status, LocalTime, Time, Exchange), accumulates the Time values per exchange
# and status, records the running mean and median after each file, and plots
# those series as four figures (Sent/Acked x Mean/Median).
#
# This is a Python 2 script: csv files are opened in binary mode ('rb').
# The per-FeedCode max/min/average export that the original kept disabled
# (its csv writer was commented out) is not performed.
#
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- confirm against the original script.
# ---------------------------------------------------------------------------
rootdir = "/nethome/dehua.li/orderlifeMyWork/xingzheng/csv"
_CONTRACT_FILE = '/nethome/dehua.li/orderlifeCycleData/heyue_150128.csv'
_PLOT_DIR = '/nethome/dehua.li/orderlifeMyWork/xingzheng/data_noTPO_in10minutes'

# (label, row index in the contract file, plot colour), in the order in which
# rows are matched and series are drawn. Each exchange code is read from
# column 3 of the given row.
# NOTE(review): assumes those fixed rows belong to the exchanges the labels
# name -- confirm against the contract file.
_EXCHANGES = [
    ('zhengshang', 300, 'r'),
    ('dashang', 5, 'b'),
    ('shangqi', 165, 'g'),
    ('zhongjin', 435, 'grey'),
]
_STATUSES = ('Sent', 'Acked')
# (figure number, status, statistic); the figure name is status + statistic.
_FIGURES = [
    (1, 'Sent', 'Mean'),
    (2, 'Sent', 'Median'),
    (3, 'Acked', 'Mean'),
    (4, 'Acked', 'Median'),
]

with open(_CONTRACT_FILE, 'rb') as csvfile:
    _contract_rows = list(csv.reader(csvfile))
_exchange_codes = [(label, _contract_rows[index][3]) for label, index, _colour in _EXCHANGES]

# samples[(label, status)]         -> every latency seen so far
# history[(label, status, stat)]   -> one value appended per processed file
# The sample lists are never cleared, so each history entry describes all
# files processed up to that point, not just the latest one.
samples = {}
history = {}
for _label, _index, _colour in _EXCHANGES:
    for _status in _STATUSES:
        samples[(_label, _status)] = []
        history[(_label, _status, 'Mean')] = []
        history[(_label, _status, 'Median')] = []

for parent, dirname, filenames in os.walk(rootdir):
    for filename in filenames:
        # Join with the directory being walked so that files in
        # subdirectories resolve correctly.
        fileName = os.path.join(parent, filename)
        with open(fileName, 'rb') as csvfile1:
            rows = list(csv.reader(csvfile1))

        for item in rows:
            if item and item[0] == 'FeedCode':
                continue  # header row
            if len(item) <= 5:
                # Report the malformed row and skip it; indexing column 5
                # below would otherwise raise IndexError.
                print(fileName)
                print(item)
                print('++++++++++++++++++++++++++++++')
                continue

            label = None
            for candidate, code in _exchange_codes:
                if item[5] == code:
                    label = candidate
                    break
            status = item[2]
            if label is None or status not in _STATUSES:
                print("wrong info")
                print(item)
                continue

            latency = int(item[4])
            samples[(label, status)].append(latency)
            if label == 'shangqi' and status == 'Sent' and latency >= 600000:
                print("------------")
                print(item)

        # Debug dump when the running shangqi Sent mean is unusually high.
        shangqi_sent = samples[('shangqi', 'Sent')]
        if mean(shangqi_sent) > 420000:
            print(sum(shangqi_sent))
            print(len(shangqi_sent))
            print(item)
            print(fileName)
            print(shangqi_sent)

        for _label, _index, _colour in _EXCHANGES:
            for _status in _STATUSES:
                values = samples[(_label, _status)]
                history[(_label, _status, 'Median')].append(median(values))
                history[(_label, _status, 'Mean')].append(mean(values))

for number, status, statistic in _FIGURES:
    figure_name = status + statistic
    plt.figure(number)
    plt.title(figure_name + ' r-zhengshang b-dashang,green-shangqi grey-zhongjin')
    for _label, _index, _colour in _EXCHANGES:
        series = history[(_label, status, statistic)]
        plt.plot(range(1, len(series) + 1), series, _colour)
    plt.savefig(os.path.join(_PLOT_DIR, figure_name + '.png'))
plt.show()
print('over')
希望本文所述对大家的Python程序设计有所帮助。
数据分析咨询请扫描二维码
CDA数据分析师在中国航信高科技产业园进行了面向测试度量的数据分析培训课程,培训人数近2 ...
2024-05-01CDA数据分析师走进深圳迈瑞生物医疗电子股份有限公司,在迈瑞总部展开了为期两天的培训,本次课程参训人员线上及线下近百人, ...
2024-05-01CDA数据分析师在合肥市对合肥阳光新能源科技有限公司开展了为期8天的企业内训。 合肥阳光新能源科技 ...
2024-05-01CDA数据分析师走进海尔大学,进行了《数据治理与数据中台建设的道与术》专题培训,培训现场爆满,近百人参加了此次培训。 ...
2024-05-01在中国银行苏州分行培训中心开始数据分析师培训,此次培训课程共10天内容,包括Excel、MySQL、概率论与数理统计、SPSS等内容, ...
2024-05-01从实际的业务需求出发,结合行业的典型应用特点,围绕实际的商业问题,探讨数据挖掘、机器学习模型在金融领域的应用,包括获客、信用评分、细分画像、交叉销售、反欺诈、违规识别、时序预测、运筹优化、流程挖掘九个方面,形成 ...
2024-05-01本次培训课程为线上+线下的模式,由于学员编程能力不一、部分学员没有编程基础,故提供统计学、python基 ...
2024-05-01华夏银行信用卡中心-机器学习培训 1、课程亮点 取材于业界一流企业和顶级咨询公司的行业实践;已经被证明是人人 ...
2024-05-01主 题:数据中台建设及数据分析应用主题分享 1. 数据中台市场洞察 2. 主流数据中台产品比较 3. 某企业数据中 ...
2024-05-01围绕“数据驱动”战略,全力打造我行 300 人数字化人才梯队,着力培养数字化管理人才、大数据专业团队 ...
2024-05-01在当今数据驱动的商业环境中,数据分析成为了企业决策的重要依据。通过对大量数据的收集、处理和分析,企业能够更好地理解市场 ...
2024-04-29在人工智能(AI)的世界里,提示词(Prompt)是一种强大的工具,它能够引导AI按照用户的需求产生特定的输出。本文将深入探讨AI ...
2024-04-29CDA立足未来职场,拓展前沿视野——对外经贸大学保险学院举办“三全育人大讲堂”分享行业最新动态。 ...
2024-04-294月2日,CDA数据分析师创始发起人兼协会理事长赵坚毅博士受邀在浙江万里学院举办了一场以“数字化能力在职场中的作用” ...
2024-04-29随机森林(Random Forests)现在机器学习中比较火的一个算法,是一种基于Bagging的集成学习方法,能够很好地处理分类和回归的问 ...
2022-12-23方差分析是数据分析中常用的一种统计分析方法,接下来让我们简单了解一下方差分析的基本思想和原理吧。 方差分析(Analysis ...
2022-12-23来源:关于数据分析与可视化 关于streamlit-aggrid 数据排序 表格样式的调整 数据 ...
2022-08-03作者:麦叔 定义 「把上面晦涩的概念汇成一句话就是:」 ❝ 回调函数就是一个被作为参 ...
2022-08-03现今,高学历人群日益增多,物以稀为贵的高学历光环淡去。无论本科生还是研究生,甚至博士生,求职竞争力都大不如前,就业压力越来越大。
2022-06-01某家企业10个人面试,有9个本科生……如何脱颖而出,除得体的举止和良好的沟通力外,证书成重要筹码,这也是很多人考证的关键所在。
2022-04-14