Download a CSV file from Yahoo Finance
Using urllib's urlretrieve
# Python 2 equivalent: from urllib import urlretrieve
# Python 3:
from urllib import request

# Download the CSV file and save it locally as 'pingan.csv'.
request.urlretrieve('http://table.finance.yahoo.com/table.csv?s=000001.sz', 'pingan.csv')
CSV Module
import csv
import os

# Print every row of 'pingan.csv', then copy its first three rows into
# 'pingan_copy.csv'.
#
# newline='' lets the csv module handle line endings itself, as the csv
# documentation recommends for files passed to csv.reader / csv.writer.
with open('pingan.csv', 'r', newline='') as rf:
    reader = csv.reader(rf)
    print(reader)
    for row in reader:
        print(row)

    # Rewind the file to the start: the loop above consumed every row, so
    # without this the next(reader) calls below would raise StopIteration.
    rf.seek(0)
    # Equivalent: rf.seek(0, os.SEEK_SET)

    # Python 3 notes:
    # * Files must be opened in text mode ('r' / 'w'), not 'rb' / 'wb'.
    #   Reading from a file opened with 'rb' fails with:
    #   _csv.Error: iterator should return strings, not bytes
    #   (did you open the file in text mode?)
    # * Use next(reader) instead of reader.next(); the latter fails with:
    #   '_csv.reader' object has no attribute 'next'
    with open('pingan_copy.csv', 'w', newline='') as wf:
        writer = csv.writer(wf)
        for _ in range(3):
            writer.writerow(next(reader))
        wf.flush()  # make the written rows visible in the file immediately
Final code
import csv

# Copy the header row of 'pingan.csv' into 'pingan2.csv', followed by every
# row whose volume is at least 50,000,000, stopping at the first row dated
# before 2016-01-01.
#
# Sample rows:
#   ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
#   ['2016-09-09', '9.40', '9.43', '9.36', '9.38', '32743100', '9.38']
# Each column can be read by index, e.g. Date is row[0] and Volume is
# row[5], but every value is a string rather than a number.
#
# newline='' lets the csv module handle line endings itself, as the csv
# documentation recommends for files passed to csv.reader / csv.writer.
with open('pingan.csv', 'r', newline='') as rf:
    reader = csv.reader(rf)
    with open('pingan2.csv', 'w', newline='') as wf:
        writer = csv.writer(wf)
        # The first line is the header row, not data: copy it through as-is.
        headers = next(reader)
        writer.writerow(headers)

        for row in reader:
            if not row:
                # Blank line in the input: nothing to filter.
                continue
            # ISO-formatted date strings compare correctly as plain strings.
            # NOTE(review): breaking here assumes rows are ordered newest
            # first, as in the sample above - confirm for other inputs.
            if row[0] < '2016-01-01':
                break
            if int(row[5]) >= 50000000:
                writer.writerow(row)
print('end')