# russellhandler.py
# Modules needed for the RussellHandler class
import csv
import datetime
import os
import urllib.request

import bs4 as bs
import pandas.io.sql as pdsql
import pandas_datareader.data as web
import requests

from sqlhandler import SqlHandler
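
# Note (assumption): this module targets the legacy pandas Panel API and the
# pandas_datareader 'yahoo' source, so it presumes pandas < 0.25 and a
# pandas_datareader version where the Yahoo endpoint still works.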


# HELPER FUNCTIONS FOR RussellHandler object
def chunks(l, n):
    """
    Helper generator: splits a list of holdings into chunks of size n,
    so tickers can be passed to the pandas data reader in batches.
    """
    for i in range(0, len(l), n):
        yield l[i:i+n]
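
# Example: list(chunks(['AAPL', 'MSFT', 'GOOG', 'AMZN', 'IBM'], 2))
# returns [['AAPL', 'MSFT'], ['GOOG', 'AMZN'], ['IBM']].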


def getCurrentHoldings(path_to_db):
    """
    Reads the current holdings file - holdings3000.txt.
    Returns a list of current holdings, or [0] if the file does not exist yet.
    """
    holdings_file = os.path.join(path_to_db, 'holdings3000.txt')
    if not os.path.isfile(holdings_file):
        return [0]
    with open(holdings_file, 'r') as f:
        output = f.readlines()
    return [x.strip() for x in output]
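
# The holdings file holds one ticker per line (see updateHoldings below), e.g.:
#   AAPL
#   MSFT
# in which case this function returns ['AAPL', 'MSFT'].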


def csvYield(soup):
    """
    Pass the soup of the iShares product page. Example:
        source = urllib.request.urlopen('https://www.ishares.com/us/products/239714/ishares-russell-3000-etf')
        soup = bs.BeautifulSoup(source, 'lxml')
    Finds the holdings csv link on the page and downloads the file.
    Returns a generator that first yields the date of the csv file, then
    yields one (sector, [(ticker, name), ...]) tuple per sector.
    """
    csv_link = None
    for url in soup.find_all('a'):
        link = str(url.get('href'))
        if 'csv' in link:
            csv_link = 'https://www.ishares.com' + link
            break
    if csv_link is None:
        raise ValueError('No csv link found on the page')
    print(csv_link)
    with requests.Session() as s:
        download = s.get(csv_link)
    decoded_content = download.content.decode('utf-8')
    cr = csv.reader(decoded_content.splitlines(), delimiter=',')
    internal_list = list(cr)
    yield internal_list[2][1]  # yields the date of the csv file
    # Holdings rows have exactly 12 columns; everything else is header/footer
    internal = (row for row in internal_list if len(row) == 12)
    # Group tickers by sector so that it goes nicely sector by sector
    sector_tickers = dict()
    for ticker, name, ass_class, weight, price, shares, mv, nv, sector, *rest in internal:
        sector = str(sector).replace(' ', '_')
        if sector not in sector_tickers:
            sector_tickers[sector] = list()
        if sector not in ['nan', 'Cash_and/or_Derivatives']:
            sector_tickers[sector].append((ticker, name))
    for sector in sector_tickers.keys():
        yield (sector, sector_tickers[sector])
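
# Typical consumption pattern (sketch): the first item yielded is the csv
# date, the rest are (sector, [(ticker, name), ...]) pairs:
#   gen = csvYield(soup)
#   csv_date = next(gen)
#   for sector, tickers in gen:
#       ...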


def downloadAndUpdate(tickers, start, end, conn):
    """
    Downloads the price data for a given date range and uploads it into the
    database, one table per ticker. Relies on the legacy pandas Panel
    returned by older versions of pandas_datareader.
    """
    data = web.DataReader(tickers, 'yahoo', start, end)
    # Rescale raw OHL prices by the same factor as the adjusted close
    for item in ['Open', 'High', 'Low']:
        data[item] = data[item]*data['Adj Close']/data['Close']
    data.rename(items={'Open': 'open', 'High': 'high', 'Low': 'low',
                       'Adj Close': 'close', 'Volume': 'volume'},
                inplace=True)
    data.drop(['Close'], inplace=True)
    for i in range(len(list(data.minor_axis))):
        print("Updating {} into database for dates {} to {}".format(data.minor_axis[i], start, end))
        pdsql.to_sql(data.minor_xs(data.minor_axis[i]).reset_index(),
                     name=data.minor_axis[i],
                     con=conn, index=False, if_exists='append')
    print('Finished uploading {} tickers'.format(len(tickers)))
    return len(tickers)
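
# The adjustment loop above rescales raw prices by the ratio
# Adj Close / Close. For example, with Close = 100, Adj Close = 98 and
# Open = 102, the adjusted open becomes 102 * 98 / 100 = 99.96.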


class RussellHandler(SqlHandler):

    def __init__(self, path_to_db, db_name):
        super(RussellHandler, self).__init__(path_to_db, db_name)

    def updateHoldings(self, new_holdings):
        """
        Rewrites holdings3000.txt after a major Russell update of the holdings
        """
        self.logger.info('Updating the holdings3000.txt file')
        with open(os.path.join(self.path, 'holdings3000.txt'), 'w') as f:
            for i in new_holdings:
                f.write(i + '\n')

    def regUpdate(self, custom_update=None):
        """
        Updates the Russell 3000 database up to the latest data available on
        Yahoo. Reads the contents of holdings3000.txt to know the current
        constituents, unless a custom list of tickers is passed instead.
        """
        self.logger.info('Starting regular update')
        start = self.getLatestDate('AAPL')
        start += datetime.timedelta(days=1)
        end = datetime.datetime.utcnow()
        total_up = 0
        if custom_update is None:
            for chunk in chunks(getCurrentHoldings(self.path), 300):  # chunk size can be changed for memory
                total_up += downloadAndUpdate(chunk, start, end, self.con)
                self.logger.info('Uploaded {}'.format(total_up))
        else:
            for chunk in chunks(custom_update, 300):  # chunk size can be changed for memory
                print(chunk)
                total_up += downloadAndUpdate(chunk, start, end, self.con)
        self.logger.info('Finished regular update')
        self.logger.info('Number of holdings in txt: {}'.format(len(getCurrentHoldings(self.path))))
        self.logger.info('Number of holdings updated now: {}'.format(total_up))
        return
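
    # Example usage (sketch), with a handler already constructed:
    #   handler.regUpdate()                        # update all current holdings
    #   handler.regUpdate(custom_update=['AAPL'])  # update selected tickers only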

    def initDB(self):
        """
        Full update: reads the iShares webpage for the new holdings list,
        then either populates the database for the first time or adds the
        newly included tickers.
        """
        self.logger.info('FULL UPDATE INITIALIZED')
        source = urllib.request.urlopen('https://www.ishares.com/us/products/239714/ishares-russell-3000-etf')
        soup = bs.BeautifulSoup(source, 'lxml')
        gen = csvYield(soup)
        # The first item yielded is the date of the csv file
        print('\n', 'The date of the downloaded csv file is:', next(gen), '\n')
        decision = str(input('Does the date satisfy you? ENTER OR NO '))
        print('\n')
        if decision == 'NO':
            quit()
        current_holdings = set(getCurrentHoldings(self.path))
        new_holdings = set()
        for sector, list_of_tuples in gen:
            if sector != 'Sector':  # skip the csv header row
                if sector != 'Cash_and/or_Derivatives':
                    new_holdings.update(set([ticker[0] for ticker in list_of_tuples]))
        #### Tickers that are known to fail on download can be added below over time
        new_holdings -= {'P5N994', '-', 'NYLDA', 'LEAP'}
        new_holdings = list(new_holdings)
        #### Replace '.' with '-' in tickers, and fix other known tickers
        for i in range(len(new_holdings)):
            if '.' in new_holdings[i]:
                new_holdings[i] = new_holdings[i].replace('.', '-')
            if new_holdings[i] == 'FCEA':
                new_holdings[i] = 'FCE-A'
            if new_holdings[i] == 'LGFB':
                new_holdings[i] = 'LGF-B'
            if new_holdings[i] == 'LGFA':
                new_holdings[i] = 'LGF-A'
            if new_holdings[i] == 'BRKB':
                new_holdings[i] = 'BRK-B'
        new_holdings = set(new_holdings)
        if len(current_holdings) < 10:
            # The database has not been populated yet; serves as the initDB part
            end_today = datetime.datetime.utcnow()
            start = end_today - datetime.timedelta(days=60*3)
            self.logger.info('Populating database for the first time')
            for chunk in chunks(list(new_holdings), 300):
                downloadAndUpdate(chunk, start, end_today, self.con)
            self.updateHoldings(list(new_holdings))
            self.logger.info('Finished populating database for the first time')
        else:
            # Serves as the full-update part:
            # update the intersection of new and current holdings
            intersection = list(new_holdings & current_holdings)
            # self.regUpdate(custom_update=intersection)
            # Then download history for the newly added tickers only
            new = new_holdings - current_holdings
            del current_holdings
            del new_holdings
            new = list(new)
            if not new:
                print('Nothing new to add')
                quit()
            end = datetime.datetime.utcnow()
            start = end - datetime.timedelta(days=60)
            for chunk in chunks(new, 300):
                downloadAndUpdate(chunk, start, end, self.con)
            print('These tickers have been added:')
            print(new, '\n')
            self.updateHoldings(new + intersection)
            print('Holdings txt updated, done')
        self.logger.info('FULL UPDATE FINISHED')
        return


# if __name__ == '__main__':
#     # Testing (hypothetical local paths)
#     path_to_db = '/media/jevjev/FC5891D85891924E/FINDB'
#     db_name = 'russell3000.db'
#
#     with RussellHandler(path_to_db, db_name) as obj:
#         obj.regUpdate()  # e.g. a regular update; obj.initDB() would also work here
#     print('Done')