Skip to content

Commit 7af4731

Browse files
authored
Merge pull request #703 from addisonlynch/refactor-reader-config
BUG/ENH: Refactor date handling
2 parents bdd4104 + 9d76009 commit 7af4731

File tree

14 files changed

+152
-53
lines changed

14 files changed

+152
-53
lines changed

docs/source/remote_data.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ Historical Time Series Data
140140
Through the
141141
`Alpha Vantage <https://www.alphavantage.co/documentation>`__ Time Series
142142
endpoints, it is possible to obtain historical equities data for individual
143-
symbols. The following endpoints are available:
143+
symbols. For daily, weekly, and monthly frequencies, 20+ years of historical data is available. The past 3-5 days of intraday data is also available.
144+
145+
The following endpoints are available:
144146

145147
* ``av-intraday`` - Intraday Time Series
146148
* ``av-daily`` - Daily Time Series
@@ -591,7 +593,7 @@ example is to download 'Trade Union Density' data which set code is 'TUD'.
591593
import pandas_datareader.data as web
592594
import datetime
593595
594-
df = web.DataReader('TUD', 'oecd', end=datetime.datetime(2012, 1, 1))
596+
df = web.DataReader('TUD', 'oecd')
595597
596598
df.columns
597599

docs/source/whatsnew/v0.8.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Enhancements
3131
- Added testing on Python 3.7 (:issue:`667`)
3232
- Allow IEX to read less than 1 year of data (:issue:`649`)
3333
- Allow data download from Poland using stooq (:issue:`597`)
34+
- All time series readers now use a rolling default starting date. Most default to 5 years before the current date; intraday readers default to 3-5 days before the current date.
3435

3536
.. _whatsnew_080.api_breaking:
3637

pandas_datareader/_utils.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,16 @@ class RemoteDataError(IOError):
1616

1717
def _sanitize_dates(start, end):
1818
"""
19-
Return (datetime_start, datetime_end) tuple
20-
if start is None - default is 2010/01/01
19+
Return (timestamp_start, timestamp_end) tuple
20+
if start is None - default is 5 years before the current date
2121
if end is None - default is today
22+
23+
Parameters
24+
----------
25+
start: str, int, date, datetime, timestamp
26+
Desired start date
27+
end: str, int, date, datetime, timestamp
28+
Desired end date
2229
"""
2330
if is_number(start):
2431
# regard int as year
@@ -30,9 +37,17 @@ def _sanitize_dates(start, end):
3037
end = to_datetime(end)
3138

3239
if start is None:
33-
start = dt.datetime(2010, 1, 1)
40+
# default to 5 years before today
41+
today = dt.date.today()
42+
start = today - dt.timedelta(days=365 * 5)
3443
if end is None:
35-
end = dt.datetime.today()
44+
# default to today
45+
end = dt.date.today()
46+
try:
47+
start = to_datetime(start)
48+
end = to_datetime(end)
49+
except (TypeError, ValueError):
50+
raise ValueError("Invalid date format.")
3651
if start > end:
3752
raise ValueError("start must be an earlier date than end")
3853
return start, end

pandas_datareader/av/time_series.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
import datetime as dt
22

33
from pandas_datareader.av import AlphaVantage
44

@@ -13,11 +13,12 @@ class AVTimeSeriesReader(AlphaVantage):
1313
----------
1414
symbols : string
1515
Single stock symbol (ticker)
16-
start : string, (defaults to '1/1/2010')
17-
Starting date, timestamp. Parses many different kind of date
18-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
19-
end : string, (defaults to today)
20-
Ending date, timestamp. Same format as starting date.
16+
start : string, int, date, datetime, timestamp
17+
Starting date. Parses many different kinds of date
18+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
19+
20 years before current date (3 days for intraday data).
20+
end : string, int, date, datetime, timestamp
21+
Ending date
2122
retry_count : int, default 3
2223
Number of times to retry query request.
2324
pause : int, default 0.1
@@ -52,6 +53,7 @@ def __init__(
5253
chunksize=25,
5354
api_key=None,
5455
):
56+
self._func = function
5557
super(AVTimeSeriesReader, self).__init__(
5658
symbols=symbols,
5759
start=start,
@@ -62,19 +64,26 @@ def __init__(
6264
api_key=api_key,
6365
)
6466

65-
self._func = function
67+
@property
68+
def default_start_date(self):
69+
d_days = 3 if self.intraday else 365 * 20
70+
return dt.datetime.today() - dt.timedelta(days=d_days)
6671

6772
@property
6873
def function(self):
6974
return self._func
7075

76+
@property
77+
def intraday(self):
78+
return True if self.function == "TIME_SERIES_INTRADAY" else False
79+
7180
@property
7281
def output_size(self):
7382
""" Used to limit the size of the Alpha Vantage query when
7483
possible.
7584
"""
76-
delta = datetime.now() - self.start
77-
return "full" if delta.days > 80 else "compact"
85+
delta = dt.datetime.now() - self.start
86+
return "compact" if delta.days < 80 and not self.intraday else "full"
7887

7988
@property
8089
def data_key(self):
@@ -88,7 +97,7 @@ def params(self):
8897
"apikey": self.api_key,
8998
"outputsize": self.output_size,
9099
}
91-
if self.function == "TIME_SERIES_INTRADAY":
100+
if self.intraday:
92101
p.update({"interval": "1min"})
93102
return p
94103

pandas_datareader/base.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
import time
23
import warnings
34

@@ -27,11 +28,11 @@ class _BaseReader(object):
2728
----------
2829
symbols : {str, List[str]}
2930
String symbol or list of symbols
30-
start : string, (defaults to '1/1/2010')
31-
Starting date, timestamp. Parses many different kind of date
31+
start : string, int, date, datetime, timestamp
32+
Starting date. Parses many different kinds of date
3233
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
33-
end : string, (defaults to today)
34-
Ending date, timestamp. Same format as starting date.
34+
end : string, int, date, datetime, timestamp
35+
Ending date
3536
retry_count : int, default 3
3637
Number of times to retry query request.
3738
pause : float, default 0.1
@@ -59,7 +60,7 @@ def __init__(
5960

6061
self.symbols = symbols
6162

62-
start, end = _sanitize_dates(start, end)
63+
start, end = _sanitize_dates(start or self.default_start_date, end)
6364
self.start = start
6465
self.end = end
6566

@@ -76,6 +77,12 @@ def close(self):
7677
"""Close network session"""
7778
self.session.close()
7879

80+
@property
81+
def default_start_date(self):
82+
"""Default start date for reader. Defaults to 5 years before current date"""
83+
today = datetime.date.today()
84+
return today - datetime.timedelta(days=365 * 5)
85+
7986
@property
8087
def url(self):
8188
"""API URL"""

pandas_datareader/iex/daily.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ class IEXDailyReader(_DailyBaseReader):
2727
symbols : string, array-like object (list, tuple, Series), or DataFrame
2828
Single stock symbol (ticker), array-like object of symbols or
2929
DataFrame with index containing stock symbols.
30-
start : string, (defaults to '1/1/2010')
31-
Starting date, timestamp. Parses many different kind of date
32-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
33-
end : string, (defaults to today)
34-
Ending date, timestamp. Same format as starting date.
30+
start : string, int, date, datetime, timestamp
31+
Starting date. Parses many different kinds of date
32+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
33+
15 years before current date
34+
end : string, int, date, datetime, timestamp
35+
Ending date
3536
retry_count : int, default 3
3637
Number of times to retry query request.
3738
pause : int, default 0.1
@@ -80,6 +81,11 @@ def __init__(
8081
chunksize=chunksize,
8182
)
8283

84+
@property
85+
def default_start_date(self):
86+
today = datetime.date.today()
87+
return today - datetime.timedelta(days=365 * 15)
88+
8389
@property
8490
def url(self):
8591
"""API URL"""

pandas_datareader/moex.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ class MoexReader(_DailyBaseReader):
1515
symbols : str, an array-like object (list, tuple, Series), or a DataFrame
1616
A single stock symbol (secid), an array-like object of symbols or
1717
a DataFrame with an index containing stock symbols.
18-
start : str, (defaults to '1/1/2010')
19-
The starting date, timestamp. Parses many different kind of date
20-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
21-
end : str, (defaults to today)
22-
The ending date, timestamp. Same format as starting date.
18+
start : string, int, date, datetime, timestamp
19+
Starting date. Parses many different kinds of date
20+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
21+
5 years before current date.
22+
end : string, int, date, datetime, timestamp
23+
Ending date
2324
retry_count : int, default 3
2425
The number of times to retry query request.
2526
pause : int, default 0.1

pandas_datareader/quandl.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ class QuandlReader(_DailyBaseReader):
2323
Beware of ambiguous symbols (different securities per country)!
2424
Note: Cannot use more than a single string because of the inflexible
2525
way the URL is composed of url and _get_params in the superclass
26-
start : string
27-
Starting date, timestamp. Parses many different kind of date
28-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
29-
end : string, (defaults to today)
30-
Ending date, timestamp. Same format as starting date.
26+
start : string, int, date, datetime, timestamp
27+
Starting date. Parses many different kinds of date
28+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
29+
5 years before current date.
30+
end : string, int, date, datetime, timestamp
31+
Ending date
3132
retry_count : int, default 3
3233
Number of times to retry query request.
3334
pause : int, default 0.1

pandas_datareader/stooq.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@ class StooqDailyReader(_DailyBaseReader):
1212
symbols : string, array-like object (list, tuple, Series), or DataFrame
1313
Single stock symbol (ticker), array-like object of symbols or
1414
DataFrame with index containing stock symbols.
15-
start: string, date which to start interval at YYYYMMDD.
16-
end: string, date which to end interval at YYYYMMDD.
15+
start : string, int, date, datetime, timestamp
16+
Starting date. Parses many different kinds of date
17+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
18+
5 years before current date.
19+
end : string, int, date, datetime, timestamp
20+
Ending date
1721
retry_count : int, default 3
1822
Number of times to retry query request.
1923
pause : int, default 0.1

pandas_datareader/tests/test_base.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime as dt
12
import pytest
23
import requests
34

@@ -26,6 +27,10 @@ def test_invalid_format(self):
2627
b._format = "IM_NOT_AN_IMPLEMENTED_TYPE"
2728
b._read_one_data("a", None)
2829

30+
def test_default_start_date(self):
31+
b = base._BaseReader([])
32+
assert b.default_start_date == dt.date.today() - dt.timedelta(days=365 * 5)
33+
2934

3035
class TestDailyBaseReader(object):
3136
def test_get_params(self):

pandas_datareader/tests/test_utils.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import datetime as dt
2+
import pandas as pd
3+
import pytest
4+
5+
from pandas_datareader._utils import _sanitize_dates
6+
7+
8+
class TestUtils(object):
9+
@pytest.mark.parametrize(
10+
"input_date",
11+
[
12+
"2019-01-01",
13+
"JAN-01-2010",
14+
dt.datetime(2019, 1, 1),
15+
dt.date(2019, 1, 1),
16+
pd.Timestamp(2019, 1, 1),
17+
],
18+
)
19+
def test_sanitize_dates(self, input_date):
20+
expected_start = pd.to_datetime(input_date)
21+
expected_end = pd.to_datetime(dt.date.today())
22+
result = _sanitize_dates(input_date, None)
23+
assert result == (expected_start, expected_end)
24+
25+
def test_sanitize_dates_int(self):
26+
start_int = 2018
27+
end_int = 2019
28+
expected_start = pd.to_datetime(dt.datetime(start_int, 1, 1))
29+
expected_end = pd.to_datetime(dt.datetime(end_int, 1, 1))
30+
assert _sanitize_dates(start_int, end_int) == (expected_start, expected_end)
31+
32+
def test_sanitize_invalid_dates(self):
33+
with pytest.raises(ValueError):
34+
_sanitize_dates(2019, 2018)
35+
36+
with pytest.raises(ValueError):
37+
_sanitize_dates("2019-01-01", "2018-01-01")
38+
39+
with pytest.raises(ValueError):
40+
_sanitize_dates("20199", None)
41+
42+
def test_sanitize_dates_defaults(self):
43+
default_start = pd.to_datetime(dt.date.today() - dt.timedelta(days=365 * 5))
44+
default_end = pd.to_datetime(dt.date.today())
45+
assert _sanitize_dates(None, None) == (default_start, default_end)

pandas_datareader/tiingo.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,12 @@ class TiingoIEXHistoricalReader(_BaseReader):
3434
----------
3535
symbols : {str, List[str]}
3636
String symbol or list of symbols
37-
start : str, (defaults to '1/1/2010')
38-
Starting date, timestamp. Parses many different kind of date
39-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
40-
end : str, (defaults to today)
41-
Ending date, timestamp. Same format as starting date.
37+
start : string, int, date, datetime, timestamp
38+
Starting date. Parses many different kinds of date
39+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
40+
5 years before current date.
41+
end : string, int, date, datetime, timestamp
42+
Ending date
4243
retry_count : int, default 3
4344
Number of times to retry query request.
4445
pause : float, default 0.1

pandas_datareader/tsp.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ class TSPReader(_BaseReader):
1212
symbols : str, array-like object (list, tuple, Series), or DataFrame
1313
Single stock symbol (ticker), array-like object of symbols or
1414
DataFrame with index containing stock symbols.
15-
start : str, (defaults to '1/1/2010')
16-
Starting date, timestamp. Parses many different kind of date
17-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
18-
end : str, (defaults to today)
19-
Ending date, timestamp. Same format as starting date.
15+
start : string, int, date, datetime, timestamp
16+
Starting date. Parses many different kinds of date
17+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
18+
5 years before current date.
19+
end : string, int, date, datetime, timestamp
20+
Ending date
2021
retry_count : int, default 3
2122
Number of times to retry query request.
2223
pause : int, default 0.1

pandas_datareader/yahoo/daily.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@ class YahooDailyReader(_DailyBaseReader):
2222
symbols : string, array-like object (list, tuple, Series), or DataFrame
2323
Single stock symbol (ticker), array-like object of symbols or
2424
DataFrame with index containing stock symbols.
25-
start : string, (defaults to '1/1/2010')
26-
Starting date, timestamp. Parses many different kind of date
27-
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
28-
end : string, (defaults to today)
29-
Ending date, timestamp. Same format as starting date.
25+
start : string, int, date, datetime, timestamp
26+
Starting date. Parses many different kinds of date
27+
representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to
28+
5 years before current date.
29+
end : string, int, date, datetime, timestamp
30+
Ending date
3031
retry_count : int, default 3
3132
Number of times to retry query request.
3233
pause : int, default 0.1

0 commit comments

Comments
 (0)