import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import json
import Quandl as quandl
import os
# One-time setup: uncomment and run the lines below to store the API key.
#KEY = '...'
#!mkdir ./.keys
#with open('./.keys/quandl-api-key.json','w') as f:
#    json.dump({'key': KEY}, f)

# JSON file expected to hold the Quandl API key as {"key": "<token>"}.
KEYFILE = './.keys/quandl-api-key.json'

# Build `_quandl_get`: `quandl.get` with the stored token pre-bound when the
# key file exists, plain `quandl.get` otherwise.
if os.path.exists(KEYFILE):
    import functools

    # Read from KEYFILE itself (not a repeated literal) so the existence
    # check and the read can never point at different paths.
    with open(KEYFILE, 'r') as f:
        quandl_token = json.load(f)['key']
    _quandl_get = functools.partial(quandl.get, authtoken=quandl_token)
else:
    _quandl_get = quandl.get
# Report the pandas and numpy versions in use.
print(pd.__version__, np.__version__)

# Quandl pages for the FRED series downloaded below:
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USARGDPR-Real-GDP-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/GDP-Gross-Domestic-Product-1-Decimal
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/FYGFD-Gross-Federal-Debt
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USAPOPL-Population-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CPIAUCSL-Consumer-Price-Index-for-All-Urban-Consumers-All-Items-USA-Inflation
import collections

# Fetch every series once and store it under its Quandl code with '/'
# replaced by '_' (e.g. 'FRED/GDP' -> 'FRED_GDP'), keeping the listed order.
_data = collections.OrderedDict()
for _key in (
        'FRED/USARGDPR',
        'FRED/GDP',
        'FRED/FYGFD',
        'FRED/USAPOPL',
        'FRED/CPIAUCSL',
):
    _frame = _quandl_get(_key)
    _data[_key.replace('/', '_')] = _frame
# Default figure size (width, height) for every plot that follows.
mpl.rcParams['figure.figsize'] = (20, 4)

# Tick labels for every fourth year from 1949 through 2013; built once
# because the list is identical for every series.
_xticks = [str(x) for x in range(1949, 2017, 4)]

# Plot each downloaded series on its own axes, titled with its key.
# `.items()` replaces the Python-2-only `.iteritems()` so the loop runs
# under both Python 2 and Python 3.
for k, v in _data.items():
    v.plot(
        title=k,
        xlim=('1940', '2020'),
        # NOTE(review): v.max() is passed straight through as the upper
        # limit; presumably each frame has a single value column -- confirm.
        ylim=(0, v.max()),
        xticks=_xticks,
        x_compat=True)
# Peek at the first rows of two series; the resulting tuple is not stored.
_data['FRED_USARGDPR'].head(), _data['FRED_FYGFD'].head()


def _annual_mean(name):
    """Return the series stored under *name*, resampled to yearly means."""
    return _data[name].resample('A', how='mean')


# For each (numerator, denominator, title) triple, plot the ratio of the two
# series' yearly means.
for _numerator, _denominator, _title in (
        ('FRED_USARGDPR', 'FRED_FYGFD', 'USARGDPR / FYGFD'),
        ('FRED_GDP', 'FRED_FYGFD', 'GDP / FYGFD'),
        ('FRED_GDP', 'FRED_USAPOPL', 'FRED_GDP / FRED_USAPOPL'),
        ('FRED_FYGFD', 'FRED_GDP', 'FRED_FYGFD / FRED_GDP'),
):
    (_annual_mean(_numerator) / _annual_mean(_denominator)).plot(title=_title)
# Plot the yearly mean of the CPI series, ticked every fourth year from 1949
# through 2013, and keep the returned axes in `plot` for later annotation.
_cpi_yearly = _data['FRED_CPIAUCSL'].resample('A', how='mean')
_cpi_xticks = [str(x) for x in range(1949, 2017, 4)]
plot = _cpi_yearly.plot(
    title="Yearly Inflation (CPI)",
    xticks=_cpi_xticks,
    x_compat=True,
)
plot.legend(loc='upper left')
def add_line(plot, _year, text=None):
    """Draw a dashed gray vertical marker at *_year* and label it.

    The line runs from y=0 to the upper end of the y-axis view interval as
    it is when the function is called. The label is written vertically,
    anchored at (*_year*, 0) and offset by (+10, +30) points.

    plot  -- object providing ``yaxis.get_view_interval()``, ``plot()`` and
             ``annotate()`` (presumably a matplotlib Axes -- confirm)
    _year -- x position of the marker, in data coordinates
    text  -- label to show; *_year* itself is used when this is None
    """
    top = plot.yaxis.get_view_interval()[-1]
    line_style = dict(color='gray', linewidth=1.5, linestyle="--")
    plot.plot((_year, _year), (0, top), **line_style)

    # Only None falls back to the year; an empty string is a valid label.
    label = _year if text is None else text
    label_style = dict(
        xy=(_year, 0),
        xycoords='data',
        xytext=(+10, +30),
        textcoords='offset points',
        fontsize=12,
        rotation='vertical',
        verticalalignment='bottom',
        horizontalalignment='center',
    )
    plot.annotate(label, **label_style)
# Mark every fourth year from 1949 through 2013 on the CPI axes, labelling
# each marker with the year itself, then display the annotated axes.
for year in range(1949, 2017, 4):
    _year_label = str(year)
    add_line(plot, _year_label)
display(plot)
def get_presidents_df(data_file='./data/us_presidents.csv'):
    """Load the presidents CSV and compute how long each one held office.

    The CSV must contain the columns 'President ', 'Took office ' and
    'Left office ' (note the trailing spaces). It can be fetched with:
    #!wget 'https://commondatastorage.googleapis.com/ckannet-storage/2012-05-08T122246/USPresident-Wikipedia-URLs-Thmbs-HS.csv' -O ./data/us_presidents.csv

    data_file -- path of the CSV file to read.

    Returns a DataFrame indexed by 'Took office ' (also kept as a column)
    with these columns:
        'President '   -- as read from the file
        'Took office ' -- parsed to datetimes
        'Left office ' -- parsed to datetimes; unparseable values become NaT
        'term'         -- 'Left office ' minus 'Took office ' (timedelta)
        'terms'        -- 'term' in units of four years (365.25 * 4 days);
                          0 where 'term' is missing

    Raises ValueError if two rows share the same 'Took office ' value
    (the index is built with verify_integrity=True).
    """
    presidents = pd.read_csv(data_file)

    # Work on an explicit copy: assigning columns on a slice of another
    # frame is a chained assignment and may not behave as intended.
    df = presidents[['President ', 'Took office ', 'Left office ']].copy()
    df['Took office '] = pd.to_datetime(df['Took office '])
    # errors='coerce' replaces the removed `coerce=True` keyword: values
    # that are not dates turn into NaT instead of raising.
    df['Left office '] = pd.to_datetime(df['Left office '], errors='coerce')

    df = df.set_index('Took office ', drop=False, verify_integrity=True)
    df['term'] = df['Left office '] - df['Took office ']

    # Vectorised timedelta -> days -> four-year units. NaT terms become NaN
    # in the division and are mapped to 0, as the earlier per-element
    # implementation did.
    days_in_office = (df['term'] / np.timedelta64(1, 'D')).fillna(0)
    df['terms'] = days_in_office / float(365.25 * 4)
    return df
# Load the presidents table and show its first rows.
df = get_presidents_df()
_preview = df.head()
display(_preview)
def presidents_by_year(df=None):
    """Print, for every row of *df*, the index year and the president's name.

    df -- table with a 'President ' column whose index values expose a
          ``.year`` attribute (e.g. a DatetimeIndex); loaded with
          get_presidents_df() when omitted.
    """
    if df is None:
        df = get_presidents_df()
    # Pair the index with the name column directly instead of going through
    # `.ix[...].to_records()`: `.ix` has been removed from pandas, and
    # iterating a DatetimeIndex yields timestamps that carry `.year`.
    for took_office, name in zip(df.index, df['President ']):
        print(took_office.year, name)
def add_presidents(plot, presidents=None, yearmin=0):
    """Add one labelled vertical marker to *plot* per row of *presidents*.

    Each marker is drawn with add_line() at the row's index value and is
    labelled with the row's 'President ' entry.

    plot       -- axes object handed through to add_line()
    presidents -- table with a 'President ' column; loaded with
                  get_presidents_df() when omitted
    yearmin    -- only rows from the index label ``str(yearmin)`` onward
                  are used
                  NOTE(review): how the default 0 (label '0') is resolved
                  against the index is not visible here -- confirm.
    """
    if presidents is None:
        presidents = get_presidents_df()
    # `.loc` replaces `.ix`, which has been removed from pandas; iterating
    # the sliced column with its index avoids `.to_records()`.
    selected = presidents.loc[str(yearmin):, 'President ']
    for took_office, name in zip(selected.index, selected):
        add_line(plot, took_office, name)
def poli_plot(df, **kwargs):
    """Plot *df* and overlay the president markers from add_presidents().

    X ticks are placed every four years, counting down from 2017 and
    stopping before the year of the earliest index entry. The legend goes
    in the upper-left corner.

    df       -- frame to plot; its index minimum must expose ``.year``
    **kwargs -- forwarded unchanged to ``df.plot``
    """
    first_year = df.index.min().year
    last_year = 2017
    tick_labels = [str(x) for x in range(last_year, first_year, -4)]

    axes = df.plot(xticks=tick_labels, x_compat=True, **kwargs)
    axes.legend(loc='upper left')
    add_presidents(axes, yearmin=first_year)
# CPI series as downloaded, with president markers.
df = _data['FRED_CPIAUCSL']
poli_plot(df)


def _per_capita(name):
    """Return yearly mean of series *name* over yearly mean of FRED_USAPOPL."""
    yearly = _data[name].resample('A', how='mean')
    return yearly / _data['FRED_USAPOPL'].resample('A', how='mean')


# GDP divided by population, yearly means.
df = _per_capita('FRED_GDP')
poli_plot(df, title="GDP per capita (thousands of dollars)")

# Gross federal debt divided by population, yearly means.
df = _per_capita('FRED_FYGFD')
poli_plot(df, title="Federal debt per capita (thousands of dollars)")
## Download datasets
# Every series is fetched through `_quandl_get` rather than bare
# `quandl.get`, so the API token bound into `_quandl_get` (when a key file
# is present) is applied to these requests as well.

# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/GFDEBTN-Federal-Government-Debt-Total-Public-Debt
national_debt = _quandl_get('FRED/GFDEBTN')
fred_gfdebtn = national_debt  # alias of the same object

# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CP-Corporate-Profits-After-Tax
corporate_profits_after_tax = _quandl_get('FRED/CP')
fred_cp = corporate_profits_after_tax  # alias of the same object

# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/GDP-Gross-Domestic-Product-1-Decimal
gdp = _quandl_get('FRED/GDP')
fred_gdp = gdp  # alias of the same object

# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CPIAUCSL-Consumer-Price-Index-for-All-Urban-Consumers-All-Items-USA-Inflation
inflation = _quandl_get('FRED/CPIAUCSL')
fred_cpi = inflation  # alias of the same object
## To one DataFrame (with a common date index)
# Rename each frame's 'Value' column, in place, to a series-specific name.
for _frame, _column in (
        (fred_cp, 'fred_cp'),
        (fred_gfdebtn, 'fred_gfdebtn'),
        (fred_gdp, 'fred_gdp'),
        (fred_cpi, 'fred_cpi'),
):
    _frame.rename(columns={'Value': _column}, inplace=True)

# Attach the other three series to fred_cp as additional columns.
for _target, _series in (
        ('fred_gdp', fred_gdp['fred_gdp']),
        ('fred_gfd', fred_gfdebtn['fred_gfdebtn']),
        ('fred_cpi', fred_cpi['fred_cpi']),
):
    fred_cp[_target] = _series

## Draw plots
# One subplot per column on a shared x axis; figure height grows by 4 per
# column.
_panel_count = len(fred_cp.columns)
fred_cp.plot(
    subplots=True,
    sharex=True,
    title='Free Money',
    figsize=(20, 4 * _panel_count),
)

## Calculate pairwise correlation
# http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.corr.html
_correlations = fred_cp.corr()
print(_correlations)