Grabbing all Natural Gas storage data from the Energy Information Agency (EIA)¶
In [18]:
import requests
import pandas as pd
In [10]:
#this is not pushed to git hub, my api key is there
import config
In [11]:
#point MY_KEY at a working eia api key
MY_KEY = config.EIA_API
In [12]:
#api url for series name Weekly Lower 48 States Natural Gas Working Underground Storage, Weekly
storageURL = 'http://api.eia.gov/series/?api_key=' + MY_KEY + '&series;_id=NG.NW2_EPG0_SWO_R48_BCF.W&out;=JSON'
In [13]:
#get requests data from specificied url
storage = requests.get(storageURL)
In [15]:
#the request was for json formatting so uses requests' JSON decoder
storage_json = storage.json()
#storage_json is a nested dict, so take a look at it and find my way into the sub-dict with the data
date_series = storage_json['series'][0]['data']
In [30]:
date_series[0:10]
Out[30]:
In [16]:
#loop through that date_series dict and make and array out of the date entries
endi = len(date_series)
date = []
for i in range(endi):
date.append(date_series[i][0])
In [19]:
#convert that bad boy to a data frame
df = pd.DataFrame(data=date)
df.columns = ['Date']
In [20]:
#loop through the rest of the dict with and put the data in
lenj = len(storage_json)-1
for j in range (lenj):
data_series = storage_json['series'][0]['data']
data = []
endk = len(date_series)
for k in range (endk):
data.append(data_series[k][1])
df[j] = data
In [21]:
#rename columns
df.columns = ['date','storageUS']
#substring date to use for making a python datevariable, the infer datetime thing didnt work
df['year'] = df['date'].apply(lambda t: t[0:4])
df['year'] = df['year'].astype(int)
df['month'] = df['date'].apply(lambda t: t[4:6])
df['month'] = df['month'].astype(int)
df['day'] = df['date'].apply(lambda t: t[-2:])
df['day'] = df['day'].astype(int)
In [22]:
#date formatted as datetime
df['date_f'] = pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d')
In [23]:
#week of year to make y-y comparisons
df['week'] = df['date_f'].dt.week
In [24]:
#move the index to date time
df.set_index(['date_f'], inplace = True)
#sort by datetime acending, bc the data comes in most rescent first
df = df.sort_index()
#calc weekly storage change
df['lastWeek'] = df['storageUS'].shift()
df['change'] = df['storageUS'] - df['lastWeek']
In [25]:
#make it so the index label doesnt appear, just looks prettier
df.index.name = None
In [27]:
df.tail()
Out[27]: