Grabbing all Natural Gas storage data from the Energy Information Agency (EIA)¶

In [18]:

import requests
import pandas as pd

In [10]:

#this is not pushed to git hub, my api key is there
import config

In [11]:

#point MY_KEY at a working eia api key
MY_KEY = config.EIA_API

In [12]:

#api url for series name Weekly Lower 48 States Natural Gas Working Underground Storage, Weekly
storageURL = 'http://api.eia.gov/series/?api_key=' + MY_KEY + '&series;_id=NG.NW2_EPG0_SWO_R48_BCF.W&out;=JSON'

In [13]:

#get requests data from specificied url
storage = requests.get(storageURL)

In [15]:

#the request was for json formatting so uses requests' JSON decoder
storage_json = storage.json()

#storage_json is a nested dict, so take a look at it and find my way into the sub-dict with the data
date_series = storage_json['series'][0]['data']

In [30]:

date_series[0:10]

Out[30]:

[['20161230', 3311],
 ['20161223', 3360],
 ['20161216', 3597],
 ['20161209', 3806],
 ['20161202', 3953],
 ['20161125', 3995],
 ['20161118', 4045],
 ['20161111', 4047],
 ['20161104', 4017],
 ['20161028', 3963]]

In [16]:

#loop through that date_series dict and make and array out of the date entries
endi = len(date_series)
date = []

for i in range(endi):
    date.append(date_series[i][0])

In [19]:

#convert that bad boy to a data frame
df = pd.DataFrame(data=date)
df.columns = ['Date']

In [20]:

#loop through the rest of the dict with and put the data in
lenj = len(storage_json)-1

for j in range (lenj):
    data_series = storage_json['series'][0]['data']
    data = []
    endk = len(date_series) 
    for k in range (endk):
        data.append(data_series[k][1])
    df[j] = data

In [21]:

#rename columns
df.columns = ['date','storageUS']

#substring date to use for making a python datevariable, the infer datetime thing didnt work 
df['year'] = df['date'].apply(lambda t: t[0:4])
df['year'] = df['year'].astype(int)

df['month'] = df['date'].apply(lambda t: t[4:6])
df['month'] = df['month'].astype(int)

df['day'] = df['date'].apply(lambda t: t[-2:])
df['day'] = df['day'].astype(int)

In [22]:

#date formatted as datetime
df['date_f'] = pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d')

In [23]:

#week of year to make y-y comparisons
df['week'] = df['date_f'].dt.week

In [24]:

#move the index to date time
df.set_index(['date_f'], inplace = True)
#sort by datetime acending, bc the data comes in most rescent first
df = df.sort_index()
#calc weekly storage change
df['lastWeek'] = df['storageUS'].shift()
df['change'] = df['storageUS'] - df['lastWeek']

In [25]:

#make it so the index label doesnt appear, just looks prettier
df.index.name = None

In [27]:

df.tail()

Out[27]:

	date	storageUS	year	month	day	week	lastWeek	change
2016-12-02	20161202	3953	2016	12	2	48	3995.0	-42.0
2016-12-09	20161209	3806	2016	12	9	49	3953.0	-147.0
2016-12-16	20161216	3597	2016	12	16	50	3806.0	-209.0
2016-12-23	20161223	3360	2016	12	23	51	3597.0	-237.0
2016-12-30	20161230	3311	2016	12	30	52	3360.0	-49.0