Grabbing all Natural Gas storage data from the Energy Information Agency (EIA)

" />

Getting Natural Gas storage data from EIA's API


Grabbing all Natural Gas storage data from the Energy Information Agency (EIA)

In [18]:
import requests
import pandas as pd
In [10]:
#this is not pushed to git hub, my api key is there
import config
In [11]:
#point MY_KEY at a working eia api key
MY_KEY = config.EIA_API
In [12]:
#api url for series name Weekly Lower 48 States Natural Gas Working Underground Storage, Weekly
storageURL = 'http://api.eia.gov/series/?api_key=' + MY_KEY + '&series;_id=NG.NW2_EPG0_SWO_R48_BCF.W&out;=JSON'
In [13]:
#get requests data from specificied url
storage = requests.get(storageURL)
In [15]:
#the request was for json formatting so uses requests' JSON decoder
storage_json = storage.json()

#storage_json is a nested dict, so take a look at it and find my way into the sub-dict with the data
date_series = storage_json['series'][0]['data']
In [30]:
date_series[0:10]
Out[30]:
[['20161230', 3311],
 ['20161223', 3360],
 ['20161216', 3597],
 ['20161209', 3806],
 ['20161202', 3953],
 ['20161125', 3995],
 ['20161118', 4045],
 ['20161111', 4047],
 ['20161104', 4017],
 ['20161028', 3963]]
In [16]:
#loop through that date_series dict and make and array out of the date entries
endi = len(date_series)
date = []

for i in range(endi):
    date.append(date_series[i][0])
In [19]:
#convert that bad boy to a data frame
df = pd.DataFrame(data=date)
df.columns = ['Date']
In [20]:
#loop through the rest of the dict with and put the data in
lenj = len(storage_json)-1

for j in range (lenj):
    data_series = storage_json['series'][0]['data']
    data = []
    endk = len(date_series) 
    for k in range (endk):
        data.append(data_series[k][1])
    df[j] = data
In [21]:
#rename columns
df.columns = ['date','storageUS']

#substring date to use for making a python datevariable, the infer datetime thing didnt work 
df['year'] = df['date'].apply(lambda t: t[0:4])
df['year'] = df['year'].astype(int)

df['month'] = df['date'].apply(lambda t: t[4:6])
df['month'] = df['month'].astype(int)

df['day'] = df['date'].apply(lambda t: t[-2:])
df['day'] = df['day'].astype(int)
In [22]:
#date formatted as datetime
df['date_f'] = pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d')
In [23]:
#week of year to make y-y comparisons
df['week'] = df['date_f'].dt.week
In [24]:
#move the index to date time
df.set_index(['date_f'], inplace = True)
#sort by datetime acending, bc the data comes in most rescent first
df = df.sort_index()
#calc weekly storage change
df['lastWeek'] = df['storageUS'].shift()
df['change'] = df['storageUS'] - df['lastWeek']
In [25]:
#make it so the index label doesnt appear, just looks prettier
df.index.name = None
In [27]:
df.tail()
Out[27]:
date storageUS year month day week lastWeek change
2016-12-02 20161202 3953 2016 12 2 48 3995.0 -42.0
2016-12-09 20161209 3806 2016 12 9 49 3953.0 -147.0
2016-12-16 20161216 3597 2016 12 16 50 3806.0 -209.0
2016-12-23 20161223 3360 2016 12 23 51 3597.0 -237.0
2016-12-30 20161230 3311 2016 12 30 52 3360.0 -49.0

bookmark
Brett Rudder

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.


Did you like this article? Share it with your friends!