'KeyError: 'Date' when it's spelled correctly
I have a program that reads a datarframe and does a line graph on its data, I'm trying to overlay a scatter of another dataframe in the same graph, but I'm getting the error KeyError: 'Date on the line ax3 = TMIN2015.plot.scatter(x ='Date', y='TMIN',secondary_y=True, ax=ax1) I'll add the code and how the dataframe csv file originally looks like
import matplotlib.pyplot as plt
import mplleaflet
import pandas as pd
import numpy as np
def leaflet_plot_stations(binsize, hashid):
df = pd.read_csv('data/C2A2_data/BinSize_d{}.csv'.format(binsize))
station_locations_by_hash = df[df['hash'] == hashid]
lons = station_locations_by_hash['LONGITUDE'].tolist()
lats = station_locations_by_hash['LATITUDE'].tolist()
#plt.figure(figsize=(8,8))
#plt.scatter(lons, lats, c='r', alpha=0.7, s=200)
#---my code---
#1. Read the documentation and familiarize yourself with the dataset, then write some python code which returns a line graph
#of the record high and record low temperatures by day of the year over the period 2005-2014. The area between the record
#high and record low temperatures for each day should be shaded.
df2 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv')
df2['Date'] = pd.to_datetime(df2['Date'])
df2[(df2['Date'] > '2005-01-01') & (df2['Date'] < '2014-12-31')]
#sorting by date to make the data reading easier
df2 = df2.sort_values(by="Date")
#groupyingby the min and max
df2 = df2.groupby('Date')['Data_Value'].agg([('TMIN','min'), ('TMAX','max')])
#this other syntaxis is valid, but due to the version of the current library it causes an error, that's why I used the
#method above
#df2.groupby('Date').agg(TMIN = ('Data_Value', 'min'), TMAX = ('Data_Value', 'max'))
#reseting the index so I can use the dates further on
df2 = df2.reset_index()
#creating the line graph
ax1 = df2.plot(x ='Date', y='TMIN', kind = 'line')
ax2 = df2.plot(x ='Date', y='TMAX',secondary_y=True, ax=ax1)
y1 = df2['TMIN']
y2 = df2['TMAX']
#to calculate the position in scale
ny = ((y2-np.min(y2))/(np.max(y2)-np.min(y2)))*(np.max(y1)-np.min(y1))+np.min(y1)
#shading between spaces
#ax1.fill_between(df2['Date'],y1,ny, where=y1<=ny, interpolate=True, color='grey', alpha=0.5)
#ax1.fill_between(df2['Date'],y1,ny, where=y1>=ny, interpolate=True, color='grey', alpha=0.5)
plt.fill_between(df2['TMIN'],df2['TMAX'], interpolate=True, color='grey', alpha=0.5)
plt.xlabel('Year Interval')
plt.ylabel('Temperature Frequency')
plt.title('Record high and record low temperatures by day of the year over the period 2005-2014')
#2. Overlay a scatter of the 2015 data for any points (highs and lows) for which the ten year record (2005-2014) record
#high or record low was broken in 2015.
TMIN2015 = pd.read_csv('data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv')
TMIN2015['Date'] = pd.to_datetime(TMIN2015['Date'])
TMIN2015 = TMIN2015[(TMIN2015['Date'] > '2015-01-01') & (TMIN2015['Date'] < '2015-12-31')]
TMIN2015 = TMIN2015.sort_values(by="Date")
TMIN2015 = TMIN2015.groupby('Date')['Data_Value'].agg([('TMIN','min'), ('TMAX','max')])
TMIN2015 = TMIN2015.reset_index()
ax3 = TMIN2015.plot.scatter(x ='Date', y='TMIN',secondary_y=True, ax=ax1)
ax4 = TMIN2015.plot.scatter(x ='Date', y='TMAX',secondary_y=True, ax=ax1)
return plt.show()
leaflet_plot_stations(400,'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89')
An example on how the table looks like without being modified in code:
| ID | Date | Element | Data_Value | |
|---|---|---|---|---|
| 0 | USW00094889 | 2014-11-12 | TMAX | 22 |
| 1 | USC00208972 | 2009-04-29 | TMIN | 56 |
| 2 | USC00200032 | 2008-05-26 | TMAX | 278 |
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
