In [None]:
from datetime import datetime, timedelta

In [None]:
# Notebook input parameters and their default values.

# Report date: the day before the notebook is run, formatted as MM-DD-YYYY.
DATE = (datetime.today() - timedelta(days=1)).strftime('%m-%d-%Y')

# How many states to keep when ranking by confirmed cases.
TOP_K = 5

# States to include in the per-state comparison plot.
US_STATES = [
    "Alabama",
    "California",
    "Arizona",
]

## Install required libraries

Ref: https://aws.amazon.com/blogs/big-data/install-python-libraries-on-a-running-cluster-with-emr-notebooks/

In [None]:
# Pinned package versions, installed one at a time in the order listed
# through the Spark context's install_pypi_package.
pinned_packages = (
    "pandas==0.25.1",
    "requests==2.24.0",
    "numpy==1.19.1",
    "kiwisolver==1.2.0",
    "matplotlib==3.3.0",
)

for package_spec in pinned_packages:
    sc.install_pypi_package(package_spec)

## Load data to pandas dataframe

In [None]:
import pandas as pd
import io
import requests

# The daily report file is named after the requested date: "<DATE>.csv".
file_name = '{}.csv'.format(DATE)

url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/' + file_name
print("reading data from: " + url)

# Bound the wait with a timeout and fail fast on HTTP errors. Without the
# status check, an error body (e.g. the response for a date that has no
# report) would be decoded and handed to read_csv as if it were data, and the
# problem would only surface later as a missing-column error.
response = requests.get(url, timeout=30)
response.raise_for_status()

content = response.content
csv = io.StringIO(content.decode('utf-8'))

# Parse the downloaded CSV text into the frame used by the following cells.
pandas_df = pd.read_csv(csv)

## Sort by confirmed cases and keep the top `k` states

In [None]:
# Order the whole frame by confirmed cases, largest first. The sort is done
# in place, so pandas_df itself keeps this order afterwards.
pandas_df.sort_values(by='Confirmed', ascending=False, inplace=True)

# After the sort, the first TOP_K rows are the states with the most cases.
filtered_df = pandas_df.iloc[:TOP_K]
top_state = filtered_df['Province_State'].iloc[0]
print("State with most confirmed cases: " + top_state)

## Plot the graph: Top `k` US states by confirmed COVID-19 cases

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pie inputs taken from filtered_df: state names (as an array, used for the
# call-out labels) and the confirmed-case counts (used as wedge sizes).
states = filtered_df['Province_State'].values
cases = filtered_df['Confirmed']

# One axes with an equal aspect ratio on a 14 x 12 figure.
fig, ax = plt.subplots(
    figsize=(14, 12),
    subplot_kw={"aspect": "equal"},
)

def func(pct, allvals):
    """Build the label shown inside a pie wedge: percentage plus absolute count.

    Args:
        pct: The wedge's share of the total, in percent (0-100).
        allvals: The values the pie is drawn from; their sum is the total
            against which ``pct`` is converted back to a count.

    Returns:
        A two-line string: the percentage with one decimal place, then the
        absolute count in parentheses on the second line.
    """
    # Round before converting to int. pct is a float, so pct/100*total can
    # land a hair below the true whole number (e.g. 2.9999999), and a bare
    # int() would truncate that to a count that is one too low.
    absolute = int(np.round(pct/100.*np.sum(allvals)))
    return "{:.1f}%\n({:d})".format(pct, absolute)

# Draw the pie. Each wedge gets white text produced by func (share + count).
wedges, texts, autotexts = ax.pie(
    cases,
    autopct=lambda pct: func(pct, cases),
    textprops={"color": "w"},
)

# Styling shared by every call-out: a white box with a black border, joined
# to its wedge by a plain line. Only the connection style changes per wedge.
bbox_props = {"boxstyle": "square,pad=0.3", "fc": "w", "ec": "k", "lw": 0.72}
kw = {
    "arrowprops": {"arrowstyle": "-"},
    "bbox": bbox_props,
    "zorder": 0,
    "va": "center",
}

for wedge, state_name in zip(wedges, states):
    # Angle (degrees) at the middle of the wedge, and the matching point on
    # the unit circle, which is where the connector line is anchored.
    ang = (wedge.theta2 - wedge.theta1)/2. + wedge.theta1
    ang_rad = np.deg2rad(ang)
    x = np.cos(ang_rad)
    y = np.sin(ang_rad)

    # Labels left of centre are right-aligned and vice versa, so the text
    # extends away from the pie.
    side = int(np.sign(x))
    horizontalalignment = {-1: "right", 1: "left"}[side]

    kw["arrowprops"]["connectionstyle"] = "angle,angleA=0,angleB={}".format(ang)
    ax.annotate(
        state_name,
        xy=(x, y),
        xytext=(1.35*np.sign(x), 1.4*y),
        horizontalalignment=horizontalalignment,
        **kw
    )

ax.set_title("Covid-19 cases: Top {} US states".format(TOP_K))

%matplot plt

## Plot the graph: Mortality rate among covid patients in specific US States.

In [None]:
# Keep only the rows whose state is listed in US_STATES.
in_selected_states = pandas_df['Province_State'].isin(US_STATES)
filtered_df = pandas_df.loc[in_selected_states]

# One bar per selected state, showing its Case_Fatality_Ratio column;
# rot=0 keeps the state names horizontal on the x axis.
ax = filtered_df.plot(kind='bar', x='Province_State', y='Case_Fatality_Ratio', rot=0)
ax.set_title("Covid-19 cases: Case_Fatality_Ratio for specific US states")
%matplot plt

In [None]:
# Preview the first TOP_K rows of pandas_df; the bare name on the last line
# makes the notebook display the resulting frame.
ab = pandas_df.iloc[:TOP_K]
ab