import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the master table from a path relative to the current working directory.
data = pd.read_csv(r'Downloads\master.csv')
# Alternate input file, left disabled:
# datanew = pd.read_csv(r'C:\Users\alvir\Downloads\master_wb.csv')
# Looking at Data
# Structural overview of the table.  The bare expressions only produce output
# when run as notebook cells.
data.columns.values
# datanew.columns.values
data.shape
data.head()
data.info()

# Column names split into a numerical and a categorical group.
Numerical = [
    'year',
    'suicides_no',
    'population',
    'suicides/100k pop',
    'HDI for year',
    'gdp_for_year ($)',
    'gdp_per_capita ($)',
]
Categorical = [
    'country',
    'sex',
    'age',
    'generation',
    'country-year',
]

# How many distinct countries appear in the data.
contries = data.country.unique()
print('Countries count:', len(contries))

# Distinct age brackets, then distinct generation labels.  The name `age` is
# reused for both, so it ends up holding the generation labels.
age = data.age.unique()
age
age = data.generation.unique()
age
# Plotting
# Sum the per-100k rate for every (age, sex) pair, then pivot sex into columns.
rate_by_age_sex = data.groupby(['age', 'sex'])['suicides/100k pop'].sum()
total_suicides = rate_by_age_sex.unstack()
# Column totals (one row per sex) as a frame with the index turned into a column.
total = total_suicides.sum().to_frame().reset_index()
total
# Split the rows by sex.
Female = data[data['sex'] == 'female']
Male = data[data['sex'] == 'male']

# Share of the summed per-100k rate contributed by each sex; the two shares
# add up to 1.  Each sum is computed once and reused.
male_rate_sum = Male['suicides/100k pop'].sum()
female_rate_sum = Female['suicides/100k pop'].sum()
total = male_rate_sum + female_rate_sum
Male_totals = male_rate_sum / total
Female_totals = female_rate_sum / total

labels = 'Female', 'Male'
sizes = [Female_totals, Male_totals]
colors = ['lightcoral', 'lightskyblue']
explode = (0.1, 0)  # offset the first (female) wedge from the centre

# Pie chart of the two shares.
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=140)
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

# Male_totals is the male fraction of the combined total, not a
# "percent more than female" figure, so it is reported as a share.
print('Male suicides are:', Male_totals * 100, '% of the combined male and female total')
print('Male suicide rate is higher than female')
# Summed per-100k rate for each sex.  Only the plotted column is aggregated,
# so the non-numeric columns of `data` are never summed.
age = data.groupby('sex')['suicides/100k pop'].sum().reset_index()
plt.figure(figsize=(8, 4))
# The bars are grouped by sex only, so the title names that grouping alone.
plt.title('Summed suicides per 100k population by gender from 1985 to 2016')
sns.barplot(x='sex', y='suicides/100k pop', hue='sex', data=age, palette='vlag')
plt.ylabel('Suicides ')
plt.xlabel('sex')
plt.show()  # must be called; a bare `plt.show` is a no-op expression
print('Number of Suicide is higher for Male than Female')
# Mean per-100k rate for every (age, sex) pair.  The column is selected before
# aggregating because a frame-wide mean raises on text columns in pandas >= 2.0.
age = data.groupby(['age', 'sex'])['suicides/100k pop'].mean().reset_index()
plt.figure(figsize=(8, 4))
plt.title('Number of Sucidies over Age Group and Sex')
sns.barplot(x='age', y='suicides/100k pop', hue='sex', data=age, palette='deep')
plt.ylabel('Suicides per 100k')
plt.xlabel('Age Group & Sex')
plt.show()  # must be called; a bare `plt.show` is a no-op expression
print('Age groups between 35-54 years has a higher suicide number')
# Per-country mean of every numeric column, computed once.  numeric_only=True
# keeps the aggregation from failing on text columns in pandas >= 2.0.
countries = data.groupby('country').mean(numeric_only=True).reset_index()
countries

plt.figure(figsize=(20, 10))
plt.title('Top 5 Countries with most Suicicdes Per 100k Population over years 1987-2015')
# The five countries with the highest mean per-100k rate.
top = countries.sort_values(by='suicides/100k pop', ascending=False).head(5)
sns.barplot(x='country', y='suicides/100k pop', data=top, palette='bright')
plt.ylabel('Suicides Per 100k ')
plt.xlabel('Countries')
plt.show()  # must be called; a bare `plt.show` is a no-op expression
print('Russian Federation has a higher suicide number')
# Age-wise comparison of the mean suicide count, with one bar cluster per sex.
# Only the plotted column is averaged; a frame-wide mean raises on text
# columns in pandas >= 2.0.
age = data.groupby(['age', 'sex'])['suicides_no'].mean().reset_index()
plt.figure()  # own figure, so the bars are not drawn onto an earlier plot's axes
generation_plot = sns.barplot(x='sex', y='suicides_no', hue='age', data=age)
plt.title("Age wise comparision of suicide rate", fontsize=12)
plt.xlabel("Sex", fontsize=13)
plt.ylabel("Suicide Count", fontsize=13)
plt.show()
# Distinct years present in the data (the name `age` is reused here).
age = data['year'].unique()
age

# Mean GDP per capita for each country.  The frame keeps the name
# `total_suicides` although it holds GDP values.
total_suicides = data.groupby('country')['gdp_per_capita ($)'].mean().reset_index()
# Highest GDP first; the country name breaks ties.
ploting = total_suicides.sort_values(by=['gdp_per_capita ($)', 'country'], ascending=False)
plt.figure(figsize=(14, 18))
plt.title('Countries by GDP Per Capita')
sns.barplot(x='gdp_per_capita ($)', y='country', data=ploting, palette='dark')
plt.ylabel('Countries')
plt.xlabel('GDP Per Capita ($)')
plt.show()  # must be called; a bare `plt.show` is a no-op expression
data['year'].unique()

# Suicide rate trend by year (line plot).
# Rows are years, columns are countries, values are each country's mean
# per-100k rate for that year.
gsd_year = data.groupby(['year', 'country'])['suicides/100k pop'].mean().unstack()
# Average across countries rather than summing: a sum of rates depends on how
# many countries have data for the year and is no longer a per-100k figure.
gsd_year['Suicide'] = gsd_year.mean(axis=1)
gsd_year['Suicide'].plot(kind='line', figsize=(10, 6), marker='o')
plt.title('Suicide Trend Over Year')
plt.xlabel('Year(s)')
plt.ylabel('Suicids per 100K')
plt.grid()
plt.show()
# GDP per capita trend by year (line plot).
# Rows are years, columns are countries, values are each country's mean GDP
# per capita for that year.
gdp_by_year_country = data.groupby(['year', 'country'])['gdp_per_capita ($)'].mean()
years = gdp_by_year_country.unstack()
# Row-wise mean over the country columns.  The column is labelled 'Suicides'
# even though it holds GDP values.
years['Suicides'] = years.mean(axis=1)
years['Suicides'].plot(kind='line', figsize=(10, 6), marker='o')
plt.title('GDP Per Capita Trend Over Years')
plt.ylabel('GDP Per Capita')
plt.xlabel('Year(s)')
plt.grid()
plt.show()
# Minimum, maximum and mean of the per-100k rate over all rows.
rate_per_100k = data['suicides/100k pop']
lowest_rate = rate_per_100k.min()
print("Minimum Suicides per 100k Population: {}".format(lowest_rate))
highest_rate = rate_per_100k.max()
print("Maximum Suicides per 100k Population: {}".format(highest_rate))
average_rate = rate_per_100k.mean()
print("Average Suicides per 100k Population: {}".format(average_rate))
# Annotated heatmap of the pairwise correlations between numeric columns.
plt.figure(figsize=(7, 5))
# Restrict to numeric columns first: DataFrame.corr() raises on text columns
# in pandas >= 2.0, and select_dtypes works on older versions as well.
sns.heatmap(data.select_dtypes(include='number').corr(), annot=True)
plt.title("Figure4: Correlation", fontsize=12)
plt.show()
# Number of countries contributed each year.
def count_country(group: pd.DataFrame) -> int:
    """Return the number of distinct values in the ``country`` column of *group*.

    Args:
        group: A frame (for example one ``groupby`` group) that has a
            ``country`` column.

    Returns:
        The count of distinct ``country`` values; 0 for an empty frame.
    """
    # Bracket access always resolves to the column, even if a DataFrame
    # attribute of the same name exists.
    return len(group["country"].unique())
# Distinct-country count for each year, as a Series named 'number_of_countries'.
country_no = data.groupby('year').apply(count_country).rename('number_of_countries')
country_no

# Combine the per-year country count with the yearly totals of suicides and
# population into one frame indexed by year.
yr_grouped = data.groupby('year')
annual_pop = yr_grouped['population'].sum()
annual_suicides = yr_grouped['suicides_no'].sum()
yearly_columns = [country_no, annual_suicides, annual_pop]
country_df = pd.concat(yearly_columns, axis=1)
country_df.head()
# GDP per capita for each country, as a bar plot.
# Averaged rather than summed: a sum grows with the number of rows a country
# has in the data, which says nothing about its GDP per capita.
data_gdp = data[['country', 'gdp_per_capita ($)']].groupby(['country']).mean()
data_gdp.plot(kind='bar', figsize=(40, 10), fontsize=25)
plt.title("Figure7: Country wise comparision of GDP per capita", fontsize=40)
plt.xlabel("Country", fontsize=35)
# The bars show GDP per capita, so the axis is labelled accordingly.
plt.ylabel("GDP per capita ($)", fontsize=40)
plt.show()