import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from google.colab import files
# Ask for the survey export through Colab's upload widget; the result is
# indexed by file name below to obtain the uploaded bytes.
uploaded = files.upload()
import io
# Wrap the uploaded bytes in an in-memory buffer so pandas can parse them.
csv_buffer = io.BytesIO(uploaded['uchigreendata.csv'])
df = pd.read_csv(csv_buffer)
Saving uchigreendata.csv to uchigreendata (4).csv
# Collect the e-mail addresses respondents supplied, skipping blank entries.
# dropna() detects missing values directly instead of comparing str(value)
# against 'nan'.
emails = df["Emails"].dropna().tolist()

# The addresses are now held in `emails`; remove the column from the working
# frame so it does not travel with the rest of the analysis.
df.drop(columns="Emails", inplace=True)

# Discard rows recorded through the "preview" distribution channel.
preview_rows = df[df['DistributionChannel'] == "preview"].index
df.drop(index=preview_rows, inplace=True)

# Discard rows whose "Finished" flag is 0.
unfinished_rows = df[df['Finished'] == 0].index
df.drop(index=unfinished_rows, inplace=True)
Creating two dataframes
# Respondents are split on whether the "Harvard-Data" column holds an answer.
# Control group: rows where "Harvard-Data" is missing.
control_entries = df["Harvard-Data"].isnull()
control = df[control_entries]

# Treatment group: every remaining row. The mask is the complement of the
# control mask, so the null check runs once and there is no `== False`.
treatment_entries = ~control_entries
treatment = df[treatment_entries]

# Bare expression kept so a notebook cell renders the treatment frame.
treatment
QuestionType | Finished | DistributionChannel | Affiliation | Affiliation-Explanation | 2030Plan | Personal-Sustainability-Priority | UChicago-Sustainability-Priority | Sufficient-Initiative | Harvard-Data | Harvard-Explanation | |
---|---|---|---|---|---|---|---|---|---|---|---|
6 | 0 | 1 | anonymous | 1 | NaN | 1.0 | NaN | NaN | NaN | 4.0 | Those are huge numbers! |
10 | 0 | 1 | anonymous | 1 | NaN | 1.0 | NaN | NaN | NaN | 4.0 | NaN |
12 | 0 | 1 | anonymous | 1 | NaN | 2.0 | 7.0 | 5.0 | NaN | 5.0 | NaN |
18 | 0 | 1 | anonymous | 1 | NaN | 1.0 | 9.0 | 6.0 | NaN | 4.0 | NaN |
19 | 0 | 1 | anonymous | 1 | NaN | 1.0 | 9.0 | 10.0 | NaN | 3.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
133 | 0 | 1 | anonymous | 1 | NaN | 1.0 | 8.0 | 6.0 | NaN | 6.0 | pretty on par with Harvard |
140 | 0 | 1 | anonymous | 1 | NaN | 2.0 | 5.0 | 7.0 | NaN | 4.0 | still not enough to compare to rest of world |
144 | 0 | 1 | anonymous | 1 | NaN | 2.0 | 8.0 | 10.0 | NaN | 3.0 | Minimally, I would like to see other metrics r... |
149 | 0 | 1 | anonymous | 1 | NaN | 2.0 | 5.0 | 5.0 | NaN | 4.0 | Na |
151 | 0 | 1 | anonymous | 1 | NaN | 1.0 | 8.0 | 10.0 | NaN | 7.0 | Uchicago looks to be dropping in Greenhouse Ga... |
61 rows × 11 columns
Functions for making a bar chart
def make_chart(x, y):
    """Draw a bar chart of ``y`` against ``x`` and label each bar with its value.

    Args:
        x: Bar positions/labels, passed straight to ``plt.bar``.
        y: Bar heights, one per entry of ``x``. Any iterable works; values
            are read in iteration order, not looked up by index label.

    Returns:
        Whatever ``plt.bar`` returns for the drawn bars.
    """
    result = plt.bar(x, y)
    # Adding value labels. Each label is placed at x = the bar's ordinal
    # position, which assumes the bars sit at 0, 1, 2, ... (the case for
    # string category labels). NOTE(review): numeric ``x`` values would need
    # their own coordinates here.
    for position, value in enumerate(y):
        # Centre the text on the bar and sit it just above the bar's top.
        plt.text(position, value, value, ha="center", va="bottom")
    return result
def add_labels(title, x, y):
    """Set the current plot's title, x-axis label and y-axis label.

    Args:
        title: Text passed to ``plt.title``.
        x: Text passed to ``plt.xlabel``.
        y: Text passed to ``plt.ylabel``.
    """
    setters_and_text = (
        (plt.title, title),
        (plt.xlabel, x),
        (plt.ylabel, y),
    )
    for apply_text, text in setters_and_text:
        apply_text(text)
# Survey question: On a scale of 1-10 how much do you personally prioritize
# or care about sustainability?
# The column name is used with its trailing space, exactly as the data is
# addressed throughout this section.
personal_col = "Personal-Sustainability-Priority "

# Mean score for every respondent, the control group and the treatment group,
# in that order.
(
    ave_personal_sustainability,
    ave_control_sustainability,
    ave_treatment_sustainability,
) = (group[personal_col].mean() for group in (df, control, treatment))

# Bar chart comparing the three means.
personal_sustainability = [
    ave_personal_sustainability,
    ave_control_sustainability,
    ave_treatment_sustainability,
]
labels = ["Total", "Control", "Treatment"]
personal_sustainability_plot = make_chart(labels, personal_sustainability)
add_labels("Personal Sustainability Rating", "Group", "Score")
plt.show()
def _affiliation_counts(group):
    """Return a one-column frame with the number of rows per "Affiliation" value."""
    return group.groupby("Affiliation").size().to_frame()


# Affiliation breakdown of the control group and of the treatment group.
control_demographics = _affiliation_counts(control)
treatment_demographics = _affiliation_counts(treatment)

# Displaying the demographics.
# NOTE(review): if both expressions share one notebook cell, presumably only
# the last one is rendered - confirm whether the control table should be
# shown as well.
control_demographics
treatment_demographics
0 | |
---|---|
Affiliation | |
1 | 60 |
4 | 1 |
NOTE: It is not really worth comparing the demographics of the two groups: the treatment group contains a single postdoc, and every other respondent is an undergraduate student.
# Survey question: Have you ever heard of UChicago’s plan to reduce
# greenhouse gas emissions by 50% by 2030?
def _plan_counts(group):
    """Return a one-column frame counting the rows of ``group`` per "2030Plan" answer."""
    return group.groupby("2030Plan").size().to_frame()


# Share of each answer among all respondents, as a percentage.
ave_2030 = _plan_counts(df)
total_ave_2030 = ave_2030.sum()
ave_2030 = ave_2030 / total_ave_2030 * 100

# Share of each answer within the control group, as a percentage.
control_2030 = _plan_counts(control)
total_control_2030 = control_2030.sum()
control_2030 = control_2030 / total_control_2030 * 100

# Share of each answer within the treatment group, as a percentage.
treatment_2030 = _plan_counts(treatment)
total_treatment_2030 = treatment_2030.sum()
treatment_2030 = treatment_2030 / total_treatment_2030 * 100

# All three charts share the same question text and answer labels.
# NOTE(review): the labels are matched to the grouped answers purely by
# position, so this assumes every group contains all three answers in the
# order Yes, No, Maybe - confirm against the survey coding.
plan_question = "Have you ever heard of UChicago’s plan to reduce greenhouse gas emissions by 50% by 2030?"
labels = ["Yes", "No", "Maybe"]

# Bar chart for all responses.
ave_2030_data = ave_2030[0].tolist()
ave_2030_plot = make_chart(labels, ave_2030_data)
add_labels(plan_question + " (All Responses)", "Response", "Percentage")
plt.show()

# Bar chart for the control group.
control_2030_data = control_2030[0].tolist()
control_2030_plot = make_chart(labels, control_2030_data)
add_labels(plan_question + " (Control Group)", "Response", "Percentage")
plt.show()

# Bar chart for the treatment group.
treatment_2030_data = treatment_2030[0].tolist()
treatment_2030_plot = make_chart(labels, treatment_2030_data)
add_labels(plan_question + " (Treatment Group)", "Response", "Percentage")
plt.show()
# Survey question: On a scale of 1-10 how important do you think
# sustainability is to UChicago?
uchi_col = "UChicago-Sustainability-Priority"

# Mean score for every respondent, the control group and the treatment group,
# in that order.
(
    ave_uchi_sustainability,
    control_uchi_sustainability,
    treatment_uchi_sustainability,
) = (group[uchi_col].mean() for group in (df, control, treatment))

# Bar chart comparing the three means.
uchi_sustainability = [
    ave_uchi_sustainability,
    control_uchi_sustainability,
    treatment_uchi_sustainability,
]
labels = ["Total", "Control", "Treatment"]
uchi_sustainability_plot = make_chart(labels, uchi_sustainability)
add_labels("How much does UChicago care about sustainability?", "Group", "Score")
plt.show()
# Survey question: Do you believe that UChicago is taking sufficient
# initiative to be sustainable?
# This section reads the control group's answers from "Sufficient-Initiative"
# and the treatment group's answers from "Harvard-Data".

# Average score for the control group. It is taken from the `control` frame,
# like the other sections, so only control rows can contribute.
control_uchi_initiative = control["Sufficient-Initiative"].mean()

# Average score for the treatment group.
treatment_uchi_initiative = treatment["Harvard-Data"].mean()

# Average score for all respondents: pool the answers from both columns and
# divide by the number of answers actually given. Dividing by the row count
# would treat respondents who skipped the question as if they had answered 0,
# which drags the mean down.
initiative_answers = df[["Sufficient-Initiative", "Harvard-Data"]]
ave_uchi_initiative = initiative_answers.sum().sum()
ave_uchi_initiative = ave_uchi_initiative / initiative_answers.count().sum()

# Making the bar chart.
uchi_initiative = [ave_uchi_initiative, control_uchi_initiative, treatment_uchi_initiative]
labels = ["Total", "Control", "Treatment"]
uchi_iniative_plot = make_chart(labels, uchi_initiative)
add_labels("Do you believe that UChicago is taking sufficient initiative to be sustainable?","Group", "Score")
plt.show()
# Explanations of how the Harvard data affected respondents' views.
# dropna() removes the missing entries directly instead of comparing each
# value's string form against 'nan'.
harvard_explanations = df["Harvard-Explanation"].dropna().tolist()

# Print the explanations as a list numbered from 1.
for i, resp in enumerate(harvard_explanations, 1):
    print(f"{i}. {resp}")
1. Those are huge numbers! 2. UChicago is doing better than expected 3. Somewhat 4. We're doing better than Harvard but they are also larger and not necessarily that sustainable 5. Sad! But expected 6. It has not influenced my response 7. I think this more so pointed to Harvard needing to do more. 8. not a ton of change was impressive 9. I guess it makes me feel better we’re in line with other similar institutions, but I guess it also makes me think about how all universities with massive endowments could be doing more. 10. It hasn’t at all this gives me no metric on which to judge how effective UChicago’s initiatives are 11. No 12. It seems as though UChicago's emissions are following a decreasing trend, but investment in fossil fuels offsets this progress in different ways. 13. Does not really tell me much, each school has different student populations I’m not sure this is a valid comparison 14. Given the steady levels of emissions over the years without much major decrease, it makes me think that there is not care given to to emissions and their impact 15. No influence 16. No 17. Though we’re not as green as Harvard, I do understand that we’re taking a lot of measures to make sure our practices are more environmentally friendly. But I think this opinion might be skewed just cus I’m in PSI LOL 18. I can see that 2021 was our lowest emissions year, but every year before that there seems to be a negligible difference in our missions compared to where the graph starts. Harvard seems to have a more steady downward trend than we do. 19. Somewhat. The ups and downs show that it isn’t all explicit policy but it looks good for uchi 20. we’re doing better but we still have a long way to go 21. Seeing that there's a distinct downward trend makes me more inclined to think that the University's initiatives are sufficient 22. I'm glad to know that it is doing better than Harvard, at least as of 2021, but I think that we still have a long way to go. 23. 
it shows that there has been a very slight change but it is clearly no enough 24. not really 25. They are better than expected still needs improvement 26. The comparison to Harvard doesn’t really have any influence on my response. However looking at the trend of UChicago’s emissions, despite the recent drop after 2017 we cannot say for certain that it will stay low. Especially considering that the drop could be in part due to less campus activity because of COVID. 27. A bit 28. very influential since I never knew about UChicago's greenhouse gas emissions 29. My response is the same 30. There's definitely been a decrease in the past decade, but I know of other problems UChicago has with environmental issues and sustainability. 31. no change 32. it's had an incredible influence 33. Help give perspective 34. surprised me 35. pretty on par with Harvard 36. still not enough to compare to rest of world 37. Minimally, I would like to see other metrics rather than comparing ourselves to Harvard. For all I know Harvard could be an atrocious carbon emitter. 38. Na 39. Uchicago looks to be dropping in Greenhouse Gas omissions so I figure there are measures in place to be more sustainable.