243 KiB
243 KiB
In [1]:
!pip install -r requirements.txt
In [14]:
import os
import numpy as np
import awkward as ak
import matplotlib.pyplot as plt
import pandas as pd
In [29]:
# Load the spreadsheet; header=3 takes the column names from the fourth row.
df = pd.read_excel("data/ua.xlsx", header=3)

# Drop the last four rows (presumably footer notes under the table -- TODO
# confirm) and every column whose label contains "Unnamed" (the label pandas
# gives to columns with a blank header cell). str() keeps the filter safe for
# non-string labels. The explicit .copy() makes the result an independent
# frame, so the column assignments below do not raise SettingWithCopyWarning
# on pandas versions that track slice provenance.
df = df[:-4]
df = df[[field for field in df.columns if "Unnamed" not in str(field)]].copy()

# Column labels used throughout the notebook.
key_all = "All R&D expenditures"
key_federal = "Federal government"
key_state = "State and local government"
key_inst = "Institution funds"
key_business = "Business"
key_nonprofit = "Nonprofit organizations"
key_other = "All other sources"
keys = [key_federal, key_state, key_inst, key_business, key_nonprofit, key_other]

# For every funding source add a "<source> Percent" column holding that
# source's share of the total R&D expenditures, in percent.
for key in keys:
    key_percent = key + " Percent"
    df[key_percent] = df[key] / df[key_all] * 100

# sort_values returns a new frame rather than sorting in place, so the result
# has to be assigned back for the ordering to take effect.
df = df.sort_values(key_all)
df
Out[29]:
In [32]:
# Display the rows ordered by the institution-funds percent column, largest
# value first. Only the displayed copy is sorted; `df` itself is not modified.
inst_percent_column = key_inst + " Percent"
df.sort_values(by=inst_percent_column, ascending=False)
Out[32]:
In [41]:
# Keep only the entries of `data` selected by `mask`.
data_m = data[mask]
program_percentage = data_m.academics.program_percentage

# Fields whose overall average share is below this fraction (3%) are not drawn.
min_mean_share = 0.03

# One line per program field: average along axis 0 (presumably across schools,
# leaving one value per year -- TODO confirm) and plot the result in percent.
for key in program_percentage.fields:
    mean = ak.mean(program_percentage[key], axis=0)
    if ak.mean(mean) < min_mean_share:
        continue
    plt.plot(mean * 100, label=key)

plt.xlabel("Year")
plt.ylabel("Percent Enrollment")
plt.legend()
Out[41]:
In [42]:
# Average the 12-month undergraduate enrollment along axis 1 (presumably one
# row per university with one entry per year -- TODO confirm).
avg_students = ak.mean(data.student.enrollment.undergrad_12_month, axis=1)

# 1,000-student bins with edges 0, 1,000, ..., 99,000; values outside that
# range are not counted by plt.hist.
plt.hist(avg_students, bins=np.arange(0, 100_000, 1_000))
plt.xlabel("Students Enrolled")
plt.ylabel("University Count")  # label typo fixed (was "Univeristy")
plt.title("Enrollment Distribution (Average)")  # title typo fixed (was "Enrollemnt")
plt.yscale("log")  # logarithmic count axis
In [43]:
# Histogram of the last entry of every row of the 12-month undergraduate
# enrollment (`[:, -1]`), using 1,000-student bins with edges 0 ... 99,000;
# values outside that range are not counted by plt.hist.
plt.hist(data.student.enrollment.undergrad_12_month[:, -1], bins=np.arange(0, 100_000, 1_000))
plt.xlabel("Students Enrolled")
plt.ylabel("University Count")  # label typo fixed (was "Univeristy")
plt.title("Enrollment Distribution (Last Year)")  # title typo fixed (was "Enrollemnt")
plt.yscale("log")  # logarithmic count axis
In [44]:
# Histogram of the last entry of every row of the overall admission rate
# (`[:, -1]`), labelled like the enrollment histograms so the figure can be
# read on its own.
plt.hist(data.admissions.admission_rate.overall[:, -1])
plt.xlabel("Admission Rate")
plt.ylabel("University Count")
# The trailing semicolon suppresses the text repr of the last call.
plt.title("Admission Rate Distribution (Last Year)");
Out[44]:
In [ ]:
# Exploratory cell: display the `fields` attribute of `data.school`
# (presumably the names of the sub-fields of the school record -- TODO confirm).
data.school.fields