feat: final push for submission
This commit is contained in:
162
scorecard_tools.py
Normal file
162
scorecard_tools.py
Normal file
@@ -0,0 +1,162 @@
|
||||
import os
|
||||
import logging
|
||||
|
||||
import awkward as ak
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Module-level logger; the print_* helpers below send their section headers through it.
logger = logging.getLogger("Scorecard Data")
|
||||
|
||||
|
||||
def mask_valid(data):
    """Return the rows of ``data`` whose physical-science series looks populated.

    A row is dropped when its
    ``academics.program_percentage.physical_science`` values contain any NaN
    along axis 1, or when every value along axis 1 is exactly zero (these are
    treated as placeholder rows).
    """
    physical_science = data.academics.program_percentage.physical_science

    # Per-row flags for the two kinds of placeholder data.
    has_nan = ak.any(np.isnan(physical_science), axis=1)
    only_zeros = ak.all(physical_science == 0, axis=1)

    keep = ~has_nan & ~only_zeros
    return data[keep]
|
||||
|
||||
def mask_school(data, name):
    """Return the first record of ``data`` whose school name equals ``name``.

    Rows are matched on the last entry along axis 1 of ``data.school.name``
    (presumably the most recent year -- confirm against the data loader).

    Raises:
        IndexError: if no row matches ``name``.
    """
    matches = data[data.school.name[:, -1] == name]
    if len(matches) == 0:
        # Indexing an empty selection would raise an IndexError that does not
        # say which school was missing; raise the same type with a clear message.
        raise IndexError(f"no school named {name!r} found in data")
    return matches[0]
|
||||
|
||||
|
||||
# Percentage enrollment per field over time averaged across the nation
|
||||
def save_scorecard_stats_national(output_dir, data, ext="png"):
    """Save two line plots of per-field enrollment averaged along axis 0 of ``data``.

    For every field of ``data.academics.program_percentage`` the mean along
    axis 0 is computed (presumably the mean over schools for each year --
    confirm the array layout). Fields whose mean is below 0.05 at every
    position are left out of both plots.

    Files written into ``output_dir``:
        ``national-enrollment-percent.<ext>``  -- mean share per field, in percent.
        ``national-enrollment-students.<ext>`` -- mean of share * undergraduate
        enrollment per field, i.e. student counts.

    Args:
        output_dir: Directory the figures are saved into.
        data: Record array exposing ``academics.program_percentage`` and
            ``student.enrollment.undergrad_12_month``.
        ext: File extension passed to ``plt.savefig`` via the file name.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Compute the per-field mean share once and keep only the fields that
    # reach the 5% threshold somewhere; both plots use the same selection.
    mean_share = {}
    for key in prog_percentage.fields:
        mean = ak.mean(prog_percentage[key], axis=0)
        if ak.all(mean < 0.05):
            continue
        mean_share[key] = mean

    # Percent enrollment
    for key, mean in mean_share.items():
        plt.plot(mean * 100, label=key)
    plt.xlabel("Year")
    plt.ylabel("Percent Enrollment")
    plt.title("Average Enrollment (National)")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"national-enrollment-percent.{ext}"))
    plt.clf()

    # Students enrolled
    for key in mean_share:
        plt.plot(ak.mean(prog_percentage[key] * students, axis=0), label=key)
    plt.xlabel("Year")
    # This plot shows head counts, not percentages.
    plt.ylabel("Students Enrolled")
    plt.title("Average Enrollment (National)")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"national-enrollment-students.{ext}"))
    plt.clf()
|
||||
|
||||
|
||||
# Percentage enrollment per field over time averaged across one school
|
||||
def save_scorecard_stats_school(output_dir, data, school: str, ext="png"):
    """Save two line plots of per-field enrollment for a single school.

    The school's record is selected with ``mask_school``. Fields of
    ``academics.program_percentage`` whose mean along axis 0 is below 0.05
    are left out of both plots.

    Files written into ``output_dir``:
        ``<school>-enrollment-percent.<ext>``  -- share per field, in percent.
        ``<school>-enrollment-students.<ext>`` -- share * undergraduate
        enrollment per field, i.e. student counts.

    Args:
        output_dir: Directory the figures are saved into.
        data: Record array accepted by ``mask_school``.
        school: School name to select; also used in the title and file names.
        ext: File extension passed to ``plt.savefig`` via the file name.
    """
    data = mask_school(data, school)
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Keep only the fields whose mean share for this school reaches 5%;
    # both plots use the same selection.
    selected = []
    for key in prog_percentage.fields:
        mean = ak.mean(prog_percentage[key], axis=0)
        if mean < 0.05:
            continue
        selected.append(key)

    # Plot percent enrollment
    for key in selected:
        plt.plot(prog_percentage[key] * 100, label=key)
    plt.xlabel("Year")
    plt.ylabel("Percent Enrollment")
    plt.title(f"Average Enrollment ({school})")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{school}-enrollment-percent.{ext}"))
    plt.clf()

    # Plot student enrollment
    for key in selected:
        plt.plot(prog_percentage[key] * students, label=key)
    plt.xlabel("Year")
    # This plot shows head counts, not percentages.
    plt.ylabel("Students Enrolled")
    plt.title(f"Average Enrollment ({school})")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{school}-enrollment-students.{ext}"))
    plt.clf()
|
||||
|
||||
def save_scorecard_stats(output_dir, data, ext="png"):
    """Save log-scale histograms of undergraduate enrollment.

    Two figures are written into ``output_dir``:
        ``dist-students-enrolled-avg.<ext>``       -- mean of
        ``student.enrollment.undergrad_12_month`` along axis 1.
        ``dist-students-enrolled-last-year.<ext>`` -- the last entry along
        axis 1 of the same array.

    Both use bins of width 1,000 from 0 up to (but excluding) 100,000 and a
    logarithmic y-axis.

    Args:
        output_dir: Directory the figures are saved into.
        data: Record array exposing ``student.enrollment.undergrad_12_month``.
        ext: File extension passed to ``plt.savefig`` via the file name.
    """
    enrollment = data.student.enrollment.undergrad_12_month
    bins = np.arange(0, 100_000, 1_000)

    # (values to histogram, title qualifier, file-name suffix)
    plots = (
        (ak.mean(enrollment, axis=1), "Average", "avg"),
        (enrollment[:, -1], "Last Year", "last-year"),
    )
    for values, label, suffix in plots:
        plt.hist(values, bins=bins)
        plt.xlabel("Students Enrolled")
        plt.ylabel("University Count")
        plt.title(f"Enrollment Distribution ({label})")
        plt.yscale("log")
        plt.savefig(os.path.join(output_dir, f"dist-students-enrolled-{suffix}.{ext}"))
        plt.clf()
|
||||
|
||||
|
||||
|
||||
# Print national enrollment stats
|
||||
def print_enrollment_national(data):
    """Print per-field enrollment statistics averaged over all rows of ``data``.

    Builds a table with one row per field of
    ``data.academics.program_percentage`` and these columns:
        ``Percent (Last)``  -- mean share at the last position along axis 1, in percent.
        ``Percent (Avg)``   -- mean share over all entries, in percent.
        ``Students (Last)`` -- mean of share * enrollment at the last position along axis 1.
        ``Students (Avg)``  -- mean of share * enrollment over all entries.

    The table is printed twice, sorted descending by ``Percent (Last)`` and
    then by ``Percent (Avg)``; each printout is preceded by a header sent
    through the module logger.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # The last academic year is the last entry along axis 1 (``[:, -1]``), the
    # same convention the rest of this module uses. Plain ``[-1]`` would pick
    # the last row (a single school) instead.
    students_last = students[:, -1]

    names = list(prog_percentage.fields)
    columns = {
        'Name': names,
        'Percent (Last)': [],
        'Percent (Avg)': [],
        "Students (Last)": [],
        "Students (Avg)": [],
    }
    for key in names:
        vals = prog_percentage[key]
        vals_last = vals[:, -1]
        columns['Percent (Last)'].append(ak.mean(vals_last) * 100)
        columns['Percent (Avg)'].append(ak.mean(vals) * 100)
        columns['Students (Last)'].append(ak.mean(vals_last * students_last))
        columns['Students (Avg)'].append(ak.mean(vals * students))

    df = pd.DataFrame(columns)

    # Print the table under each ordering.
    logger.info("\n\nNational Percentage (Last Academic Year): ")
    print(df.sort_values("Percent (Last)", ascending=False))

    logger.info("\n\nNational Percentage (Avg): ")
    print(df.sort_values("Percent (Avg)", ascending=False))
|
||||
|
||||
def print_enrollment(data, name: str):
    """Print per-field enrollment statistics for the school called ``name``.

    The school's record is selected with ``mask_school``. One table row is
    built per field of ``academics.program_percentage``, holding the last
    value and the mean of the share (in percent) and of share * undergraduate
    enrollment. The table is printed twice, sorted descending by
    ``Percent (Last)`` and then by ``Percent (Avg)``; each printout is
    preceded by a header sent through the module logger.
    """
    school = mask_school(data, name)
    shares = school.academics.program_percentage
    enrolled = school.student.enrollment.undergrad_12_month

    # One column per statistic, all in the order of the field names.
    field_names = list(shares.fields)
    table = pd.DataFrame({
        'Name': field_names,
        'Percent (Last)': [shares[f][-1] * 100 for f in field_names],
        'Percent (Avg)': [ak.mean(shares[f]) * 100 for f in field_names],
        "Students (Last)": [shares[f][-1] * enrolled[-1] for f in field_names],
        "Students (Avg)": [ak.mean(shares[f] * enrolled) for f in field_names],
    })

    # Emit the table under each ordering.
    logger.info(f"\n\nProgram Percentage ({name}, Last Academic Year): ")
    print(table.sort_values("Percent (Last)", ascending=False))

    logger.info(f"\n\nProgram Percentage ({name}, Avg): ")
    print(table.sort_values("Percent (Avg)", ascending=False))
|
||||
Reference in New Issue
Block a user