162 lines
5.9 KiB
Python
162 lines
5.9 KiB
Python
import os
|
|
import logging
|
|
|
|
import awkward as ak
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
|
|
|
|
# Module-level logger, registered under the name "Scorecard Data"; used by the
# print_* helpers below to emit section headings.
logger = logging.getLogger("Scorecard Data")
|
|
|
|
|
|
def mask_valid(data):
    """Drop rows whose physical-science percentages are unusable.

    A row is kept only when its
    ``academics.program_percentage.physical_science`` entries, reduced along
    ``axis=1``, contain no NaN and are not all equal to zero.

    Args:
        data: Awkward record array exposing
            ``academics.program_percentage.physical_science``. Presumably one
            row per school with one entry per year -- TODO confirm.

    Returns:
        ``data`` indexed by the boolean keep-mask.
    """
    physical_science = data.academics.program_percentage.physical_science

    # Any NaN in a row marks the row as missing data.
    has_nan = ak.any(np.isnan(physical_science), axis=1)
    # A row made up solely of zeros is treated as a placeholder.
    only_zeros = ak.all(physical_science == 0, axis=1)

    keep = ~has_nan & ~only_zeros
    return data[keep]
|
|
|
|
def mask_school(data, name):
    """Return the first row of ``data`` whose last name entry equals ``name``.

    The comparison uses ``data.school.name[:, -1]``, i.e. the final entry along
    the second axis of the name field (presumably the most recent year -- TODO
    confirm).

    Args:
        data: Record array exposing ``school.name`` and supporting boolean
            mask indexing.
        name: School name to match exactly.

    Returns:
        Element ``0`` of the rows selected by the mask. Indexing fails if no
        row matches.
    """
    latest_names = data.school.name[:, -1]
    matches = data[latest_names == name]
    return matches[0]
|
|
|
|
|
|
# Percentage enrollment per field over time averaged across the nation
|
|
def save_scorecard_stats_national(output_dir, data, ext="png"):
    """Plot per-program enrollment averaged over ``axis=0`` and save two figures.

    Writes ``national-enrollment-percent.<ext>`` (mean program share, in
    percent) and ``national-enrollment-students.<ext>`` (mean of program share
    multiplied by undergraduate enrollment) into ``output_dir``. Programs whose
    mean share is below 0.05 at every position are left out of both figures.

    Args:
        output_dir: Directory the figures are written to; it must already
            exist.
        data: Awkward record array exposing ``academics.program_percentage``
            and ``student.enrollment.undergrad_12_month``. Presumably indexed
            as [school, year], so that ``axis=0`` averages over schools --
            TODO confirm.
        ext: File extension handed to ``plt.savefig`` via the file name.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Compute each program's mean share once and reuse the same selection for
    # both figures, so they always show the same set of programs.
    mean_share = {}
    for key in prog_percentage.fields:
        mean = ak.mean(prog_percentage[key], axis=0)
        if ak.all(mean < 0.05):
            continue
        mean_share[key] = mean

    # Percent enrollment
    for key, mean in mean_share.items():
        plt.plot(mean * 100, label=key)
    plt.xlabel("Year")
    plt.ylabel("Percent Enrollment")
    plt.title("Average Enrollment (National)")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"national-enrollment-percent.{ext}"))
    plt.clf()

    # Students enrolled
    for key in mean_share:
        mean_students = ak.mean(prog_percentage[key] * students, axis=0)
        plt.plot(mean_students, label=key)
    plt.xlabel("Year")
    # This figure shows head counts, not percentages.
    plt.ylabel("Students Enrolled")
    plt.title("Average Enrollment (National)")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"national-enrollment-students.{ext}"))
    plt.clf()
|
|
|
|
|
|
# Percentage enrollment per field over time averaged across one school
|
|
def save_scorecard_stats_school(output_dir, data, school: str, ext="png"):
    """Plot per-program enrollment for one school and save two figures.

    Writes ``<school>-enrollment-percent.<ext>`` (program share, in percent)
    and ``<school>-enrollment-students.<ext>`` (program share multiplied by
    undergraduate enrollment) into ``output_dir``. Programs whose share,
    averaged over the school's entries, is below 0.05 are left out of both
    figures.

    Args:
        output_dir: Directory the figures are written to; it must already
            exist.
        data: Awkward record array accepted by ``mask_school``.
        school: School name, matched by ``mask_school``. It is also embedded
            verbatim in the plot title and in the output file names.
        ext: File extension handed to ``plt.savefig`` via the file name.
    """
    data = mask_school(data, school)
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Select the programs once so both figures show the same set. The mean is
    # taken over this school's own entries (presumably years -- TODO confirm).
    shown = [
        key
        for key in prog_percentage.fields
        if not ak.mean(prog_percentage[key], axis=0) < 0.05
    ]

    # Plot percent enrollment
    for key in shown:
        plt.plot(prog_percentage[key] * 100, label=key)
    plt.xlabel("Year")
    plt.ylabel("Percent Enrollment")
    plt.title(f"Average Enrollment ({school})")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{school}-enrollment-percent.{ext}"))
    plt.clf()

    # Plot student enrollment
    for key in shown:
        plt.plot(prog_percentage[key] * students, label=key)
    plt.xlabel("Year")
    # This figure shows head counts, not percentages.
    plt.ylabel("Students Enrolled")
    plt.title(f"Average Enrollment ({school})")
    plt.legend()
    plt.savefig(os.path.join(output_dir, f"{school}-enrollment-students.{ext}"))
    plt.clf()
|
|
|
|
def save_scorecard_stats(output_dir, data, ext="png"):
    """Save two log-scale histograms of undergraduate enrollment.

    Writes ``dist-students-enrolled-avg.<ext>`` (per-row mean over ``axis=1``)
    and ``dist-students-enrolled-last-year.<ext>`` (last entry of each row)
    into ``output_dir``. Both use 1,000-wide bins spanning 0 to 99,000.

    Args:
        output_dir: Directory the figures are written to; it must already
            exist.
        data: Awkward record array exposing
            ``student.enrollment.undergrad_12_month``. Presumably indexed as
            [school, year] -- TODO confirm.
        ext: File extension handed to ``plt.savefig`` via the file name.
    """
    enrollment = data.student.enrollment.undergrad_12_month
    bins = np.arange(0, 100_000, 1_000)

    # Students Enrolled Distribution (Average)
    avg_students = ak.mean(enrollment, axis=1)
    plt.hist(avg_students, bins=bins)
    plt.xlabel("Students Enrolled")
    plt.ylabel("University Count")
    plt.title("Enrollment Distribution (Average)")
    plt.yscale("log")
    plt.savefig(os.path.join(output_dir, f"dist-students-enrolled-avg.{ext}"))
    plt.clf()

    # Students Enrolled Distribution (Last Year)
    plt.hist(enrollment[:, -1], bins=bins)
    plt.xlabel("Students Enrolled")
    plt.ylabel("University Count")
    plt.title("Enrollment Distribution (Last Year)")
    plt.yscale("log")
    plt.savefig(os.path.join(output_dir, f"dist-students-enrolled-last-year.{ext}"))
    plt.clf()
|
|
|
|
|
|
|
|
# Print national enrollment stats
|
|
def print_enrollment_national(data):
    """Print national per-program enrollment, sorted two ways.

    Builds a table with one row per program field holding the mean share (in
    percent) and the mean head count, both for the last entry of every row
    and over all entries. The table is printed sorted by "Percent (Last)" and
    then by "Percent (Avg)", each preceded by a logged heading.

    Args:
        data: Awkward record array exposing ``academics.program_percentage``
            and ``student.enrollment.undergrad_12_month``. The rest of this
            module indexes these arrays as [school, year] (for example
            ``school.name[:, -1]``), so the last academic year is ``[:, -1]``.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # The last academic year is the last column. Plain ``[-1]`` would select
    # the last row (a single school), not the last year.
    students_last = students[:, -1]

    # Create table of enrollment across the nation (latest academic year and average)
    table = {
        'Name': list(prog_percentage.fields),
        'Percent (Last)': [],
        'Percent (Avg)': [],
        "Students (Last)": [],
        "Students (Avg)": [],
    }
    for key in table['Name']:
        vals = prog_percentage[key]
        vals_last = vals[:, -1]
        table['Percent (Last)'].append(ak.mean(vals_last) * 100)
        table['Percent (Avg)'].append(ak.mean(vals) * 100)
        table['Students (Last)'].append(ak.mean(vals_last * students_last))
        table['Students (Avg)'].append(ak.mean(vals * students))

    df = pd.DataFrame(table)

    # Print Dataframe
    logger.info("\n\nNational Percentage (Last Academic Year): ")
    print(df.sort_values("Percent (Last)", ascending=False))

    logger.info("\n\nNational Percentage (Avg): ")
    print(df.sort_values("Percent (Avg)", ascending=False))
|
|
|
|
def print_enrollment(data, name: str):
    """Print per-program enrollment for a single school, sorted two ways.

    The school is selected with ``mask_school``. For every program field the
    table holds the last entry and the mean over all entries, both as a share
    (in percent) and as a head count (share multiplied by undergraduate
    enrollment). The table is printed sorted by "Percent (Last)" and then by
    "Percent (Avg)", each preceded by a logged heading.

    Args:
        data: Awkward record array accepted by ``mask_school``.
        name: School name to select; it is also embedded in the logged
            headings.
    """
    school = mask_school(data, name)
    shares = school.academics.program_percentage
    head_count = school.student.enrollment.undergrad_12_month

    # One column per statistic, one row per program field.
    programs = list(shares.fields)
    table = pd.DataFrame(
        {
            'Name': programs,
            'Percent (Last)': [shares[p][-1] * 100 for p in programs],
            'Percent (Avg)': [ak.mean(shares[p]) * 100 for p in programs],
            "Students (Last)": [shares[p][-1] * head_count[-1] for p in programs],
            "Students (Avg)": [ak.mean(shares[p] * head_count) for p in programs],
        }
    )

    # Emit the same table twice: ordered by the last entry, then by the average.
    headings = (
        ("Percent (Last)", f"\n\nProgram Percentage ({name}, Last Academic Year): "),
        ("Percent (Avg)", f"\n\nProgram Percentage ({name}, Avg): "),
    )
    for column, heading in headings:
        logger.info(heading)
        print(table.sort_values(column, ascending=False))