"""Plot and print enrollment statistics derived from College Scorecard data.

NOTE(review): every function here assumes ``data`` is an awkward array whose
axis 0 indexes schools and whose axis 1 indexes academic years (oldest to
newest) -- this matches how the code slices ``[:, -1]`` for "last year" and
reduces over ``axis=0`` for national averages. Confirm against the loader.
"""

import logging
import os

import awkward as ak
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

logger = logging.getLogger("Scorecard Data")

# Programs whose enrollment fraction stays below this value are left off the
# line plots so the legend is not flooded with near-zero curves.
_MIN_PLOT_FRACTION = 0.05

# Bin edges (students) shared by the enrollment histograms.
_ENROLLMENT_BINS = np.arange(0, 100_000, 1_000)


def _finish_plot(path, xlabel, ylabel, title, *, legend=False, log_y=False):
    """Label the current pyplot figure, save it to ``path`` and clear it."""
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    if legend:
        plt.legend()
    if log_y:
        plt.yscale("log")
    plt.savefig(path)
    # Clear the shared pyplot figure so the next plot starts from scratch.
    plt.clf()


def _print_sorted_tables(df, heading_last, heading_avg):
    """Log each heading and print ``df`` sorted by the matching percent column."""
    logger.info(heading_last)
    print(df.sort_values("Percent (Last)", ascending=False))

    logger.info(heading_avg)
    print(df.sort_values("Percent (Avg)", ascending=False))


def mask_valid(data):
    """Return ``data`` without entries that look like NaN/zero placeholders.

    An entry is dropped when its ``physical_science`` program percentage
    contains a NaN anywhere along axis 1, or is zero everywhere along axis 1.
    Only that one field is inspected (presumably as a proxy for the whole
    record being a placeholder -- TODO confirm).
    """
    physical_science = data.academics.program_percentage.physical_science
    has_no_nan = ~ak.any(np.isnan(physical_science), axis=1)
    not_all_zero = ~ak.all(physical_science == 0, axis=1)
    return data[has_no_nan & not_all_zero]


def mask_school(data, name):
    """Return the first entry whose most recent ``school.name`` equals ``name``.

    Raises:
        IndexError: if no entry matches ``name``.
    """
    matches = data[data.school.name[:, -1] == name]
    if len(matches) == 0:
        raise IndexError(f"no school named {name!r} found in the data")
    return matches[0]


def save_scorecard_stats_national(output_dir, data, ext="png"):
    """Save line plots of per-program enrollment averaged over all schools.

    Two figures are written to ``output_dir``:
    ``national-enrollment-percent.<ext>`` (mean percentage per year) and
    ``national-enrollment-students.<ext>`` (mean student count per year).
    Programs whose mean fraction is below 5% in every year are skipped.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Mean fraction per year across schools, computed once and reused for
    # both figures; only programs that pass the threshold are kept.
    mean_fraction = {}
    for key in prog_percentage.fields:
        mean = ak.mean(prog_percentage[key], axis=0)
        if ak.all(mean < _MIN_PLOT_FRACTION):
            continue
        mean_fraction[key] = mean

    # Percent enrollment
    for key, mean in mean_fraction.items():
        plt.plot(mean * 100, label=key)
    _finish_plot(
        os.path.join(output_dir, f"national-enrollment-percent.{ext}"),
        "Year",
        "Percent Enrollment",
        "Average Enrollment (National)",
        legend=True,
    )

    # Students enrolled
    for key in mean_fraction:
        plt.plot(ak.mean(prog_percentage[key] * students, axis=0), label=key)
    _finish_plot(
        os.path.join(output_dir, f"national-enrollment-students.{ext}"),
        "Year",
        "Students Enrolled",
        "Average Enrollment (National)",
        legend=True,
    )


def save_scorecard_stats_school(output_dir, data, school: str, ext="png"):
    """Save line plots of per-program enrollment over time for one school.

    Two figures are written to ``output_dir``:
    ``<school>-enrollment-percent.<ext>`` and
    ``<school>-enrollment-students.<ext>``. Programs whose fraction, averaged
    over the school's years, is below 5% are skipped.

    Raises:
        IndexError: if ``school`` is not present in ``data``.
    """
    data = mask_school(data, school)
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Keep only programs whose average share at this school reaches the
    # threshold; the same selection is used for both figures.
    plotted = []
    for key in prog_percentage.fields:
        if ak.mean(prog_percentage[key], axis=0) < _MIN_PLOT_FRACTION:
            continue
        plotted.append(key)

    # Plot percent enrollment
    for key in plotted:
        plt.plot(prog_percentage[key] * 100, label=key)
    _finish_plot(
        os.path.join(output_dir, f"{school}-enrollment-percent.{ext}"),
        "Year",
        "Percent Enrollment",
        f"Average Enrollment ({school})",
        legend=True,
    )

    # Plot student enrollment
    for key in plotted:
        plt.plot(prog_percentage[key] * students, label=key)
    _finish_plot(
        os.path.join(output_dir, f"{school}-enrollment-students.{ext}"),
        "Year",
        "Students Enrolled",
        f"Average Enrollment ({school})",
        legend=True,
    )


def save_scorecard_stats(output_dir, data, ext="png"):
    """Save log-scale histograms of undergraduate enrollment per school.

    Writes ``dist-students-enrolled-avg.<ext>`` (enrollment averaged along
    axis 1) and ``dist-students-enrolled-last-year.<ext>`` (last entry along
    axis 1) to ``output_dir``.
    """
    enrollment = data.student.enrollment.undergrad_12_month

    # Students Enrolled Distribution (Average)
    plt.hist(ak.mean(enrollment, axis=1), bins=_ENROLLMENT_BINS)
    _finish_plot(
        os.path.join(output_dir, f"dist-students-enrolled-avg.{ext}"),
        "Students Enrolled",
        "University Count",
        "Enrollment Distribution (Average)",
        log_y=True,
    )

    # Students Enrolled Distribution (Last Year)
    plt.hist(enrollment[:, -1], bins=_ENROLLMENT_BINS)
    _finish_plot(
        os.path.join(output_dir, f"dist-students-enrolled-last-year.{ext}"),
        "Students Enrolled",
        "University Count",
        "Enrollment Distribution (Last Year)",
        log_y=True,
    )


def print_enrollment_national(data):
    """Print national per-program enrollment (last year and all-year average).

    The table is printed twice: sorted by last-year percentage, then by
    average percentage.
    """
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month
    # "Last" means the most recent year of every school, i.e. the last entry
    # along axis 1 -- not the last school along axis 0.
    students_last = students[:, -1]

    # Create dataframe of enrollment across the nation (latest academic year and average)
    df = {
        'Name': list(prog_percentage.fields),
        'Percent (Last)': [],
        'Percent (Avg)': [],
        "Students (Last)": [],
        "Students (Avg)": [],
    }
    for key in df['Name']:
        vals = prog_percentage[key]
        vals_last = vals[:, -1]
        df['Percent (Last)'].append(ak.mean(vals_last) * 100)
        df['Percent (Avg)'].append(ak.mean(vals) * 100)
        df['Students (Last)'].append(ak.mean(vals_last * students_last))
        df['Students (Avg)'].append(ak.mean(vals * students))
    df = pd.DataFrame(df)

    # Print Dataframe
    _print_sorted_tables(
        df,
        "\n\nNational Percentage (Last Academic Year): ",
        "\n\nNational Percentage (Avg): ",
    )


def print_enrollment(data, name: str):
    """Print per-program enrollment for one school (last year and average).

    The table is printed twice: sorted by last-year percentage, then by
    average percentage.

    Raises:
        IndexError: if ``name`` is not present in ``data``.
    """
    data = mask_school(data, name)
    prog_percentage = data.academics.program_percentage
    students = data.student.enrollment.undergrad_12_month

    # Create dataframe of enrollment at this school (latest academic year and average).
    # After mask_school the arrays are indexed by year only, so [-1] is the last year.
    df = {
        'Name': list(prog_percentage.fields),
        'Percent (Last)': [],
        'Percent (Avg)': [],
        "Students (Last)": [],
        "Students (Avg)": [],
    }
    for key in df['Name']:
        vals = prog_percentage[key]
        df['Percent (Last)'].append(vals[-1] * 100)
        df['Percent (Avg)'].append(ak.mean(vals) * 100)
        df['Students (Last)'].append(vals[-1] * students[-1])
        df['Students (Avg)'].append(ak.mean(vals * students))
    df = pd.DataFrame(df)

    # Print Dataframe
    _print_sorted_tables(
        df,
        f"\n\nProgram Percentage ({name}, Last Academic Year): ",
        f"\n\nProgram Percentage ({name}, Avg): ",
    )