Files
undergrad-uh401/run.py
2025-12-06 03:59:17 +00:00

50 lines
1.6 KiB
Python

import os
import logging
import awkward as ak
import utils
import scorecard_tools
import excel_tools
def find_scorecard_dir(data_dir):
    """Return the scorecard release directory located under ``data_dir``.

    The data is expected at ``<data_dir>/scorecard/<release>/``. Only
    subdirectories are considered (stray files such as ``.DS_Store`` are
    skipped), and when several exist the first one in sorted name order is
    used. ``os.listdir`` returns entries in arbitrary order, so sorting is
    what makes the choice reproducible between runs and machines.

    Args:
        data_dir: Root data directory containing a ``scorecard`` folder.

    Returns:
        Path to the selected subdirectory of ``<data_dir>/scorecard``.

    Raises:
        FileNotFoundError: If ``<data_dir>/scorecard`` does not exist or
            contains no subdirectory.
    """
    scorecard_root = os.path.join(data_dir, "scorecard")
    candidates = sorted(
        entry
        for entry in os.listdir(scorecard_root)
        if os.path.isdir(os.path.join(scorecard_root, entry))
    )
    if not candidates:
        raise FileNotFoundError(
            f"No scorecard subdirectory found in {scorecard_root}"
        )
    return os.path.join(scorecard_root, candidates[0])


def main():
    """Run the scorecard and Excel analysis from the command line.

    Parses the command-line arguments, loads the merged College Scorecard
    parquet file, writes the scorecard statistics into the output directory,
    prints enrollment figures, and finally reads and prints the Excel data.
    """
    # Setup Args
    # NOTE(review): ``args.debug`` and ``args.data_dir`` are presumably
    # defined by utils.get_common_args -- confirm against utils.
    parser = utils.get_common_args(prog="Main Data Parser")
    parser.add_argument(
        "-o",
        "--output-dir",
        default="plots",
        help="Directory to save generated plots",
    )
    parser.add_argument("-s", "--school", default="The University of Alabama")
    args = parser.parse_args()

    # Setup Logging
    utils.setup_logging(args.debug)
    logger = logging.getLogger("DataAnalysis")

    # Load Scorecard
    scorecard_dir = find_scorecard_dir(args.data_dir)
    scorecard_file = os.path.join(scorecard_dir, "merged.parquet")
    logger.info(f"Loading College Scorecard data from file {scorecard_file}")
    scorecard_data = ak.from_parquet(scorecard_file)

    # Create the output directory up front so the save_* helpers can write
    # into it.
    os.makedirs(args.output_dir, exist_ok=True)
    logger.info("Saving scorecard-only statistics")
    scorecard_data = scorecard_tools.mask_valid(scorecard_data)
    scorecard_tools.save_scorecard_stats(args.output_dir, scorecard_data)
    scorecard_tools.save_scorecard_stats_national(
        args.output_dir, scorecard_data
    )
    scorecard_tools.save_scorecard_stats_school(
        args.output_dir, scorecard_data, args.school
    )

    # Print enrollment, nationally and for the selected school
    scorecard_tools.print_enrollment_national(scorecard_data)
    scorecard_tools.print_enrollment(scorecard_data, args.school)

    # Load Excel
    excel_data = excel_tools.read_excel(os.path.join(args.data_dir, "ua.xlsx"))
    excel_tools.print_data(excel_data)


if __name__ == "__main__":
    main()