Files
undergrad-uh401/explore.ipynb

63 KiB

In [28]:
import os

import numpy as np
import awkward as ak
import matplotlib.pyplot as plt
In [5]:
# Load the merged scorecard table from the first subdirectory of data/scorecard.
scorecard_root = "data/scorecard"
# os.listdir returns entries in arbitrary order, so sort the names to make the
# choice reproducible, and keep only directories so a stray file is never
# picked as the folder that should contain merged.parquet.
subdir_names = sorted(
    name
    for name in os.listdir(scorecard_root)
    if os.path.isdir(os.path.join(scorecard_root, name))
)
if not subdir_names:
    raise FileNotFoundError(f"no subdirectory found in {scorecard_root!r}")
scorecard_dir = os.path.join(scorecard_root, subdir_names[0])
data = ak.from_parquet(os.path.join(scorecard_dir, "merged.parquet"))
In [43]:
# Row filter derived from the physical_science program percentages only.
physical_science = data.academics.program_percentage.physical_science
# True for rows in which no entry is NaN.
mask_nan = ak.all(~np.isnan(physical_science), axis=1)
# True for rows that contain at least one entry different from zero.
mask_all_zero = ak.any(physical_science != 0, axis=1)
# A row is kept only when both conditions hold.
mask = mask_all_zero & mask_nan
Out[43]:
[[0.0068, 0.0267, 0.0221, 0.0218, 0.0183, ..., 0.0205, 0.0071, 0.0117, 0.0137],
 [0.0096, 0.0145, 0.0133, 0.0124, 0.0134, ..., 0.0145, 0.0189, 0.0226, 0.0231],
 [0.0203, 0.0191, 0.0152, 0.0252, 0.0208, ..., 0.0465, 0.0382, 0.0413, 0.0514],
 [0.0167, 0.0158, 0.0111, 0.0253, 0.0059, ..., 0.0125, 0.0255, 0.0063, 0.0126],
 [0.0142, 0.0116, 0.006, 0.0073, 0.0105, ..., 0.01, 0.0087, 0.0095, 0.0106],
 [0.009, 0.0063, 0.0047, 0.0105, 0.0066, ..., 0.0015, 0.0084, 0.004, 0, 0.0028],
 [0.0144, 0.0101, 0.0062, 0.0142, 0.0108, ..., 0.0193, 0.0276, 0.0147, 0.0099],
 [0.0075, 0.0082, 0.0091, 0.0079, 0.0077, ..., 0.0113, 0.007, 0.0099, 0.0083],
 [0.0268, 0.0095, 0.015, 0.0176, 0.0221, ..., 0.0498, 0.0268, 0.0114, 0.0216],
 [0.0354, 0.0303, 0.0208, 0.048, 0.0403, ..., 0.0129, 0.0123, 0.0098, 0.0058],
 ...,
 [0, 0, 0, 0.001, 0, 0, 0, 0, 0.0007, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0.108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0.0037, 0.0064, 0.0098, 0.0065, ..., 0.0049, 0.0067, 0.0113, 0.0051],
 [0.0018, 0.005, 0.0093, 0.0052, 0.008, ..., 0.008, 0.006, 0.0063, 0.0076],
 [0, 0, 0, 0, 0, 0, 0, ..., 0.0053, 0.0132, 0.015, 0.0145, 0.0162, 0.0182],
 [0, 0, 0, 0, 0, 0, 0.0062, ..., 0.0478, 0.0467, 0.059, 0.044, 0.0253, 0.0164],
 [0, 0, 0, 0.0125, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0.0019, 0.0055, 0.0035, 0.0038, 0.0058],
 [0, 0, 0, 0, 0.0046, 0.0092, ..., 0.0014, 0.0012, 0.0009, 0.0011, 0.0002]]
--------------------------------------------------------------------------------
backend: cpu
nbytes: 2.8 MB
type: 1576 * 28 * float64
In [65]:
# Plot, for every program field, its mean over the rows selected by `mask`.
data_m = data[mask]
program_percentage = data_m.academics.program_percentage

# Fields whose overall mean is below this value are left out of the plot
# (compared on the raw scale, before the x100 scaling used for plotting).
MIN_OVERALL_MEAN = 0.03

# Explicit figure/axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
for key in program_percentage.fields:
    # Mean over axis 0 (across rows): one value per position along axis 1.
    mean = ak.mean(program_percentage[key], axis=0)
    if ak.mean(mean) < MIN_OVERALL_MEAN:
        continue
    ax.plot(mean * 100, label=key)
# NOTE(review): the x axis is the position along axis 1 of each field; add an
# x label once its meaning is confirmed.
ax.set_ylabel("Mean program percentage (%)")
# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();
Out[65]:
<matplotlib.legend.Legend at 0x739dc95e00e0>
No description has been provided for this image