{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "918fe2db-ba93-4c8a-a59a-4c4fd6904ccb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandas in /opt/conda/lib/python3.12/site-packages (2.3.0)\n", "Requirement already satisfied: awkward in /opt/conda/lib/python3.12/site-packages (2.8.9)\n", "Requirement already satisfied: tqdm in /opt/conda/lib/python3.12/site-packages (4.67.1)\n", "Requirement already satisfied: numpy>=1.26.0 in /opt/conda/lib/python3.12/site-packages (from pandas) (2.2.6)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.12/site-packages (from pandas) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.12/site-packages (from pandas) (2025.2)\n", "Requirement already satisfied: awkward-cpp==50 in /opt/conda/lib/python3.12/site-packages (from awkward) (50)\n", "Requirement already satisfied: fsspec>=2022.11.0 in /opt/conda/lib/python3.12/site-packages (from awkward) (2025.5.1)\n", "Requirement already satisfied: packaging in /opt/conda/lib/python3.12/site-packages (from awkward) (25.0)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n" ] } ], "source": [ "!pip install pandas awkward tqdm" ] }, { "cell_type": "code", "execution_count": 2, "id": "2d2afa98-7e3a-4b96-bec3-4706fd86f394", "metadata": { "scrolled": true }, "outputs": [], "source": [ "import os\n", "import yaml\n", "\n", "import awkward as ak\n", "import pandas as pd\n", "import numpy as np\n", "import tqdm" ] }, { "cell_type": "code", "execution_count": 3, "id": "106ac0bf-efc5-4f64-8834-bab1b38b8695", "metadata": {}, "outputs": [], "source": [ "scorecard_dir = \"data/scorecard\"\n", "scorecard_dir = os.path.join(scorecard_dir, 
def conv_dataframe(dataframe):
    """Regroup columns of ``dataframe`` into a nested record array.

    Iterates over the notebook-level ``data['dictionary']`` mapping. Each
    entry's dotted key (for example ``"a.b.c"``) is split on ``'.'``: every
    part but the last becomes a nested dict level, and the last part becomes
    the field under which ``dataframe[entry['source']]`` is stored.

    An entry is skipped when it contains a ``'calculate'`` key, or when its
    ``'source'`` value is not found in ``dataframe``.

    Parameters
    ----------
    dataframe
        Container supporting ``in`` and ``[]`` lookups by source name
        (presumably a pandas DataFrame -- confirm against the caller).

    Returns
    -------
    ak.Array
        The result of passing the nested dict of selected columns to
        ``ak.Array``.

    Raises
    ------
    KeyError
        If an entry without ``'calculate'`` has no ``'source'`` key.
    """
    nested = {}

    for dotted_key, spec in data['dictionary'].items():
        # Entries carrying 'calculate' are not copied from the input.
        if 'calculate' in spec:
            continue

        column = spec['source']
        if column not in dataframe:
            continue

        # Everything before the final dot is the path of enclosing groups.
        *parents, leaf = dotted_key.split('.')

        node = nested
        for name in parents:
            if name not in node:
                node[name] = {}
            node = node[name]

        node[leaf] = dataframe[column]

    return ak.Array(nested)