apiload
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| apiload [2025/09/12 09:44] – 159.196.132.18 | apiload [2025/09/12 10:02] (current) – 159.196.132.18 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| **API Load** | **API Load** | ||
| + | |||
| + | |||
| + | These are the import flags for the Python3 script. | ||
+ | Necessary libraries should be installed via pip3 and run in the virtual env: | ||
| + | <code python> | ||
| + | import requests | ||
| + | import pandas as pd | ||
| + | from sqlalchemy import create_engine, | ||
| + | from datetime import * | ||
| + | from urllib.parse import urlencode, urljoin | ||
| + | import sys | ||
| + | </ | ||
| + | |||
| + | This function is used to construct the API call. | ||
| + | To load a different data set, change the **BASEURL** variable and the app_identifier (and any other compulsory vars): | ||
| + | <code python> | ||
| + | def build_url(): | ||
| + | BASEURL = " | ||
| + | ENDPOINT = sys.argv[2] | ||
| + | |||
| + | params = { | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | } | ||
| + | full_path = BASEURL + ENDPOINT + "?" | ||
| + | return full_path | ||
| + | </ | ||
| + | |||
| + | This function will map a JSON data type to an SQL equivalent for table creation: | ||
| + | <code python> | ||
| + | def map_dtype(dtype): | ||
| + | if pd.api.types.is_integer_dtype(dtype): | ||
| + | return sql_types.INTEGER() | ||
| + | elif pd.api.types.is_float_dtype(dtype): | ||
| + | return sql_types.FLOAT() | ||
| + | elif pd.api.types.is_datetime64_any_dtype(dtype): | ||
| + | return sql_types.DATETIME() | ||
| + | elif datetime_valid(dtype): | ||
| + | return sql_types.DATETIME() | ||
| + | else: | ||
| + | return sql_types.VARCHAR(length=255) | ||
| + | </ | ||
| + | |||
+ | Use this to scan for an ISO datetime variable (this is an obscure data type and needs its own func): | ||
| + | <code python> | ||
| + | def datetime_valid(dt_str): | ||
| + | print(dt_str) | ||
| + | try: | ||
| + | datetime.fromisoformat(dt_str) | ||
| + | except: | ||
| + | return False | ||
| + | return True | ||
| + | </ | ||
| + | |||
| + | Input validation is then done (and kicks off the main part of our function): | ||
| + | <code python> | ||
| + | if(len(sys.argv) < 4): | ||
| + | print(" | ||
| + | print(" | ||
| + | print(" | ||
| + | sys.exit("" | ||
| + | </ | ||
| + | |||
| + | Build endpoint using our function above, then fire the web request, save the data as JSON: | ||
| + | <code python> | ||
| + | response = requests.get(build_url()) | ||
| + | response.raise_for_status() | ||
| + | raw_data = response.json() | ||
| + | </ | ||
| + | |||
| + | Normalize and flatten data using pandas library: | ||
| + | <code python> | ||
| + | if isinstance(raw_data, | ||
| + | records = raw_data.get(" | ||
| + | else: | ||
| + | records = raw_data | ||
| + | |||
| + | |||
| + | df = pd.json_normalize(records) | ||
| + | </ | ||
| + | |||
| + | Connect to the database, create the table, print output, finish: | ||
| + | <code python> | ||
| + | |||
| + | engine = create_engine(f" | ||
| + | |||
| + | # Step 5: Create table and insert data | ||
| + | df.to_sql(name=table_name, | ||
| + | |||
| + | print(" | ||
| + | </ | ||
| + | |||
| + | Specific database connection details are omitted. | ||
| + | |||
+ | This Python script can be called repeatedly to load multiple different endpoints, countries, or date-times. | ||
| + | An example of loading IDX endpoints: | ||
| + | <code bash> | ||
| + | # | ||
| + | </ | ||
apiload.1757670241.txt.gz · Last modified: by 159.196.132.18
