apiload
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| apiload [2025/09/12 09:44] – 159.196.132.18 | apiload [2025/09/12 10:02] (current) – 159.196.132.18 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| **API Load** | **API Load** | ||
| + | |||
| + | |||
| + | These are the import flags for the Python3 script. | ||
+ | Necessary libraries should be installed via pip3 and run in the virtual env: | ||
| + | <code python> | ||
| + | import requests | ||
| + | import pandas as pd | ||
| + | from sqlalchemy import create_engine, | ||
| + | from datetime import * | ||
| + | from urllib.parse import urlencode, urljoin | ||
| + | import sys | ||
| + | </ | ||
| + | |||
| + | This function is used to construct the API call. | ||
| + | To load a different data set, change the **BASEURL** variable and the app_identifier (and any other compulsory vars): | ||
| + | <code python> | ||
| + | def build_url(): | ||
| + | BASEURL = " | ||
| + | ENDPOINT = sys.argv[2] | ||
| + | |||
| + | params = { | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | } | ||
| + | full_path = BASEURL + ENDPOINT + "?" | ||
| + | return full_path | ||
| + | </ | ||
| + | |||
| + | This function will map a JSON data type to an SQL equivalent for table creation: | ||
| + | <code python> | ||
| + | def map_dtype(dtype): | ||
| + | if pd.api.types.is_integer_dtype(dtype): | ||
| + | return sql_types.INTEGER() | ||
| + | elif pd.api.types.is_float_dtype(dtype): | ||
| + | return sql_types.FLOAT() | ||
| + | elif pd.api.types.is_datetime64_any_dtype(dtype): | ||
| + | return sql_types.DATETIME() | ||
| + | elif datetime_valid(dtype): | ||
| + | return sql_types.DATETIME() | ||
| + | else: | ||
| + | return sql_types.VARCHAR(length=255) | ||
| + | </ | ||
| + | |||
+ | Use this to scan for an ISO datetime variable (this is an obscure data type and needs its own func): | ||
| + | <code python> | ||
| + | def datetime_valid(dt_str): | ||
| + | print(dt_str) | ||
| + | try: | ||
| + | datetime.fromisoformat(dt_str) | ||
| + | except: | ||
| + | return False | ||
| + | return True | ||
| + | </ | ||
| + | |||
| + | Input validation is then done (and kicks off the main part of our function): | ||
| + | <code python> | ||
| + | if(len(sys.argv) < 4): | ||
| + | print(" | ||
| + | print(" | ||
| + | print(" | ||
| + | sys.exit("" | ||
| + | </ | ||
| + | |||
| + | Build endpoint using our function above, then fire the web request, save the data as JSON: | ||
| + | <code python> | ||
| + | response = requests.get(build_url()) | ||
| + | response.raise_for_status() | ||
| + | raw_data = response.json() | ||
| + | </ | ||
| + | |||
| + | Normalize and flatten data using pandas library: | ||
| + | <code python> | ||
| + | if isinstance(raw_data, | ||
| + | records = raw_data.get(" | ||
| + | else: | ||
| + | records = raw_data | ||
| + | |||
| + | |||
| + | df = pd.json_normalize(records) | ||
| + | </ | ||
| + | |||
| + | Connect to the database, create the table, print output, finish: | ||
| + | <code python> | ||
| + | |||
| + | engine = create_engine(f" | ||
| + | |||
| + | # Step 5: Create table and insert data | ||
| + | df.to_sql(name=table_name, | ||
| + | |||
| + | print(" | ||
| + | </ | ||
| + | |||
| + | Specific database connection details are omitted. | ||
| + | |||
+ | This Python script can be called repeatedly to load multiple different endpoints, countries, or date-times. | ||
| + | An example of loading IDX endpoints: | ||
| + | <code bash> | ||
| + | # | ||
| + | </ | ||
apiload.1757670241.txt.gz · Last modified: by 159.196.132.18
