{ "cells": [ { "cell_type": "markdown", "id": "06eecdd2-f107-4ae7-bb8d-39bb6c6ed6a0", "metadata": {}, "source": [ "# ADS-B API\n", "\n", "The [Contrails API](https://api.contrails.org) enables authorized users to access a common ADS-B dataset for contrails research. \n", "\n", "The underlying ADS-B data is provided by [Spire Aviation](https://aviation-docs.spire.com/).\n", "\n", "E-mail [api@contrails.org](mailto:api@contrails.org?subject=Common%20ADS-B%20Access) with subject **Common ADS-B Access** to learn more about how your organization can participate in this program." ] }, { "cell_type": "code", "execution_count": 1, "id": "25cdb008-d720-4614-9dcf-d1785e40349d", "metadata": {}, "outputs": [], "source": [ "import os" ] }, { "cell_type": "code", "execution_count": 2, "id": "d5aaa134-eed6-40e6-83e1-e968c4750da3", "metadata": {}, "outputs": [], "source": [ "# Load API key\n", "# (contact api@contrails.org if you need an API key)\n", "URL = \"https://api.contrails.org\"\n", "API_KEY = os.environ[\"CONTRAILS_API_KEY\"]\n", "HEADERS = {\"x-api-key\": API_KEY}" ] }, { "cell_type": "markdown", "id": "8fe4646c-0521-4c64-94ab-2cc4def86532", "metadata": {}, "source": [ "## Telemetry\n", "\n", "**GET [/v1/adsb/telemetry](https://api.contrails.org/openapi#/ADS-B/get_telemetry_v1_adsb_telemetry_get)**\n", "\n", "> Note this endpoint can take up to 30 seconds to return depending on bandwidth\n", "\n", "This endpoint returns 1 hour range of all global ADS-B telemetry data as an [Apache Parquet](https://parquet.apache.org/) file.\n", "\n", "Input date must be an ISO 8601 datetime string (UTC) with hourly resolution, e.g. `\"2025-01-06T00\"`. \n", "Any minute or second resolution is ignored.\n", "\n", "See the [ADS-B schema](https://apidocs.contrails.org/_static/adsb-schema.json) for the description of each data key in the Parquet file." 
] }, { "cell_type": "code", "execution_count": 3, "id": "bb7d9661-2e40-407e-95f8-6cffb3dfd0c1", "metadata": {}, "outputs": [], "source": [ "import requests # pip install requests\n", "import matplotlib.pyplot as plt # pip install matplotlib\n", "import pandas as pd # pip install pandas" ] },
{ "cell_type": "markdown", "id": "02244627-ddb4-4503-9ffe-31c0ef0190e4", "metadata": {}, "source": [ "### Get data for a single hour" ] },
{ "cell_type": "code", "execution_count": 4, "id": "71be1e61-0ff6-46cd-81f5-9952910ebcc8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "HTTP Response Code: 200 OK\n", "\n" ] } ], "source": [
"# Request one hour of telemetry and save the response body as a local parquet file\n",
"params = {\n",
"    \"date\": \"2025-01-24T02\"  # ISO 8601 (UTC)\n",
"}\n",
"\n",
"r = requests.get(f\"{URL}/v1/adsb/telemetry\", params=params, headers=HEADERS)\n",
"print(f\"HTTP Response Code: {r.status_code} {r.reason}\\n\")\n",
"\n",
"# stop on a 4xx/5xx response so an error body is never saved as a `.pq` file\n",
"r.raise_for_status()\n",
"\n",
"# write out response content as parquet file\n",
"with open(f\"{params['date']}.pq\", \"wb\") as f:\n",
"    f.write(r.content)"
] },
{ "cell_type": "code", "execution_count": 5, "id": "58f47a94-4a68-48ee-83b5-64a421501900", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of unique flights: 17103\n", "Number of unique waypoints: 1093322\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestamplatitudelongitudecollection_typealtitude_baroicao_addressflight_idcallsigntail_numberflight_numberaircraft_type_icaoairline_iatadeparture_airport_icaodeparture_scheduled_timearrival_airport_icaoarrival_scheduled_time
02025-01-24 02:59:5937.882629-80.429001terrestrial31000A9198693a5cd24-1e7b-4dca-a07d-ad391a2e8237PDT5701N686AEAA5701E145PTKCLT2025-01-24 01:53:00KERI2025-01-24 03:48:00
12025-01-24 02:59:5936.193588-112.395912terrestrial35000AB415Ed1dfe570-9e39-4323-a6cf-f4cf602b4149SCX618N824SYSY618B738SYKPSP2025-01-24 02:24:00KMSP2025-01-24 05:41:00
22025-01-24 02:59:59-44.230362171.841019terrestrial33950C81D8E12d6d993-c01e-4553-80e1-944a34119f69ANZ689ZK-OABNZ689A320NZNZWN2025-01-24 02:05:00NZDN2025-01-24 03:25:00
32025-01-24 02:59:5943.00835426.135494terrestrial380004B187F0e7d48c3-a4e2-4489-aaa3-4c9b9bea05c2SWR155HB-JHFLX155A333LXVABB2025-01-23 19:50:00LSZH2025-01-24 05:10:00
42025-01-24 02:59:5928.975525-109.411362terrestrial370000D09D5b4050af1-1fc8-4997-ac5a-1d46b690c869VOI1743XA-VLUY41743A321Y4KLAS2025-01-24 01:31:00MMGL2025-01-24 04:43:00
\n", "
" ], "text/plain": [ " timestamp latitude longitude collection_type altitude_baro \\\n", "0 2025-01-24 02:59:59 37.882629 -80.429001 terrestrial 31000 \n", "1 2025-01-24 02:59:59 36.193588 -112.395912 terrestrial 35000 \n", "2 2025-01-24 02:59:59 -44.230362 171.841019 terrestrial 33950 \n", "3 2025-01-24 02:59:59 43.008354 26.135494 terrestrial 38000 \n", "4 2025-01-24 02:59:59 28.975525 -109.411362 terrestrial 37000 \n", "\n", " icao_address flight_id callsign tail_number \\\n", "0 A91986 93a5cd24-1e7b-4dca-a07d-ad391a2e8237 PDT5701 N686AE \n", "1 AB415E d1dfe570-9e39-4323-a6cf-f4cf602b4149 SCX618 N824SY \n", "2 C81D8E 12d6d993-c01e-4553-80e1-944a34119f69 ANZ689 ZK-OAB \n", "3 4B187F 0e7d48c3-a4e2-4489-aaa3-4c9b9bea05c2 SWR155 HB-JHF \n", "4 0D09D5 b4050af1-1fc8-4997-ac5a-1d46b690c869 VOI1743 XA-VLU \n", "\n", " flight_number aircraft_type_icao airline_iata departure_airport_icao \\\n", "0 AA5701 E145 PT KCLT \n", "1 SY618 B738 SY KPSP \n", "2 NZ689 A320 NZ NZWN \n", "3 LX155 A333 LX VABB \n", "4 Y41743 A321 Y4 KLAS \n", "\n", " departure_scheduled_time arrival_airport_icao arrival_scheduled_time \n", "0 2025-01-24 01:53:00 KERI 2025-01-24 03:48:00 \n", "1 2025-01-24 02:24:00 KMSP 2025-01-24 05:41:00 \n", "2 2025-01-24 02:05:00 NZDN 2025-01-24 03:25:00 \n", "3 2025-01-23 19:50:00 LSZH 2025-01-24 05:10:00 \n", "4 2025-01-24 01:31:00 MMGL 2025-01-24 04:43:00 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read parquet file with pandas\n", "df = pd.read_parquet(f\"{params['date']}.pq\")\n", "\n", "print(\"Number of unique flights:\", df[\"flight_id\"].nunique())\n", "print(\"Number of unique waypoints:\", len(df[\"flight_id\"]))\n", "\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "d64172ec-beb0-465b-8f56-d439b4fd15f4", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# select single flight and plot\n", "flight_id = df.iloc[0][\"flight_id\"]\n", "flight = df.loc[df[\"flight_id\"] == flight_id]\n", "flight.plot.scatter(x=\"longitude\", y=\"latitude\", c=\"altitude_baro\", cmap=\"bwr\", s=2);" ] }, { "cell_type": "markdown", "id": "bf7c6766-3585-4353-bad4-5587adf23106", "metadata": {}, "source": [ "### Aggregate data over multiple hours" ] }, { "cell_type": "code", "execution_count": 7, "id": "566809d5-60ef-4dc8-a356-5da708e79e0d", "metadata": {}, "outputs": [], "source": [ "start = \"2025-01-15T02\"\n", "end = \"2025-01-15T03\"\n", "times = pd.date_range(start=start, end=end, freq=\"h\")\n", "times_str = [t.strftime(\"%Y-%m-%dT%H\") for t in times]" ] }, { "cell_type": "code", "execution_count": 8, "id": "6e0909b6-c4d5-46cb-bf83-fdffc8691199", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading hour: 2025-01-15T02\n", "HTTP Response Code: 200 OK\n", "\n", "Downloading hour: 2025-01-15T03\n", "HTTP Response Code: 200 OK\n", "\n" ] } ], "source": [ "for t in times_str:\n", " print(f\"Downloading hour: {t}\")\n", "\n", " r = requests.get(f\"{URL}/v1/adsb/telemetry\", params={\"date\": t}, headers=HEADERS)\n", " print(f\"HTTP Response Code: {r.status_code} {r.reason}\\n\")\n", "\n", " # write out response content as parquet file\n", " with open(f\"{t}.pq\", \"wb\") as f:\n", " f.write(r.content)" ] }, { "cell_type": "code", "execution_count": 9, "id": "1cb80397-83a8-4319-8979-705fd81611c1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of unique flights: 21240\n", "Number of unique waypoints: 1944787\n" ] } ], "source": [ "dfs = []\n", "for t in times_str:\n", " dfs.append(pd.read_parquet(f\"{t}.pq\"))\n", "\n", "df = pd.concat(dfs)\n", "\n", "print(\"Number of unique flights:\", df[\"flight_id\"].nunique())\n", "print(\"Number of unique waypoints:\", 
len(df[\"flight_id\"]))" ] }, { "cell_type": "code", "execution_count": 10, "id": "90e10332-a007-412a-92ad-0284365a2191", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# select single flight and plot\n", "flight_id = df.iloc[0][\"flight_id\"]\n", "flight = df.loc[df[\"flight_id\"] == flight_id]\n", "flight.plot.scatter(x=\"longitude\", y=\"latitude\", c=\"altitude_baro\", cmap=\"bwr\", s=2);" ] }, { "cell_type": "markdown", "id": "c5c8816c-ad6e-47d1-be33-7d3e49dc6808", "metadata": {}, "source": [ "## Bulk Load ADS-B into external datastore\n", "\n", "> This section requires a fresh notebook kernel.\n", "> Restart the kernel if you have already run the section above.\n", "\n", "This section will provide a tutorial that covers:\n", "\n", "- Fetching a range of ADS-B data from the Contrails API\n", "- Loading those data into an external database/datastore\n", "\n", "This tutorial will focus on loading data into a [Google BigQuery table](https://cloud.google.com/bigquery).\n", "The same approach can be adapted to load these data into other database / datastores.\n", "\n", "This process is useful if you want to perform advanced queries on the dataset.\n", "\n", "### Prerequisites\n", "\n", "You must have a [Google Cloud account](https://cloud.google.com/), and the [Google Cloud CLI](https://cloud.google.com/sdk/docs/install) (`gcloud`) installed on your machine.\n", "\n", "You must also have set up a [BigQuery table](https://console.cloud.google.com/bigquery) and given your account [the required permissions](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet#required_permissions) to load data into this table." 
] }, { "cell_type": "code", "execution_count": 1, "id": "2a99c66a-7861-4c53-b77d-1f2c6bc40a54", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "from pathlib import Path\n", "\n", "# NOTE: grequests *must* be imported before requests, or you will see a MonkeyPatchWarning\n", "import grequests # pip install grequests (for parallel REST requests)\n", "import pandas as pd # pip install pandas\n", "\n", "from google.cloud import bigquery # pip install google-cloud-bigquery\n", "from google.cloud.bigquery import LoadJobConfig" ] }, { "cell_type": "code", "execution_count": 2, "id": "05bb4abf-845a-4add-9baa-ec7ad13612af", "metadata": {}, "outputs": [], "source": [ "# Load API key\n", "URL = \"https://api.contrails.org\"\n", "API_KEY = os.environ[\"CONTRAILS_API_KEY\"]\n", "HEADERS = {\"x-api-key\": API_KEY}" ] }, { "cell_type": "markdown", "id": "a1ea9b43-a03d-48db-871d-254dbf49b0b4", "metadata": {}, "source": [ "### Download ADS-B data files to your machine\n", "\n", "Set target hours for ADS-B data, then fetch ADS-B data from the Contrails API in parallel, saving parquet files to the local machine."
] }, { "cell_type": "code", "execution_count": 3, "id": "bb74d1a6-b223-4762-b703-450ba0a99322", "metadata": {}, "outputs": [], "source": [
"# 7 hourly files: `pd.date_range` includes both `start` and `end`\n",
"start = \"2025-01-16T00\"\n",
"end = \"2025-01-16T06\"\n",
"times = pd.date_range(start=start, end=end, freq=\"h\")\n",
"times_str = [t.strftime(\"%Y-%m-%dT%H\") for t in times]"
] },
{ "cell_type": "code", "execution_count": 4, "id": "4fecb7aa-806a-4486-8360-240b0c29a755", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2025-01-16T00: 200 OK\n", "2025-01-16T01: 200 OK\n", "2025-01-16T02: 200 OK\n", "2025-01-16T03: 200 OK\n", "2025-01-16T04: 200 OK\n", "2025-01-16T05: 200 OK\n", "2025-01-16T06: 200 OK\n" ] } ], "source": [
"# Use `grequests` to send out parallel API requests\n",
"# (this cell can take minutes to evaluate depending on bandwidth)\n",
"req = (\n",
"    grequests.get(f\"{URL}/v1/adsb/telemetry\", params={\"date\": t}, headers=HEADERS)\n",
"    for t in times_str\n",
")\n",
"responses = grequests.map(req, size=25)\n",
"\n",
"# create local directory to store local parquet files\n",
"os.makedirs(\"adsb\", exist_ok=True)\n",
"\n",
"# Write out each successful hour as a parquet file in subdirectory `adsb`\n",
"failed = []\n",
"for t, r in zip(times_str, responses):\n",
"    # `grequests.map` returns `None` in place of a response when the request itself failed\n",
"    if r is None:\n",
"        print(f\"{t}: request failed (no response)\")\n",
"        failed.append(t)\n",
"        continue\n",
"\n",
"    print(f\"{t}: {r.status_code} {r.reason}\")\n",
"\n",
"    # skip 4xx/5xx responses so an error body is never saved as a `.pq` file\n",
"    if not r.ok:\n",
"        failed.append(t)\n",
"        continue\n",
"\n",
"    # write out response content as parquet file\n",
"    path = Path(f\"adsb/{t}.pq\")\n",
"    path.write_bytes(r.content)\n",
"\n",
"# report incomplete downloads here rather than as a missing file in a later cell\n",
"if failed:\n",
"    raise RuntimeError(f\"Failed to download ADS-B data for: {', '.join(failed)}\")"
] },
{ "cell_type": "markdown", "id": "077c5666-ed0c-46fa-b674-6380956e948f", "metadata": {}, "source": [ "### (Optional) Create the target BigQuery table\n", "\n", "If a target BigQuery table does not exist, then create one prior to inserting the target data.\n", "\n", "The table must have a schema compatible with the fields present in the parquet ADS-B data.\n", "\n", "You can create a table using the `bq mk` command (`bq` comes bundled with the `gcloud` CLI).\n", "\n", "```bash\n", "bq mk --table project_id:dataset_id.table_id
adsb-schema.json\n", "```\n", "\n", "- `project_id` is the GCP project ID for your account.\n", "- `dataset_id` is the BigQuery dataset where you want to create a new table.\n", "\n", "> If the dataset does not already exist, you will have to create it first with the \n", "> [`bq mk --dataset` command](https://cloud.google.com/bigquery/docs/datasets#bq) \n", "> (or via the web Console...)\n", "\n", "- `table_id` is the table name for the new table you are creating.\n", "- `adsb-schema.json` is the filepath to a local JSON file with the schema definition for the new table. Download the [ADS-B schema](https://apidocs.contrails.org/_static/adsb-schema.json) provided in the documentation - this schema is compatible with the BigQuery API.\n", "\n", "```bash\n", "curl -X GET https://apidocs.contrails.org/_static/adsb-schema.json > adsb-schema.json\n", "```" ] }, { "cell_type": "code", "execution_count": 5, "id": "85c82714-622c-4cb8-a318-ca8626b78875", "metadata": {}, "outputs": [], "source": [ "# !bq mk --table project_id:dataset_id.table_id adsb-schema.json" ] }, { "cell_type": "markdown", "id": "04cf0db4-e6f7-48b8-84cd-f70381574b5a", "metadata": {}, "source": [ "### Load data into a BigQuery table\n", "\n", "Assuming you have an empty BigQuery table created, the following loads local data into the BigQuery table one file at a time.\n", "\n", "> **PRO TIP**\n", "> \n", "> To maximize BigQuery load speed, \n", "> consider moving the dataset into a Google Cloud Storage Bucket.\n", ">\n", "> See [client.load_table_from_uri(..)](https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_load_table_from_uri) or the [`bq load` command](https://cloud.google.com/bigquery/docs/batch-loading-data#permissions-load-data-from-cloud-storage).\n", "> \n", "> Uploading from a GCS bucket will increase upload speed both due to \n", "> the bucket being in the Google network (high uplink speed), and the commands
above supporting wildcards for GCS URI paths." ] },
{ "cell_type": "code", "execution_count": 6, "id": "cf144e4b-7151-488d-8ca7-977f700ac36d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading 2025-01-16T00\n", "Loaded 1158825 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T01\n", "Loaded 1197240 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T02\n", "Loaded 1111672 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T03\n", "Loaded 966330 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T04\n", "Loaded 895878 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T05\n", "Loaded 736327 rows into contrails-301217.sandbox.adsb3\n", "Loading 2025-01-16T06\n", "Loaded 601999 rows into contrails-301217.sandbox.adsb3\n" ] } ], "source": [
"# Initialize BigQuery client\n",
"client = bigquery.Client()  # Uses your default GCP \"project\" - see `gcloud config list`\n",
"\n",
"# Create table reference\n",
"project_id = \"\"  # REPLACE WITH YOUR GCP PROJECT\n",
"dataset_id = \"\"  # REPLACE WITH YOUR BQ DATASET\n",
"table_id = \"\"  # REPLACE WITH YOUR BQ TABLE\n",
"\n",
"# fail early with a clear message if any placeholder above was left empty\n",
"if not (project_id and dataset_id and table_id):\n",
"    raise ValueError(\"Set `project_id`, `dataset_id` and `table_id` before loading data\")\n",
"\n",
"bigquery_id = f\"{project_id}.{dataset_id}.{table_id}\"\n",
"\n",
"# Load schema\n",
"with open(\"adsb-schema.json\", \"r\") as f:\n",
"    schema = json.load(f)\n",
"\n",
"# Configure the loading job\n",
"job_config = LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET, schema=schema)\n",
"\n",
"# Load the local parquet files one hour at a time\n",
"for t in times_str:\n",
"    # read in parquet file\n",
"    path = Path(f\"adsb/{t}.pq\")\n",
"    print(f\"Loading {t}\")\n",
"\n",
"    # Open the local parquet file\n",
"    with open(path, \"rb\") as f:\n",
"        # Start the load job\n",
"        load_job = client.load_table_from_file(f, bigquery_id, job_config=job_config)\n",
"\n",
"    # Wait for job completion\n",
"    load_job.result()\n",
"\n",
"    print(f\"Loaded {load_job.output_rows} rows into {bigquery_id}\")"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3
(ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }