From eb5c7be8657146252c5806fd50eb1c58ec199e70 Mon Sep 17 00:00:00 2001 From: suman shekhar Date: Fri, 14 Jun 2024 10:49:42 -0600 Subject: [PATCH] Add files via upload --- AWS_access_cookbook.ipynb | 525 ++++++++++++++++++++++++++++++++++ channel_paths.txt | 16 ++ real-time_file_path_update.py | 86 ++++++ setup.py | 15 + 4 files changed, 642 insertions(+) create mode 100644 AWS_access_cookbook.ipynb create mode 100644 channel_paths.txt create mode 100644 real-time_file_path_update.py create mode 100644 setup.py diff --git a/AWS_access_cookbook.ipynb b/AWS_access_cookbook.ipynb new file mode 100644 index 00000000..caca1b4f --- /dev/null +++ b/AWS_access_cookbook.ipynb @@ -0,0 +1,525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8b5f0b3c-66b9-4df9-b629-f112a80660ba", + "metadata": {}, + "source": [ + "# Accessing Real-Time NOAA GOES16 Data from AWS bucket\n", + "##\n", + "This notebook demonstrates how to access and visualize real-time NOAA GOES16 satellite data from the AWS s3 bucket. We will use various Python libraries to read, process, and visualize this data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "06ba2399-99a4-4a6e-8246-58ec191591e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Make sure you have all the following libraries installed\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "warnings.simplefilter('ignore', SyntaxWarning)\n", + "import boto3\n", + "from botocore import UNSIGNED\n", + "from botocore.client import Config\n", + "from satpy.scene import Scene\n", + "from satpy.utils import debug_on\n", + "\n", + "from datetime import datetime\n", + "import time\n", + "from glob import glob\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "f28327c3-b1e5-4821-803e-05bd3a4a528c", + "metadata": {}, + "source": [ + "In this section, we extract various components of the current UTC date and time, including the year, month, day, and hour. Additionally, we convert the current date to its corresponding Julian day number and display it. This process is essential for time-sensitive data operations, such as handling satellite imagery timestamps." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0b927c29-fdf3-476c-9b4f-b1e848eb6df8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "166\n" + ] + } + ], + "source": [ + "# Get the current UTC time\n", + "now = datetime.utcnow()\n", + "\n", + "# Extract year, month, day, and hour\n", + "year = now.year\n", + "month = now.month\n", + "day = now.day\n", + "hour = now.hour\n", + "\n", + "# Convert the current date to Julian day\n", + "julian_day = now.timetuple().tm_yday\n", + "\n", + "#print the current date\n", + "print(julian_day)" + ] + }, + { + "cell_type": "markdown", + "id": "48112291-b002-4fb8-8cdb-9a62a29020f2", + "metadata": {}, + "source": [ + "## Listing Files from NOAA GOES16 Bucket on AWS S3 Using Anonymous Access\n", + "#\n", + "This section demonstrates how to initialize a session with Amazon S3 without authentication (anonymous access) and lists files from the NOAA GOES16 data bucket. This method is used to retrieve publicly available satellite data, where no AWS credentials are required. The example lists objects within a specific prefix, which needs to be adjusted based on your specific date and time requirements." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0e3ef845-559e-4511-bfdc-e8e269244f4b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C01_G16_s20241661600211_e20241661609519_c20241661609561.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C01_G16_s20241661610211_e20241661619519_c20241661619572.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C01_G16_s20241661620211_e20241661629519_c20241661629561.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C01_G16_s20241661630211_e20241661639519_c20241661639554.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C02_G16_s20241661600211_e20241661609519_c20241661609557.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C02_G16_s20241661610211_e20241661619519_c20241661619550.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C02_G16_s20241661620211_e20241661629519_c20241661629545.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C02_G16_s20241661630211_e20241661639519_c20241661639546.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C03_G16_s20241661600211_e20241661609519_c20241661609570.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C03_G16_s20241661610211_e20241661619519_c20241661619566.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C03_G16_s20241661620211_e20241661629519_c20241661629566.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C03_G16_s20241661630211_e20241661639519_c20241661639564.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C04_G16_s20241661600211_e20241661609519_c20241661609549.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C04_G16_s20241661610211_e20241661619519_c20241661619544.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C04_G16_s20241661620211_e20241661629519_c20241661629551.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C04_G16_s20241661630211_e20241661639519_c20241661639537.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C05_G16_s20241661600211_e20241661609519_c20241661609566.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C05_G16_s20241661610211_e20241661619519_c20241661619557.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C05_G16_s20241661620211_e20241661629519_c20241661629556.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C05_G16_s20241661630211_e20241661639519_c20241661639559.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C06_G16_s20241661600211_e20241661609525_c20241661609544.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C06_G16_s20241661610211_e20241661619525_c20241661619562.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C06_G16_s20241661620211_e20241661629525_c20241661629540.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C06_G16_s20241661630211_e20241661639525_c20241661639543.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C07_G16_s20241661600211_e20241661609531_c20241661609577.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C07_G16_s20241661610211_e20241661619531_c20241661619576.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C07_G16_s20241661620211_e20241661629531_c20241661629574.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C07_G16_s20241661630211_e20241661639531_c20241661639572.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C08_G16_s20241661600211_e20241661609519_c20241661609574.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C08_G16_s20241661610211_e20241661619519_c20241661619580.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C08_G16_s20241661620211_e20241661629519_c20241661629570.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C08_G16_s20241661630211_e20241661639519_c20241661639568.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C09_G16_s20241661600211_e20241661609525_c20241661609574.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C09_G16_s20241661610211_e20241661619525_c20241661619581.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C09_G16_s20241661620211_e20241661629525_c20241661629589.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C09_G16_s20241661630211_e20241661639525_c20241661639582.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C10_G16_s20241661600211_e20241661609530_c20241661609570.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C10_G16_s20241661610211_e20241661619530_c20241661619559.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C10_G16_s20241661620211_e20241661629531_c20241661629564.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C10_G16_s20241661630211_e20241661639531_c20241661639571.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C11_G16_s20241661600211_e20241661609519_c20241661609560.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C11_G16_s20241661610211_e20241661619519_c20241661619564.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C11_G16_s20241661620211_e20241661629519_c20241661629558.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C11_G16_s20241661630211_e20241661639519_c20241661639560.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C12_G16_s20241661600211_e20241661609525_c20241661609566.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C12_G16_s20241661610211_e20241661619525_c20241661619577.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C12_G16_s20241661620211_e20241661629525_c20241661629569.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C12_G16_s20241661630211_e20241661639525_c20241661639575.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C13_G16_s20241661600211_e20241661609531_c20241661609591.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C13_G16_s20241661610211_e20241661619531_c20241661619585.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C13_G16_s20241661620211_e20241661629530_c20241661629584.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C13_G16_s20241661630211_e20241661639531_c20241661639590.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C14_G16_s20241661600211_e20241661609519_c20241661609583.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C14_G16_s20241661610211_e20241661619519_c20241661619573.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C14_G16_s20241661620211_e20241661629519_c20241661629580.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C14_G16_s20241661630211_e20241661639519_c20241661639586.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C15_G16_s20241661600211_e20241661609525_c20241661609578.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C15_G16_s20241661610211_e20241661619525_c20241661619589.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C15_G16_s20241661620211_e20241661629525_c20241661629573.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C15_G16_s20241661630211_e20241661639525_c20241661639567.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C16_G16_s20241661600211_e20241661609531_c20241661609587.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C16_G16_s20241661610211_e20241661619531_c20241661619569.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C16_G16_s20241661620211_e20241661629531_c20241661629577.nc\n", + "ABI-L1b-RadF/2024/166/16/OR_ABI-L1b-RadF-M6C16_G16_s20241661630211_e20241661639531_c20241661639579.nc\n" + ] + } + ], + "source": [ + "\n", + "\n", + "# Initialize a session using anonymous access\n", + "s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))\n", + "\n", + "# Define the bucket name and prefix\n", + "bucket_name = 'noaa-goes16'\n", + "Prefix = f'ABI-L1b-RadF/{year}/{julian_day:03d}/{hour:02d}/' # Adjust based on your date and time requirements\n", + "\n", + "# List objects within the specified prefix\n", + "response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=Prefix)\n", + "\n", + "# Check if there are any contents in the response\n", + "if 'Contents' in response:\n", + " for obj in response['Contents']:\n", + " print(obj['Key'])\n", + "else:\n", + " print(\"No files found in the specified path.\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "45424b8f-7f44-4e72-84d8-efef6f3c2a0b", + "metadata": {}, + "source": [ + "## Now let's see if we can see how many channels are there without downloading the data " + ] + }, + { + "cell_type": "markdown", + "id": "e1a2dc33-c613-4fba-b719-b65582f7a020", + "metadata": {}, + "source": [ + "#\n", + "\n", + "To determine the available channels and composite variables in an S3 bucket for NOAA GOES data without downloading the data, you can list the objects in the bucket and parse their filenames. This approach leverages the naming convention of the files to identify different channels and composites." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "840e7833-a019-4841-8573-52ec047ad0d1", + "metadata": {}, + "outputs": [], + "source": [ + "# first step would be to setup a S3 client using boto3 package for anonymous access to the NOAA GOES S3 bucket.\n", + "\n", + "import boto3\n", + "from botocore import UNSIGNED\n", + "from botocore.client import Config\n", + "from collections import defaultdict\n", + "\n", + "# Initialize the S3 client with anonymous access\n", + "s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e937dea3-2875-438c-95cd-dc9aaf1e12d0", + "metadata": {}, + "outputs": [], + "source": [ + "# Now as you have created a client to connect the s3 bucket, we need to specify which s3 bucket we want to acess\n", + "\n", + "# Define the bucket name and prefix\n", + "bucket_name = 'noaa-goes16'\n", + "prefix = f'ABI-L1b-RadF/{year}/{julian_day:03d}/{hour:02d}/' # Adjust based on your date and time requirements\n", + "\n", + "# List objects within the specified prefix to get file names\n", + "response = s3_client.list_objects_v2(Bucket = bucket_name, Prefix = prefix)\n", + "\n", + "# Initialize dictionaries to store channels and composites\n", + "channels = defaultdict(int)\n", + "composites = defaultdict(int)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "83e75e79-8c6c-484c-962b-df8db7c38593", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Available channels and their counts:\n", + "C01: 4\n", + "C02: 4\n", + "C03: 4\n", + "C04: 4\n", + "C05: 4\n", + "C06: 4\n", + "C07: 4\n", + "C08: 4\n", + "C09: 4\n", + "C10: 4\n", + "C11: 4\n", + "C12: 4\n", + "C13: 4\n", + "C14: 4\n", + "C15: 4\n", + "C16: 4\n" + ] + } + ], + "source": [ + "# Process the list of files\n", + "if 'Contents' in response:\n", + " for obj in response['Contents']:\n", + " key = obj['Key']\n", + " filename = key.split('/')[-1]\n", + " parts = filename.split('_')\n", + " \n", + " if len(parts) > 2:\n", + " channel = parts[1][-3:] # This extracts the channel information (e.g., RadC02)\n", + " channels[channel] += 1\n", + " \n", + "\n", + "# Print the available channels and their counts\n", + "print(\"Available channels and their counts:\")\n", + "for channel, count in channels.items():\n", + " print(f\"{channel}: {count}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "4e911962-eff7-4f30-98ca-3d9d2318d7d1", + "metadata": {}, + "source": [ + "**If you run the above code again and again you would see that it keeps getting updated!!**\n", + "\n", + "## Understanding Data Updates and the Importance of Real-Time Access\n", + "\n", + "### Data Update Mechanisms\n", + "\n", + "The NOAA GOES16 satellite transmits data continuously, covering a wide range of atmospheric and environmental conditions across the Western Hemisphere. This data is crucial for meteorology and climate monitoring. It includes high-resolution imagery and other meteorological parameters that are updated every few minutes.\n", + "\n", + "Data from the GOES16 satellite is captured and then pushed to AWS in near real-time, allowing users to access updated information without significant delays. This real-time stream ensures that every new piece of data—be it cloud patterns, storm developments, or atmospheric changes—is made available to users as quickly as possible.\n", + "\n", + "### Why Real-Time Access Is Crucial\n", + "\n", + "1. **Weather Forecasting and Alerts**: Real-time data access is critical for weather forecasting. Meteorologists rely on up-to-the-minute data to predict severe weather events, including hurricanes, tornadoes, and thunderstorms, which can save lives and property by providing timely warnings.\n", + "\n", + "2. **Research and Environmental Monitoring**: Scientists studying climate patterns, atmospheric science, and environmental changes depend on the most current data to ensure the accuracy of their work. Delays in data access can affect the relevance and effectiveness of scientific research.\n", + "\n", + "3. **Commercial Applications**: Industries such as agriculture, aviation, and maritime operations rely heavily on accurate weather forecasts which depend on real-time data. For instance, farmers need timely weather information to plan irrigation and harvesting, while airlines use real-time weather data to route flights safely and efficiently.\n", + "\n", + "4. **Public Safety and Emergency Response**: During natural disasters, real-time satellite data can guide evacuation plans and emergency responses, providing updated information on disaster progression and affected areas.\n", + "\n", + "### How Users Access This Data\n", + "\n", + "To access this real-time data, users can connect to AWS where the data is stored in S3 buckets. The data is organized and can be accessed publicly or with authenticated sessions depending on the data's sensitivity and intended usage. This setup allows users from around the world to download and utilize the data for a variety of applications seamlessly.\n", + "\n", + "The use of services like AWS ensures scalability and reliability in data delivery, which is vital for maintaining continuous access to satellite data for global users.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "4aed90a5-3850-44ab-bb9d-df00324262cc", + "metadata": {}, + "source": [ + "## Running the Real-Time Data Updater\n", + "\n", + "In this section, we discuss how we automate the process of fetching and updating real-time satellite data from NOAA GOES16 using a background process.\n", + "\n", + "### Overview of `run_updater()`\n", + "\n", + "The `run_updater()` function is a crucial part of our notebook's capability to access and process real-time data. This function is designed to run continuously in the background, checking for new satellite data updates and loading them into our analysis environment as soon as they become available.\n", + "\n", + "### Implementation Details\n", + "\n", + "The `run_updater()` function is part of a Python package called `real_time_package`, specifically within the `runner` module. Here’s a brief overview of how it works:\n", + "\n", + "1. **Background Thread**: The function initiates a background thread that runs independently of the main notebook execution. This allows the notebook to remain responsive and capable of performing other tasks while data updates are being managed.\n", + "\n", + "2. **Data Checking**: The background thread periodically checks for new data files in the NOAA GOES16 AWS S3 bucket. It uses predefined criteria to determine which files are new since the last check.\n", + "\n", + "3. **Data Loading**: Once new files are detected, the function automatically loads them into the current session, ensuring that the dataset in use is always up to date.\n", + "\n", + "4. **Error Handling**: It includes robust error handling mechanisms to manage issues like network interruptions or access errors, ensuring the process is reliable and the notebook environment remains stable.\n", + "\n", + "### Usage\n", + "\n", + "```python\n", + "from real_time_package.runner import run_updater\n", + "\n", + "# Start the data updater in the background\n", + "run_updater()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "dc98ff76-01cd-416c-99fc-2bf0928df514", + "metadata": {}, + "outputs": [], + "source": [ + "from real_time_package.runner import run_updater\n", + "\n", + "run_updater()" + ] + }, + { + "cell_type": "markdown", + "id": "8ebfcabe-052c-4c6a-b431-4c26cd37f70f", + "metadata": {}, + "source": [ + "## Having updated the list of paths, let's now visualize this data directly without the need for downloading it." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f01f9c56-54e9-4a69-886d-d6b424e52a80", + "metadata": {}, + "outputs": [], + "source": [ + "import s3fs\n", + "\n", + "#This time we have txt file where all the file path names are updated according to the time stamp.\n", + "\n", + "# Define the s3 bucket and file path\n", + "bucket_name = 'noaa-goes16'\n", + "\n", + "# Read the file paths from the local file\n", + "path_to_channel = 'channel_paths.txt'\n", + "with open(path_to_channel, 'r') as f:\n", + " file_paths = [line.strip() for line in f.readlines()]\n", + "\n", + "# Prefix the file paths with 's3://' to use them directly from S3\n", + "s3_file_paths = [f's3://{bucket_name}/{path}' for path in file_paths]\n", + "\n", + "# Initialize the S3 filesystem with anonymous acess\n", + "s3 = s3fs.S3FileSystem(anon = True)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "231ff94a-c3e0-4e52-a0b2-059f5a15be9f", + "metadata": {}, + "source": [ + "## Visualize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9e038d1-aaf2-4929-a78c-39118a1301d7", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "# Create a Satpy Scene with the file-like objects from S3\n", + "scn = Scene(filenames=s3_file_paths, reader='abi_l1b')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c9744f8-fec0-42ea-bfac-e95453f76ebc", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "composite_variable = 'airmass'\n", + "scn.load([composite_variable], use_dask=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ea0e08a-91ff-4079-b4be-05867b351456", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "#print the start and end times\n", + "print(f\"Start time: {scn.start_time}\")\n", + "print(f\"End time: {scn.end_time}\")\n", + "\n", + "#plot the data using matplotlib\n", + "# scn.show(composite_variable)\n", + "\n", + "# save the image to a file\n", + "# output_path = f'{composite_variable}.png'\n", + "# scn.save_dataset(rgb_im, filename=output_path)\n", + "# print(f\"Airmass image saved at: {output_path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "0dab3f4e-6868-44de-9a31-7ea2e0708fdb", + "metadata": {}, + "source": [ + "\n", + "\n", + "### Creation of Composite Variables\n", + "\n", + "Composite variables are synthesized views created from multiple individual data channels, offering a more comprehensive understanding of the observed scenes. Here’s how Satpy creates these composites:\n", + "\n", + "1. **Combining Channels**: Satpy combines data from multiple channels based on predefined or custom algorithms. For example, a true-color image composite uses red, green, and blue spectral data to create a representation similar to what the human eye would see.\n", + "2. **Enhancements**: After combining the channels, Satpy applies enhancements to improve the visual contrast and color of the composite images. These enhancements might include adjusting brightness, contrast, and color curves to make certain features more distinguishable.\n", + "3. **Mapping**: The data is often reprojected onto a standard map projection for easy integration and comparison with other geospatial data, facilitating uses in weather forecasting, research, and decision support systems.\n", + "\n", + "### Example: True Color Composite\n", + "\n", + "A common composite created by Satpy is the \"True Color\" image, which simulates natural coloration of the Earth as seen from space. It uses data from the red, green, and blue visible light channels. Here’s a simple example of how to create this composite in Satpy:\n", + "\n", + "```python\n", + "# Load the necessary channels\n", + "channels = ['C01', 'C02', 'C03'] # These channel names may vary based on the satellite\n", + "scn.load(channels)\n", + "\n", + "# Create the true color composite\n", + "true_color = scn['true_color']\n", + "\n", + "# Show the composite\n", + "true_color.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4e74750-8929-4948-842b-aee6ad395cd2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/channel_paths.txt b/channel_paths.txt new file mode 100644 index 00000000..cc73a961 --- /dev/null +++ b/channel_paths.txt @@ -0,0 +1,16 @@ +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C01_G16_s20241652130208_e20241652139516_c20241652139548.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C02_G16_s20241652130208_e20241652139516_c20241652139560.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C03_G16_s20241652130208_e20241652139516_c20241652139572.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C04_G16_s20241652130208_e20241652139516_c20241652139539.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C05_G16_s20241652130208_e20241652139516_c20241652139575.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C06_G16_s20241652130208_e20241652139522_c20241652139556.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C07_G16_s20241652130208_e20241652139528_c20241652139568.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C08_G16_s20241652130208_e20241652139516_c20241652139564.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C09_G16_s20241652130208_e20241652139521_c20241652139566.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C10_G16_s20241652130208_e20241652139527_c20241652139558.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C11_G16_s20241652130208_e20241652139516_c20241652139562.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C12_G16_s20241652130208_e20241652139522_c20241652139549.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C13_G16_s20241652130208_e20241652139527_c20241652139577.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C14_G16_s20241652130208_e20241652139516_c20241652139573.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C15_G16_s20241652130208_e20241652139522_c20241652139581.nc +ABI-L1b-RadF/2024/165/21/OR_ABI-L1b-RadF-M6C16_G16_s20241652130208_e20241652139527_c20241652139570.nc diff --git a/real-time_file_path_update.py b/real-time_file_path_update.py new file mode 100644 index 00000000..a9721a24 --- /dev/null +++ b/real-time_file_path_update.py @@ -0,0 +1,86 @@ +import boto3 +import datetime +import time +from botocore import UNSIGNED +from botocore.client import Config +import os + +def get_updated_files(year, julian_day, last_modified_cutoff): + """ + Get files from an S3 bucket that were modified after a given datetime. + + :param year: The year to check. + :param julian_day: The Julian day to check. + :param last_modified_cutoff: The cutoff datetime for filtering files. + :return: A list of file keys that were modified after the given datetime. + """ + # Initialize the S3 client with anonymous access + s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED)) + + # Define the bucket name + bucket_name = 'noaa-goes16' + + # List to store the filtered files + filtered_files = [] + + # Iterate over each hour of the day + for hour in range(24): + # Construct the S3 prefix + prefix = f'ABI-L1b-RadF/{year}/{julian_day:03d}/{hour:02d}/' + + # List objects within the specified prefix to get file names + response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) + + # Process the list of files + if 'Contents' in response: + for obj in response['Contents']: + last_modified = obj['LastModified'] + # Ensure last_modified is timezone-aware + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=datetime.timezone.utc) + if last_modified > last_modified_cutoff: + filtered_files.append(obj['Key']) + + return filtered_files + +def write_path_to_file(file_path, new_paths): + """ + Write the in-memory data to a file. + + :param file_path: The path to the file to write to. + :param new_paths: A list of new paths to write. + """ + with open(file_path, 'w') as f: + for path in new_paths: + f.write(f"{path}\n") + +def main(): + # Define the path to the file that stores the channel paths + file_path = 'channel_paths.txt' + + while True: + # Get the current UTC time + now = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc) + + # Extract year and Julian day + year = now.year + julian_day = now.timetuple().tm_yday + + # Define the cutoff time for filtering + # Set the cutoff to 10 minutes before the current time + last_modified_cutoff = now - datetime.timedelta(minutes=10) + + # Get the list of files modified after the cutoff time + updated_files = get_updated_files(year, julian_day, last_modified_cutoff) + + # Write the new paths to the file, replacing the old content + write_path_to_file(file_path, updated_files) + + # Print the current time and the number of updated files + print(f"{datetime.datetime.now()} - Updated {len(updated_files)} files.") + + # Sleep for 10 minutes before checking again + time.sleep(600) # 600 seconds = 10 minutes + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..0dd83aea --- /dev/null +++ b/setup.py @@ -0,0 +1,15 @@ +# setup.py + +from setuptools import setup, find_packages + +setup( + name='real_time_package', + version='0.1', + packages=find_packages(), + install_requires=[ + 'boto3', + 's3fs', + 'satpy', + 'matplotlib' + ], +)