This shows you the differences between two versions of the page.
| Next revision | Previous revision | ||
| wiki:ai:video-keyword-transcription [2025/05/30 17:07] – created ddehamer | wiki:ai:video-keyword-transcription [2025/06/02 12:41] (current) – ddehamer | ||
|---|---|---|---|
| Line 61: | Line 61: | ||
| azure_video_highlight_demo.py | azure_video_highlight_demo.py | ||
| - | < | + | |
| + | < | ||
| # | # | ||
| Line 169: | Line 170: | ||
| for phrase in key_phrases: | for phrase in key_phrases: | ||
| print(" | print(" | ||
| + | </ | ||
| + | |||
| + | ===== Telemetry ===== | ||
| + | |||
| + | To connect telemetry from this script and the Azure Cognitive Services it uses (Video Indexer and Text Analytics) to **Azure Application Insights**, you would need to **instrument the Python script** to: | ||
| + | |||
| + | - Send custom telemetry (e.g., video processing status, API latency, errors) | ||
| + | - Optionally, use **dependency tracking** to capture outbound API calls | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ===== ✅ Step-by-Step: | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ==== 🧱 1. Create an Application Insights Resource ==== | ||
| + | |||
| + | - Go to Azure Portal | ||
| + | - Click **Create a resource** | ||
| + | - Search for **Application Insights** | ||
| + | - Choose: | ||
| + | * **Resource group** | ||
| + | * **Region** | ||
| + | * **Name** | ||
| + | * **Application Type**: General | ||
| + | - Click **Create** | ||
| + | - After it's created, go to the resource and copy the **Instrumentation Key** or **Connection String** | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ==== 🧪 2. Install Application Insights SDK for Python ==== | ||
| + | |||
| + | <code -> | ||
+ | pip install opencensus-ext-azure opencensus-ext-requests opencensus | ||
| + | |||
| + | </ | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ==== 🧰 3. Add Instrumentation to Your Script ==== | ||
| + | |||
| + | At the top of your script, import and configure the telemetry client: | ||
| + | |||
| + | <code -> | ||
+ | from opencensus.ext.azure.log_exporter import AzureLogHandler | ||
| + | from opencensus.ext.azure.trace_exporter import AzureExporter | ||
| + | from opencensus.trace.samplers import ProbabilitySampler | ||
| + | from opencensus.trace.tracer import Tracer | ||
| + | import logging | ||
| + | |||
| + | # Replace with your Application Insights connection string | ||
| + | APP_INSIGHTS_CONNECTION_STRING = " | ||
| + | |||
| + | # Set up logging | ||
| + | logger = logging.getLogger(__name__) | ||
| + | logger.addHandler(AzureLogHandler(connection_string=APP_INSIGHTS_CONNECTION_STRING)) | ||
| + | logger.setLevel(logging.INFO) | ||
| + | |||
| + | # Set up tracing (optional) | ||
| + | tracer = Tracer( | ||
| + | exporter=AzureExporter(connection_string=APP_INSIGHTS_CONNECTION_STRING), | ||
| + | sampler=ProbabilitySampler(1.0), | ||
| + | ) | ||
| + | |||
| + | </ | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ==== 📝 4. Log Custom Events, Metrics, and Errors ==== | ||
| + | |||
| + | Throughout your script, add telemetry like this: | ||
| + | |||
| + | <code -> | ||
+ | logger.info(" | ||
| + | |||
| + | # On success | ||
| + | logger.info(" | ||
| + | |||
| + | # On API error | ||
| + | logger.error(" | ||
| + | |||
| + | # Add custom dimensions | ||
| + | logger.info(" | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | " | ||
| + | } | ||
| + | }) | ||
| + | |||
| + | </ | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ==== 📡 5. Monitor in Application Insights ==== | ||
| + | |||
| + | After running the script: | ||
| + | |||
| + | * Go to **Application Insights > Logs (Analytics)** and run queries like: | ||
| + | |||
| + | <code -> | ||
+ | traces | where customDimensions.video_id contains " | ||
| + | |||
| + | </ | ||
| + | |||
| + | Or view: | ||
| + | |||
| + | * **Failures** (to track errors) | ||
| + | * **Performance** (request/ | ||
| + | * **Custom Events & Metrics** | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ===== 🧠 Bonus: Dependency Tracking ===== | ||
| + | |||
| + | If you want to **automatically track outbound HTTP requests** (e.g., calls to Video Indexer or Text Analytics APIs): | ||
| + | |||
| + | <code -> | ||
+ | import requests | ||
+ | from opencensus.trace import config_integration | ||
+ | |||
+ | config_integration.trace_integrations([' | ||
| + | |||
| + | </ | ||
| + | |||
| + | This will auto-record dependency duration and failures to App Insights. | ||
| + | |||
| + | ---- | ||
| + | |||
| + | ===== 📘 Summary ===== | ||
| + | |||
| + | ^ Element ^ Purpose ^ | ||
| + | | '' | ||
| + | | '' | ||
| + | | '' | ||
| + | | Custom '' | ||
| + | |||
| + | ===== Combined code in one script ===== | ||
| + | |||
+ | Fun Fact: Python 3.13 broke compatibility with the Azure Text Analytics SDK, so you have to run this under an older Python version for it to work. | ||
| + | |||
| + | < | ||
| + | # | ||
| + | |||
| + | import time | ||
| + | import requests | ||
| + | import re | ||
| + | from azure.ai.textanalytics import TextAnalyticsClient | ||
| + | from azure.core.credentials import AzureKeyCredential | ||
| + | from opencensus.ext.azure.log_exporter import AzureLogHandler | ||
| + | from opencensus.ext.azure.trace_exporter import AzureExporter | ||
| + | from opencensus.trace.samplers import ProbabilitySampler | ||
| + | from opencensus.trace.tracer import Tracer | ||
| + | import logging | ||
| + | |||
| + | # --- CONFIGURATION --- | ||
| + | |||
| + | # Azure Video Indexer | ||
| + | LOCATION = " | ||
| + | ACCOUNT_ID = " | ||
| + | VIDEO_INDEXER_API_KEY = " | ||
| + | VIDEO_PATH = " | ||
| + | VIDEO_NAME = " | ||
| + | |||
| + | # Azure Text Analytics | ||
| + | TEXT_ANALYTICS_KEY = " | ||
| + | TEXT_ANALYTICS_ENDPOINT = " | ||
| + | |||
| + | # Application Insights | ||
| + | APP_INSIGHTS_CONNECTION_STRING = " | ||
| + | |||
| + | # --- Logging and Telemetry Setup --- | ||
| + | logger = logging.getLogger(__name__) | ||
| + | logger.addHandler(AzureLogHandler(connection_string=APP_INSIGHTS_CONNECTION_STRING)) | ||
| + | logger.setLevel(logging.INFO) | ||
| + | |||
| + | tracer = Tracer( | ||
| + | exporter=AzureExporter(connection_string=APP_INSIGHTS_CONNECTION_STRING), | ||
| + | sampler=ProbabilitySampler(1.0), | ||
| + | ) | ||
| + | |||
| + | # --- Utility Functions --- | ||
| + | |||
| + | def split_text_by_characters(text, | ||
| + | chunks = [] | ||
| + | while len(text) > max_chars: | ||
| + | end = text.rfind(' | ||
| + | if end == -1: | ||
| + | end = max_chars | ||
| + | chunk = text[:end + 1].strip() | ||
| + | chunks.append(chunk) | ||
| + | text = text[end + 1:].strip() | ||
| + | if text: | ||
| + | chunks.append(text) | ||
| + | return chunks | ||
| + | |||
| + | # --- Azure Video Indexer Functions --- | ||
| + | |||
| + | def get_access_token(): | ||
| + | url = f" | ||
| + | headers = {" | ||
| + | response = requests.get(url, | ||
| + | response.raise_for_status() | ||
| + | return response.text.strip('"' | ||
| + | |||
| + | def upload_video(token): | ||
| + | logger.info(" | ||
| + | with open(VIDEO_PATH, | ||
| + | files = {' | ||
| + | url = f" | ||
| + | response = requests.post(url, | ||
| + | response.raise_for_status() | ||
| + | video_id = response.json()[' | ||
| + | logger.info(" | ||
| + | return video_id | ||
| + | |||
| + | def wait_for_processing(token, | ||
| + | logger.info(" | ||
| + | url = f" | ||
| + | while True: | ||
| + | response = requests.get(url) | ||
| + | response.raise_for_status() | ||
| + | state = response.json().get(' | ||
| + | logger.info(f" | ||
| + | if state == ' | ||
| + | return | ||
| + | time.sleep(10) | ||
| + | |||
| + | def download_transcript(token, | ||
| + | logger.info(" | ||
| + | url = f" | ||
| + | response = requests.get(url) | ||
| + | response.raise_for_status() | ||
| + | return response.text | ||
| + | |||
| + | # --- Azure Text Analytics Function --- | ||
| + | |||
| + | def extract_key_phrases(text, | ||
| + | client = TextAnalyticsClient(endpoint=TEXT_ANALYTICS_ENDPOINT, | ||
| + | chunks = split_text_by_characters(text) | ||
| + | all_phrases = [] | ||
| + | |||
| + | for i, chunk in enumerate(chunks): | ||
| + | try: | ||
| + | response = client.extract_key_phrases([chunk]) | ||
| + | if not response[0].is_error: | ||
| + | all_phrases.extend(response[0].key_phrases) | ||
| + | logger.info(f" | ||
| + | else: | ||
| + | logger.error(f" | ||
| + | except Exception as e: | ||
| + | logger.exception(f" | ||
| + | |||
| + | logger.info(" | ||
| + | return list(set(all_phrases)) | ||
| + | |||
| + | # --- MAIN EXECUTION FLOW --- | ||
| + | |||
| + | if __name__ == " | ||
| + | try: | ||
| + | token = get_access_token() | ||
| + | video_id = upload_video(token) | ||
| + | wait_for_processing(token, | ||
| + | transcript = download_transcript(token, | ||
| + | key_phrases = extract_key_phrases(transcript, | ||
| + | |||
| + | print(" | ||
| + | for phrase in key_phrases: | ||
| + | print(" | ||
| + | except Exception as e: | ||
| + | logger.exception(" | ||
| </ | </ | ||
| [[ai_knowledge|AI Knowledge]] | [[ai_knowledge|AI Knowledge]] | ||
| + | |||