feature: IANA update

This commit is contained in:
Heiko
2025-12-19 20:10:39 +01:00
parent f038d6a3fc
commit 753c582010
27 changed files with 1923 additions and 419 deletions

View File

@@ -0,0 +1,248 @@
"""Update IANA command handler."""
import argparse
import json
import logging
import sqlite3
from pathlib import Path
from urllib.error import URLError
from urllib.request import urlopen
from ..iana_parser import (
extract_updated_date,
find_registry,
get_table_name_from_filename,
parse_xml_with_namespace_support,
)
from ..iana_validator import (
ValidationError,
normalize_header,
validate_headers,
validate_registry_data,
)
from ..output import print_error
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def fetch_xml_from_url(url: str, timeout: int = 30) -> str:
    """Download the document at ``url`` and return it as text.

    The response body is read completely and decoded as UTF-8.

    Args:
        url: Location of the document to download
        timeout: Seconds to wait before the request is abandoned

    Returns:
        The decoded response body

    Raises:
        URLError: If the URL cannot be fetched
    """
    logger.info(f"Fetching {url}")
    with urlopen(url, timeout=timeout) as http_response:
        raw_body = http_response.read()
    return raw_body.decode("utf-8")
def calculate_diff(
    old_rows: list[tuple],
    new_rows: list[tuple],
    pk_index: int = 0,
) -> dict[str, list]:
    """Compare two row sets by primary key.

    Rows are indexed by the value found at ``pk_index``; when a key occurs
    more than once in one input, the last row with that key is the one that
    is compared.

    Args:
        old_rows: Rows currently stored in the database
        new_rows: Rows freshly extracted from the XML
        pk_index: Position of the primary key inside each row

    Returns:
        Dict with the keys 'added', 'deleted' and 'modified', each holding a
        list of primary keys. 'added' and 'modified' follow the order of
        ``new_rows``; 'deleted' follows the order of ``old_rows``.
    """
    previous = {}
    for record in old_rows:
        previous[record[pk_index]] = record

    current = {}
    for record in new_rows:
        current[record[pk_index]] = record

    added = []
    modified = []
    # One pass over the new data classifies every key as new or changed.
    for key, record in current.items():
        if key not in previous:
            added.append(key)
        elif previous[key] != record:
            modified.append(key)

    deleted = [key for key in previous if key not in current]

    return {"added": added, "deleted": deleted, "modified": modified}
def process_registry_with_validation(
    xml_content: str,
    registry_id: str,
    table_name: str,
    headers: list[str],
    db_conn: sqlite3.Connection,
    skip_min_rows_check: bool = False,
) -> tuple[int, dict[str, list]]:
    """Process registry with validation and diff calculation.

    The XML is parsed, the registry identified by ``registry_id`` is looked
    up, and every record that ``is_unassigned`` does not flag is converted
    into a row. After validation the current contents of ``table_name`` are
    read for the diff, deleted, and replaced by the new rows. This function
    neither commits nor rolls back; transaction control stays with the
    caller.

    Args:
        xml_content: XML content as string
        registry_id: Registry ID to extract
        table_name: Database table name
        headers: List of column headers
        db_conn: Database connection
        skip_min_rows_check: Skip minimum rows validation (for tests)

    Returns:
        Tuple of (row_count, diff_dict), where diff_dict is the result of
        ``calculate_diff`` for the previous and the new table contents

    Raises:
        ValidationError: If validation fails
        ValueError: If registry not found
    """
    import tempfile

    from ..iana_parser import extract_field_value, is_unassigned

    # The parser is handed a file path, so the content goes through a
    # temporary file. The file is created with delete=False and therefore has
    # to be removed explicitly; the try/finally covers the write as well so
    # that a failing write cannot leave the file behind.
    tmp_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".xml", delete=False, encoding="utf-8"
    )
    tmp_path = tmp_file.name
    try:
        with tmp_file:
            tmp_file.write(xml_content)
        # The file is closed (and flushed) before it is parsed.
        root, ns = parse_xml_with_namespace_support(tmp_path)
    finally:
        Path(tmp_path).unlink()

    validate_headers(table_name, headers, db_conn)

    registry = find_registry(root, registry_id, ns)
    if ns:
        records = registry.findall("iana:record", ns)
    else:
        records = registry.findall("record")

    rows_dict = []
    for record in records:
        if is_unassigned(record, ns):
            continue
        rows_dict.append(
            {
                normalize_header(header): extract_field_value(record, header, ns)
                for header in headers
            }
        )

    validate_registry_data(table_name, rows_dict, skip_min_rows_check)

    rows = [tuple(row.values()) for row in rows_dict]

    cursor = db_conn.cursor()
    # SQL placeholders cannot bind identifiers, hence the interpolated table
    # name. NOTE(review): presumably table_name always originates from the
    # project's own configuration - confirm it is never user-supplied.
    old_rows = cursor.execute(f"SELECT * FROM {table_name}").fetchall()
    diff = calculate_diff(old_rows, rows)

    # Full replacement: drop every existing row, then insert the new set.
    placeholders = ",".join(["?"] * len(headers))
    cursor.execute(f"DELETE FROM {table_name}")
    cursor.executemany(f"INSERT INTO {table_name} VALUES ({placeholders})", rows)

    return len(rows), diff
def handle_update_iana_command(args: argparse.Namespace) -> int:
    """Handle the update-iana subcommand.

    Loads ``data/iana_parse.json`` from the directory two levels above this
    file, downloads every configured XML document and rewrites the matching
    tables inside a single database transaction. The transaction is committed
    only after every registry has been processed; the first fetch, validation
    or database error rolls it back and ends the command.

    The configuration is iterated as a mapping of URL to a sequence of
    ``(registry_id, output_filename, headers)`` entries.

    Args:
        args: Parsed arguments; ``args.database`` is the path of the
            database file to update

    Returns:
        Exit code (0 for success, 1 for error)
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    db_path = args.database
    if not Path(db_path).exists():
        print_error(f"Database not found: {db_path}")
        return 1

    script_dir = Path(__file__).parent.parent
    config_path = script_dir / "data" / "iana_parse.json"
    logger.info(f"Loading configuration from {config_path}")
    try:
        with config_path.open(encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # FileNotFoundError is a subclass of OSError and is covered here.
        print_error(f"Error loading configuration: {e}")
        return 1

    try:
        conn = sqlite3.connect(str(db_path))
    except sqlite3.Error as e:
        print_error(f"Error opening database: {e}")
        return 1

    logger.info("Starting IANA registry update")
    # The connection is closed exactly once, in the ``finally`` clause below;
    # the error paths only roll back and return.
    try:
        conn.execute("BEGIN TRANSACTION")
        total_registries = 0
        total_rows = 0

        for url, registries in config.items():
            try:
                xml_content = fetch_xml_from_url(url)
            except (URLError, OSError, ValueError) as e:
                # ValueError covers a malformed URL rejected by urlopen and a
                # response body that is not valid UTF-8 (UnicodeDecodeError).
                print_error(f"Failed to fetch {url}: {e}")
                conn.rollback()
                return 1

            xml_date = extract_updated_date(xml_content)
            logger.info(f"XML data date: {xml_date}")

            for registry_id, output_filename, headers in registries:
                table_name = get_table_name_from_filename(output_filename)
                try:
                    row_count, diff = process_registry_with_validation(
                        xml_content, registry_id, table_name, headers, conn
                    )
                except (ValidationError, ValueError) as e:
                    print_error(
                        f"Validation failed for {table_name}: {e}\n"
                        "IANA data structure may have changed. "
                        "Please open an issue at the project repository."
                    )
                    conn.rollback()
                    return 1

                logger.info(
                    f"{table_name}: {row_count} rows "
                    f"({len(diff['added'])} added, "
                    f"{len(diff['modified'])} modified, "
                    f"{len(diff['deleted'])} deleted)"
                )
                total_registries += 1
                total_rows += row_count

        conn.commit()
        logger.info(
            f"Successfully updated {total_registries} registries "
            f"({total_rows} total rows)"
        )
    except sqlite3.Error as e:
        print_error(f"Database error: {e}")
        conn.rollback()
        return 1
    finally:
        conn.close()

    return 0