# Saiki/saiki/audio.py

"""Extract Anki audio media into playlists."""

from __future__ import annotations

import os
import re
import shutil
import subprocess
import tempfile
from typing import Callable

from .ankiconnect import anki_request
from .config import Config

# File extensions treated as audio. Kept lowercase (and as a tuple) because
# callers test names with ``name.lower().endswith(AUDIO_EXTS)``.
AUDIO_EXTS = (".mp3", ".wav", ".ogg", ".m4a", ".flac")


def resolve_media_paths(media_dir: str, out_dir: str, media_name: str) -> tuple[str, str] | None:
    normalized = os.path.normpath(media_name)
    if os.path.isabs(normalized) or normalized.startswith(".."):
        return None
    return os.path.join(media_dir, normalized), os.path.join(out_dir, normalized)


def build_playlist(out_dir: str, language: str) -> str:
    """Write ``<language>.m3u`` listing every audio file under ``out_dir``.

    The directory tree is walked recursively. Entries are stored relative to
    ``out_dir``, sorted, one per line. The playlist itself and the
    ``<language>_concat.mp3`` file at the top level of ``out_dir`` are left
    out of the listing.

    Args:
        out_dir: Directory to scan; the playlist is written inside it.
        language: Name used to build the playlist and concat file names.

    Returns:
        Path of the playlist file that was written.
    """
    playlist_path = os.path.join(out_dir, f"{language}.m3u")
    excluded = (os.path.basename(playlist_path), f"{language}_concat.mp3")

    entries: list[str] = []
    for folder, _subdirs, names in os.walk(out_dir):
        for name in names:
            if not name.lower().endswith(AUDIO_EXTS):
                continue
            full_path = os.path.join(folder, name)
            relative = os.path.relpath(full_path, out_dir)
            if relative in excluded or not os.path.isfile(full_path):
                continue
            entries.append(relative)
    entries.sort()

    with open(playlist_path, "w", encoding="utf-8") as playlist:
        playlist.writelines(f"{entry}\n" for entry in entries)
    return playlist_path


def concat_audio_from_m3u(out_dir: str, m3u_path: str, out_path: str) -> None:
    """Concatenate the audio files listed in a playlist into one MP3 via ffmpeg.

    Args:
        out_dir: Directory that the playlist entries are relative to.
        m3u_path: Playlist to read, one relative path per line.
        out_path: Destination file; ffmpeg is run with ``-y`` so an existing
            file is overwritten.

    Raises:
        RuntimeError: If ffmpeg is not on ``PATH``, or if no playlist entry
            is an existing audio file other than ``out_path`` itself.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if shutil.which("ffmpeg") is None:
        raise RuntimeError("ffmpeg not found in PATH. Install ffmpeg to use --concat.")

    with open(m3u_path, "r", encoding="utf-8") as fh:
        rel_files = [line.strip() for line in fh if line.strip()]

    # The output must never be one of its own inputs: with ``-y`` ffmpeg
    # would truncate the file while still reading from it.
    target = os.path.normcase(os.path.realpath(out_path))
    abs_files: list[str] = []
    for rel in rel_files:
        if not rel.lower().endswith(AUDIO_EXTS):
            continue
        candidate = os.path.abspath(os.path.join(out_dir, rel))
        if not os.path.isfile(candidate):
            continue
        if os.path.normcase(os.path.realpath(candidate)) == target:
            continue
        abs_files.append(candidate)
    if not abs_files:
        raise RuntimeError("No audio files found to concatenate.")

    concat_list_path = None
    try:
        # delete=False so the file can be closed before ffmpeg opens it; the
        # ``finally`` below removes it, including when writing the list fails.
        with tempfile.NamedTemporaryFile("w", delete=False, encoding="utf-8") as tmp:
            concat_list_path = tmp.name
            for path in abs_files:
                # Concat-list quoting: close the quote, emit an escaped
                # single quote, reopen the quote ('\'').
                escaped = path.replace("'", "'\\''")
                tmp.write(f"file '{escaped}'\n")

        cmd = [
            "ffmpeg", "-hide_banner", "-loglevel", "error", "-f", "concat", "-safe", "0",
            "-i", concat_list_path, "-c:a", "libmp3lame", "-q:a", "4", "-y", out_path,
        ]
        subprocess.run(cmd, check=True)
    finally:
        if concat_list_path is not None:
            try:
                os.remove(concat_list_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the real outcome of the ffmpeg run.
                pass


def extract_audio(
    config: Config,
    lang: str,
    outdir: str | None = None,
    media_dir: str | None = None,
    copy_only_new: bool = False,
    concat: bool = False,
    request: Callable = anki_request,
) -> dict[str, object]:
    """Copy the audio referenced by a language's notes and build a playlist.

    Notes are looked up per configured deck with the ``findNotes`` action and
    fetched with ``notesInfo``. Every ``[sound:...]`` tag found in a note
    field is resolved against the media directory and copied into the output
    directory, after which the playlist is rebuilt.

    Args:
        config: Supplies the language name, deck list, AnkiConnect URL and
            default media / output locations.
        lang: Language key passed to ``config.language_name`` and
            ``config.decks_for``.
        outdir: Output directory (``~`` is expanded). Defaults to
            ``<config.audio_output_root>/<language>``.
        media_dir: Directory to read media from (``~`` is expanded).
            Defaults to ``config.media_dir``.
        copy_only_new: When true, files that already exist in the output
            directory are not copied again.
        concat: When true, also write ``<language>_concat.mp3`` from the
            playlist.
        request: Callable used to perform the requests; called as
            ``request(action, url=..., **params)``.

    Returns:
        A dict with ``copied`` (number of files copied by this call),
        ``playlist`` (playlist path), ``outdir`` and ``concat`` (path of the
        concatenated file, or ``None``).

    Raises:
        RuntimeError: If no decks are configured for ``lang``, or if
            concatenation was requested and could not be set up.
    """
    language = config.language_name(lang)
    selected_decks = config.decks_for(lang)
    if not selected_decks:
        raise RuntimeError(f"No decks configured for language: {lang}")

    media_root = os.path.expanduser(media_dir) if media_dir else config.media_dir
    out_dir = os.path.expanduser(outdir) if outdir else os.path.join(config.audio_output_root, language)
    os.makedirs(out_dir, exist_ok=True)

    all_ids: list[int] = []
    for deck in selected_decks:
        all_ids.extend(request("findNotes", url=config.anki_connect_url, query=f'deck:"{deck}"') or [])
    # The same note id can come back for more than one deck query; keep the
    # first occurrence so each note is fetched and processed once.
    all_ids = list(dict.fromkeys(all_ids))

    if not all_ids:
        return {"copied": 0, "playlist": build_playlist(out_dir, language), "outdir": out_dir, "concat": None}

    notes = request("notesInfo", url=config.anki_connect_url, notes=all_ids) or []
    sound_tag = re.compile(r"\[sound:(.+?)\]")
    handled: set[str] = set()  # destinations already considered in this run
    copied: list[str] = []
    for note in notes:
        for field in (note.get("fields", {}) or {}).values():
            val = field.get("value", "") or ""
            for match in sound_tag.findall(val):
                paths = resolve_media_paths(media_root, out_dir, match)
                if paths is None:
                    continue
                src, dst = paths
                # Several notes may reference the same file; copy and count
                # it only once.
                if dst in handled:
                    continue
                handled.add(dst)
                # isfile rather than exists: a reference that resolves to a
                # directory would make shutil.copy2 raise.
                if not os.path.isfile(src):
                    continue
                if copy_only_new and os.path.exists(dst):
                    continue
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                shutil.copy2(src, dst)
                copied.append(match)

    m3u_path = build_playlist(out_dir, language)
    concat_path = None
    if concat:
        concat_path = os.path.join(out_dir, f"{language}_concat.mp3")
        concat_audio_from_m3u(out_dir, m3u_path, concat_path)
    return {"copied": len(copied), "playlist": m3u_path, "outdir": out_dir, "concat": concat_path}