"""Unified command-line interface for Saiki.""" from __future__ import annotations import argparse import sys from .audio import extract_audio from .config import Config, language_choices, load_config from .importer import import_sentences from .words import compare_word_files, extract_words from .youtube import run_youtube def add_config_arg(parser: argparse.ArgumentParser) -> None: parser.add_argument("--config", help="Path to YAML config file.") def build_parser(config: Config | None = None) -> argparse.ArgumentParser: choices = language_choices(config or load_config()) parser = argparse.ArgumentParser(description="Saiki: sentence mining and listening tools for Anki.") add_config_arg(parser) sub = parser.add_subparsers(dest="command", required=True) audio = sub.add_parser("audio", help="Extract Anki audio into playlists.") audio.add_argument("lang", choices=choices) audio.add_argument("--concat", action="store_true") audio.add_argument("--outdir") audio.add_argument("--media-dir") audio.add_argument("--copy-only-new", action="store_true") words = sub.add_parser("words", help="Extract frequent words from Anki.") words.add_argument("lang", choices=choices) group = words.add_mutually_exclusive_group() group.add_argument("--query") group.add_argument("--deck", action="append") words.add_argument("--field") words.add_argument("--min-freq", type=int, default=2) words.add_argument("--outdir") words.add_argument("--out") words.add_argument("--full-field", action="store_true") words.add_argument("--spacy-model") compare = sub.add_parser("compare-words", help="Print words in source that are not in known.") compare.add_argument("source") compare.add_argument("known") youtube = sub.add_parser("youtube", help="Mine a YouTube transcript.") youtube.add_argument("lang", choices=choices) youtube.add_argument("video") youtube.add_argument("--mode", choices=["vocab", "sentences"], default="vocab") youtube.add_argument("--top", type=int) youtube.add_argument("--no-stopwords", action="store_true") youtube.add_argument("--raw", action="store_true") youtube.add_argument("--out") youtube.add_argument("--format", choices=["tsv", "csv"], default="tsv") youtube.add_argument("--known-words", help="Word list to filter vocab_guess against.") youtube.add_argument("--only-new", action="store_true", help="Only export sentences with unknown vocab.") importer = sub.add_parser("import", help="Generate TTS and import sentence cards.") importer.add_argument("lang", choices=choices) importer.add_argument("sentence_file", nargs="?") importer.add_argument("--tags", help="Comma-separated tags. text-to-speech is always included.") return parser def main(argv: list[str] | None = None) -> int: pre = argparse.ArgumentParser(add_help=False) add_config_arg(pre) known, _ = pre.parse_known_args(argv) config = load_config(known.config) parser = build_parser(config) args = parser.parse_args(argv) if args.command == "audio": result = extract_audio(config, args.lang, args.outdir, args.media_dir, args.copy_only_new, args.concat) print(f"Copied {result['copied']} files") print(f"Playlist: {result['playlist']}") print(f"Output directory: {result['outdir']}") if result["concat"]: print(f"Concatenated file: {result['concat']}") return 0 if args.command == "words": result = extract_words( config, args.lang, args.query, args.deck, args.field, args.min_freq, args.outdir, args.out, args.full_field, args.spacy_model, ) print(f"Query: {result['query']}") print(f"Found {result['notes']} notes") print(f"Extracted {result['unique']} unique entries") print(f"Wrote {result['written']} entries to: {result['out']}") return 0 if args.command == "compare-words": for line in compare_word_files(args.source, args.known): print(line) return 0 if args.command == "youtube": result = run_youtube( config, args.lang, args.video, args.mode, args.top, args.no_stopwords, args.raw, args.out, args.format, args.known_words, args.only_new, ) if args.mode == "sentences" and not args.out: for line in result["lines"]: print(f"[{line.start:.2f}s] {line.text}") elif args.mode == "sentences": print(f"Wrote {result['written']} rows to: {result['out']}") else: for word, count in result["items"]: print(f"{word}: {count}") return 0 if args.command == "import": result = import_sentences(config, args.lang, args.sentence_file, args.tags) print(f"Done. Added {result.added}/{result.processed} cards. Failed: {result.failed}") return 0 if result.failed == 0 else 1 parser.print_help() return 2 if __name__ == "__main__": raise SystemExit(main(sys.argv[1:]))