Rename project to Saiki and unify CLI

2026-05-26 18:09:26 -04:00
parent 8ee1f8de25
commit f38030238c
19 changed files with 1274 additions and 1326 deletions
--- a/saiki/cli.py
+++ b/saiki/cli.py
@@ -0,0 +1,126 @@
+"""Unified command-line interface for Saiki."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+from .audio import extract_audio
+from .config import Config, language_choices, load_config
+from .importer import import_sentences
+from .words import compare_word_files, extract_words
+from .youtube import run_youtube
+
+
+def add_config_arg(parser: argparse.ArgumentParser) -> None:
+    """Register the ``--config`` option on *parser*.
+
+    The option takes a single value and defaults to ``None`` when omitted.
+    It is defined in one place so that every parser in this module that
+    needs ``--config`` declares it identically.
+    """
+    parser.add_argument(
+        "--config",
+        help="Path to YAML config file.",
+    )
+
+
+def build_parser(config: Config | None = None) -> argparse.ArgumentParser:
+    """Build the top-level argument parser with every Saiki subcommand.
+
+    Args:
+        config: Configuration from which the allowed values of the ``lang``
+            positional are derived (via ``language_choices``). When ``None``,
+            ``load_config()`` is called without arguments to obtain one.
+
+    Returns:
+        A parser that accepts ``--config`` plus exactly one required
+        subcommand (``audio``, ``words``, ``compare-words``, ``youtube`` or
+        ``import``). The selected subcommand name is stored in
+        ``args.command``.
+    """
+    # Test for None explicitly instead of relying on truthiness: a config the
+    # caller passed in must be honoured even if the object evaluates as
+    # falsy; only a missing config falls back to load_config().
+    if config is None:
+        config = load_config()
+    choices = language_choices(config)
+
+    parser = argparse.ArgumentParser(description="Saiki: sentence mining and listening tools for Anki.")
+    add_config_arg(parser)
+    # required=True makes argparse reject invocations without a subcommand.
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    # --- audio ---------------------------------------------------------
+    audio = sub.add_parser("audio", help="Extract Anki audio into playlists.")
+    audio.add_argument("lang", choices=choices)
+    audio.add_argument("--concat", action="store_true")
+    audio.add_argument("--outdir")
+    audio.add_argument("--media-dir")
+    audio.add_argument("--copy-only-new", action="store_true")
+
+    # --- words ---------------------------------------------------------
+    words = sub.add_parser("words", help="Extract frequent words from Anki.")
+    words.add_argument("lang", choices=choices)
+    # --query and --deck cannot be combined; --deck may be given repeatedly
+    # and its values are collected into a list.
+    group = words.add_mutually_exclusive_group()
+    group.add_argument("--query")
+    group.add_argument("--deck", action="append")
+    words.add_argument("--field")
+    words.add_argument("--min-freq", type=int, default=2)
+    words.add_argument("--outdir")
+    words.add_argument("--out")
+    words.add_argument("--full-field", action="store_true")
+    words.add_argument("--spacy-model")
+
+    # --- compare-words (the only subcommand without a lang positional) ---
+    compare = sub.add_parser("compare-words", help="Print words in source that are not in known.")
+    compare.add_argument("source")
+    compare.add_argument("known")
+
+    # --- youtube -------------------------------------------------------
+    youtube = sub.add_parser("youtube", help="Mine a YouTube transcript.")
+    youtube.add_argument("lang", choices=choices)
+    youtube.add_argument("video")
+    youtube.add_argument("--mode", choices=["vocab", "sentences"], default="vocab")
+    youtube.add_argument("--top", type=int)
+    youtube.add_argument("--no-stopwords", action="store_true")
+    youtube.add_argument("--raw", action="store_true")
+    youtube.add_argument("--out")
+    youtube.add_argument("--format", choices=["tsv", "csv"], default="tsv")
+    youtube.add_argument("--known-words", help="Word list to filter vocab_guess against.")
+    youtube.add_argument("--only-new", action="store_true", help="Only export sentences with unknown vocab.")
+
+    # --- import --------------------------------------------------------
+    importer = sub.add_parser("import", help="Generate TTS and import sentence cards.")
+    importer.add_argument("lang", choices=choices)
+    # nargs="?" makes the sentence file optional (None when omitted).
+    importer.add_argument("sentence_file", nargs="?")
+    importer.add_argument("--tags", help="Comma-separated tags. text-to-speech is always included.")
+
+    return parser
+
+
+def _run_audio(config: Config, args: argparse.Namespace) -> int:
+    """Handle the ``audio`` subcommand and print a summary of the result."""
+    summary = extract_audio(
+        config,
+        args.lang,
+        args.outdir,
+        args.media_dir,
+        args.copy_only_new,
+        args.concat,
+    )
+    print(f"Copied {summary['copied']} files")
+    print(f"Playlist: {summary['playlist']}")
+    print(f"Output directory: {summary['outdir']}")
+    # The concatenated file is only reported when the result carries one.
+    if summary["concat"]:
+        print(f"Concatenated file: {summary['concat']}")
+    return 0
+
+
+def _run_words(config: Config, args: argparse.Namespace) -> int:
+    """Handle the ``words`` subcommand and print extraction statistics."""
+    stats = extract_words(
+        config,
+        args.lang,
+        args.query,
+        args.deck,
+        args.field,
+        args.min_freq,
+        args.outdir,
+        args.out,
+        args.full_field,
+        args.spacy_model,
+    )
+    print(f"Query: {stats['query']}")
+    print(f"Found {stats['notes']} notes")
+    print(f"Extracted {stats['unique']} unique entries")
+    print(f"Wrote {stats['written']} entries to: {stats['out']}")
+    return 0
+
+
+def _run_compare_words(config: Config, args: argparse.Namespace) -> int:
+    """Handle ``compare-words``: print each item yielded by the comparison.
+
+    *config* is accepted only so all handlers share one signature; it is not
+    used here.
+    """
+    for entry in compare_word_files(args.source, args.known):
+        print(entry)
+    return 0
+
+
+def _run_youtube(config: Config, args: argparse.Namespace) -> int:
+    """Handle the ``youtube`` subcommand and print output for the chosen mode."""
+    outcome = run_youtube(
+        config,
+        args.lang,
+        args.video,
+        args.mode,
+        args.top,
+        args.no_stopwords,
+        args.raw,
+        args.out,
+        args.format,
+        args.known_words,
+        args.only_new,
+    )
+    if args.mode != "sentences":
+        # Any non-sentence mode: print word/count pairs.
+        for word, count in outcome["items"]:
+            print(f"{word}: {count}")
+    elif args.out:
+        # Sentence mode with --out: only report where the rows went.
+        print(f"Wrote {outcome['written']} rows to: {outcome['out']}")
+    else:
+        # Sentence mode without --out: print the timestamped lines.
+        for line in outcome["lines"]:
+            print(f"[{line.start:.2f}s] {line.text}")
+    return 0
+
+
+def _run_import(config: Config, args: argparse.Namespace) -> int:
+    """Handle the ``import`` subcommand; return 1 if any card failed, else 0."""
+    report = import_sentences(config, args.lang, args.sentence_file, args.tags)
+    print(f"Done. Added {report.added}/{report.processed} cards. Failed: {report.failed}")
+    if report.failed == 0:
+        return 0
+    return 1
+
+
+# Maps each subcommand name (as stored in ``args.command``) to its handler.
+_COMMAND_HANDLERS = {
+    "audio": _run_audio,
+    "words": _run_words,
+    "compare-words": _run_compare_words,
+    "youtube": _run_youtube,
+    "import": _run_import,
+}
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse *argv*, run the selected subcommand and return an exit status.
+
+    Args:
+        argv: Argument list without the program name. ``None`` is passed
+            through to argparse unchanged.
+
+    Returns:
+        ``0`` on success, ``1`` when the ``import`` subcommand reports
+        failures, and ``2`` (after printing help) if the parsed command has
+        no handler.
+    """
+    # First pass: a minimal parser that only understands --config, so the
+    # configuration can be loaded before the full parser (whose language
+    # choices depend on it) is built. Unknown arguments are ignored here.
+    bootstrap = argparse.ArgumentParser(add_help=False)
+    add_config_arg(bootstrap)
+    early_args, _ = bootstrap.parse_known_args(argv)
+    config = load_config(early_args.config)
+
+    # Second pass: full parse against the config-aware parser.
+    parser = build_parser(config)
+    args = parser.parse_args(argv)
+
+    handler = _COMMAND_HANDLERS.get(args.command)
+    if handler is not None:
+        return handler(config, args)
+
+    parser.print_help()
+    return 2
+
+
+if __name__ == "__main__":
+    # Script entry point: run the CLI on the process arguments (program name
+    # stripped) and use main()'s return value as the exit status.
+    sys.exit(main(sys.argv[1:]))