Files
Saiki/saiki/cli.py
2026-05-26 18:09:26 -04:00

127 lines
5.0 KiB
Python

"""Unified command-line interface for Saiki."""
from __future__ import annotations
import argparse
import sys
from .audio import extract_audio
from .config import Config, language_choices, load_config
from .importer import import_sentences
from .words import compare_word_files, extract_words
from .youtube import run_youtube
def add_config_arg(parser: argparse.ArgumentParser) -> None:
    """Register the ``--config`` option on *parser*.

    Args:
        parser: The parser that should accept ``--config``. The parsed value
            is stored as ``config`` on the namespace and is ``None`` when the
            option is not given.
    """
    parser.add_argument("--config", help="Path to YAML config file.")
def build_parser(config: Config | None = None) -> argparse.ArgumentParser:
    """Build the top-level Saiki argument parser with all of its subcommands.

    Args:
        config: Configuration from which the allowed ``lang`` values are
            derived (via ``language_choices``). When ``None``, the
            configuration is obtained by calling ``load_config()`` with no
            arguments.

    Returns:
        A parser with the ``audio``, ``words``, ``compare-words``,
        ``youtube`` and ``import`` subcommands. A subcommand is required and
        its name is stored in ``args.command``.
    """
    # Compare against None explicitly so that a caller-supplied Config is
    # never discarded merely because it evaluates as falsy.
    if config is None:
        config = load_config()
    choices = language_choices(config)

    parser = argparse.ArgumentParser(description="Saiki: sentence mining and listening tools for Anki.")
    add_config_arg(parser)
    sub = parser.add_subparsers(dest="command", required=True)

    # audio
    audio = sub.add_parser("audio", help="Extract Anki audio into playlists.")
    audio.add_argument("lang", choices=choices)
    audio.add_argument("--concat", action="store_true")
    audio.add_argument("--outdir")
    audio.add_argument("--media-dir")
    audio.add_argument("--copy-only-new", action="store_true")

    # words
    words = sub.add_parser("words", help="Extract frequent words from Anki.")
    words.add_argument("lang", choices=choices)
    # --query and --deck cannot be combined; --deck may be repeated.
    group = words.add_mutually_exclusive_group()
    group.add_argument("--query")
    group.add_argument("--deck", action="append")
    words.add_argument("--field")
    words.add_argument("--min-freq", type=int, default=2)
    words.add_argument("--outdir")
    words.add_argument("--out")
    words.add_argument("--full-field", action="store_true")
    words.add_argument("--spacy-model")

    # compare-words
    compare = sub.add_parser("compare-words", help="Print words in source that are not in known.")
    compare.add_argument("source")
    compare.add_argument("known")

    # youtube
    youtube = sub.add_parser("youtube", help="Mine a YouTube transcript.")
    youtube.add_argument("lang", choices=choices)
    youtube.add_argument("video")
    youtube.add_argument("--mode", choices=["vocab", "sentences"], default="vocab")
    youtube.add_argument("--top", type=int)
    youtube.add_argument("--no-stopwords", action="store_true")
    youtube.add_argument("--raw", action="store_true")
    youtube.add_argument("--out")
    youtube.add_argument("--format", choices=["tsv", "csv"], default="tsv")
    youtube.add_argument("--known-words", help="Word list to filter vocab_guess against.")
    youtube.add_argument("--only-new", action="store_true", help="Only export sentences with unknown vocab.")

    # import
    importer = sub.add_parser("import", help="Generate TTS and import sentence cards.")
    importer.add_argument("lang", choices=choices)
    # The sentence file is optional on the command line (nargs="?").
    importer.add_argument("sentence_file", nargs="?")
    importer.add_argument("--tags", help="Comma-separated tags. text-to-speech is always included.")

    return parser
def _run_audio(config: Config, args: argparse.Namespace) -> int:
    """Run the ``audio`` subcommand and print a summary of its result."""
    result = extract_audio(config, args.lang, args.outdir, args.media_dir, args.copy_only_new, args.concat)
    print(f"Copied {result['copied']} files")
    print(f"Playlist: {result['playlist']}")
    print(f"Output directory: {result['outdir']}")
    # Only mention the concatenated file when the result carries one.
    if result["concat"]:
        print(f"Concatenated file: {result['concat']}")
    return 0


def _run_words(config: Config, args: argparse.Namespace) -> int:
    """Run the ``words`` subcommand and print a summary of its result."""
    result = extract_words(
        config, args.lang, args.query, args.deck, args.field, args.min_freq,
        args.outdir, args.out, args.full_field, args.spacy_model,
    )
    print(f"Query: {result['query']}")
    print(f"Found {result['notes']} notes")
    print(f"Extracted {result['unique']} unique entries")
    print(f"Wrote {result['written']} entries to: {result['out']}")
    return 0


def _run_compare_words(config: Config, args: argparse.Namespace) -> int:
    """Run ``compare-words``: print each line yielded by ``compare_word_files``.

    *config* is accepted only so all handlers share one signature.
    """
    for line in compare_word_files(args.source, args.known):
        print(line)
    return 0


def _run_youtube(config: Config, args: argparse.Namespace) -> int:
    """Run the ``youtube`` subcommand and print output according to the mode."""
    result = run_youtube(
        config, args.lang, args.video, args.mode, args.top, args.no_stopwords,
        args.raw, args.out, args.format, args.known_words, args.only_new,
    )
    if args.mode != "sentences":
        # Any non-sentence mode: print word/count pairs.
        for word, count in result["items"]:
            print(f"{word}: {count}")
    elif args.out:
        # Sentence mode with --out: report what was written.
        print(f"Wrote {result['written']} rows to: {result['out']}")
    else:
        # Sentence mode without --out: print the timestamped lines.
        for line in result["lines"]:
            print(f"[{line.start:.2f}s] {line.text}")
    return 0


def _run_import(config: Config, args: argparse.Namespace) -> int:
    """Run the ``import`` subcommand; return 1 if any card failed, else 0."""
    result = import_sentences(config, args.lang, args.sentence_file, args.tags)
    print(f"Done. Added {result.added}/{result.processed} cards. Failed: {result.failed}")
    return 0 if result.failed == 0 else 1


def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, run the selected subcommand and return an exit status.

    Args:
        argv: Command-line arguments without the program name. ``None`` is
            passed straight through to argparse.

    Returns:
        The exit status from the selected subcommand: ``0`` on success, or
        ``1`` when ``import`` reports failed cards. ``2`` is returned after
        printing help if the command is not recognised.

    Raises:
        SystemExit: Raised by argparse itself for invalid arguments or
            ``-h``/``--help``.
    """
    # First pass: pull out only --config. The configuration must be loaded
    # before the full parser is built, because build_parser derives the
    # allowed ``lang`` choices from it.
    pre = argparse.ArgumentParser(add_help=False)
    add_config_arg(pre)
    known, _ = pre.parse_known_args(argv)
    config = load_config(known.config)

    # Second pass: full parse against the config-aware parser.
    parser = build_parser(config)
    args = parser.parse_args(argv)

    handlers = {
        "audio": _run_audio,
        "words": _run_words,
        "compare-words": _run_compare_words,
        "youtube": _run_youtube,
        "import": _run_import,
    }
    handler = handlers.get(args.command)
    if handler is None:
        # Defensive fallback for a command without a handler.
        parser.print_help()
        return 2
    return handler(config, args)
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv[1:]))