Rename project to Saiki and unify CLI
This commit is contained in:
126
saiki/cli.py
Normal file
126
saiki/cli.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Unified command-line interface for Saiki."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from .audio import extract_audio
|
||||
from .config import Config, language_choices, load_config
|
||||
from .importer import import_sentences
|
||||
from .words import compare_word_files, extract_words
|
||||
from .youtube import run_youtube
|
||||
|
||||
|
||||
def add_config_arg(parser: argparse.ArgumentParser) -> None:
|
||||
parser.add_argument("--config", help="Path to YAML config file.")
|
||||
|
||||
|
||||
def build_parser(config: Config | None = None) -> argparse.ArgumentParser:
|
||||
choices = language_choices(config or load_config())
|
||||
parser = argparse.ArgumentParser(description="Saiki: sentence mining and listening tools for Anki.")
|
||||
add_config_arg(parser)
|
||||
sub = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
audio = sub.add_parser("audio", help="Extract Anki audio into playlists.")
|
||||
audio.add_argument("lang", choices=choices)
|
||||
audio.add_argument("--concat", action="store_true")
|
||||
audio.add_argument("--outdir")
|
||||
audio.add_argument("--media-dir")
|
||||
audio.add_argument("--copy-only-new", action="store_true")
|
||||
|
||||
words = sub.add_parser("words", help="Extract frequent words from Anki.")
|
||||
words.add_argument("lang", choices=choices)
|
||||
group = words.add_mutually_exclusive_group()
|
||||
group.add_argument("--query")
|
||||
group.add_argument("--deck", action="append")
|
||||
words.add_argument("--field")
|
||||
words.add_argument("--min-freq", type=int, default=2)
|
||||
words.add_argument("--outdir")
|
||||
words.add_argument("--out")
|
||||
words.add_argument("--full-field", action="store_true")
|
||||
words.add_argument("--spacy-model")
|
||||
|
||||
compare = sub.add_parser("compare-words", help="Print words in source that are not in known.")
|
||||
compare.add_argument("source")
|
||||
compare.add_argument("known")
|
||||
|
||||
youtube = sub.add_parser("youtube", help="Mine a YouTube transcript.")
|
||||
youtube.add_argument("lang", choices=choices)
|
||||
youtube.add_argument("video")
|
||||
youtube.add_argument("--mode", choices=["vocab", "sentences"], default="vocab")
|
||||
youtube.add_argument("--top", type=int)
|
||||
youtube.add_argument("--no-stopwords", action="store_true")
|
||||
youtube.add_argument("--raw", action="store_true")
|
||||
youtube.add_argument("--out")
|
||||
youtube.add_argument("--format", choices=["tsv", "csv"], default="tsv")
|
||||
youtube.add_argument("--known-words", help="Word list to filter vocab_guess against.")
|
||||
youtube.add_argument("--only-new", action="store_true", help="Only export sentences with unknown vocab.")
|
||||
|
||||
importer = sub.add_parser("import", help="Generate TTS and import sentence cards.")
|
||||
importer.add_argument("lang", choices=choices)
|
||||
importer.add_argument("sentence_file", nargs="?")
|
||||
importer.add_argument("--tags", help="Comma-separated tags. text-to-speech is always included.")
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
pre = argparse.ArgumentParser(add_help=False)
|
||||
add_config_arg(pre)
|
||||
known, _ = pre.parse_known_args(argv)
|
||||
config = load_config(known.config)
|
||||
parser = build_parser(config)
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
if args.command == "audio":
|
||||
result = extract_audio(config, args.lang, args.outdir, args.media_dir, args.copy_only_new, args.concat)
|
||||
print(f"Copied {result['copied']} files")
|
||||
print(f"Playlist: {result['playlist']}")
|
||||
print(f"Output directory: {result['outdir']}")
|
||||
if result["concat"]:
|
||||
print(f"Concatenated file: {result['concat']}")
|
||||
return 0
|
||||
|
||||
if args.command == "words":
|
||||
result = extract_words(
|
||||
config, args.lang, args.query, args.deck, args.field, args.min_freq,
|
||||
args.outdir, args.out, args.full_field, args.spacy_model,
|
||||
)
|
||||
print(f"Query: {result['query']}")
|
||||
print(f"Found {result['notes']} notes")
|
||||
print(f"Extracted {result['unique']} unique entries")
|
||||
print(f"Wrote {result['written']} entries to: {result['out']}")
|
||||
return 0
|
||||
|
||||
if args.command == "compare-words":
|
||||
for line in compare_word_files(args.source, args.known):
|
||||
print(line)
|
||||
return 0
|
||||
|
||||
if args.command == "youtube":
|
||||
result = run_youtube(
|
||||
config, args.lang, args.video, args.mode, args.top, args.no_stopwords,
|
||||
args.raw, args.out, args.format, args.known_words, args.only_new,
|
||||
)
|
||||
if args.mode == "sentences" and not args.out:
|
||||
for line in result["lines"]:
|
||||
print(f"[{line.start:.2f}s] {line.text}")
|
||||
elif args.mode == "sentences":
|
||||
print(f"Wrote {result['written']} rows to: {result['out']}")
|
||||
else:
|
||||
for word, count in result["items"]:
|
||||
print(f"{word}: {count}")
|
||||
return 0
|
||||
|
||||
if args.command == "import":
|
||||
result = import_sentences(config, args.lang, args.sentence_file, args.tags)
|
||||
print(f"Done. Added {result.added}/{result.processed} cards. Failed: {result.failed}")
|
||||
return 0 if result.failed == 0 else 1
|
||||
|
||||
parser.print_help()
|
||||
return 2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
Reference in New Issue
Block a user