127 lines
5.0 KiB
Python
127 lines
5.0 KiB
Python
"""Unified command-line interface for Saiki."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import sys
|
|
|
|
from .audio import extract_audio
|
|
from .config import Config, language_choices, load_config
|
|
from .importer import import_sentences
|
|
from .words import compare_word_files, extract_words
|
|
from .youtube import run_youtube
|
|
|
|
|
|
def add_config_arg(parser: argparse.ArgumentParser) -> None:
    """Register the ``--config`` option on *parser*.

    The option accepts a single value and is optional; when it is omitted the
    parsed namespace carries ``config=None``.

    Args:
        parser: The parser that should receive the option. It is modified
            in place.
    """
    parser.add_argument(
        "--config",
        help="Path to YAML config file.",
    )
|
|
|
|
|
|
def build_parser(config: Config | None = None) -> argparse.ArgumentParser:
    """Build the top-level argument parser with every Saiki subcommand.

    Args:
        config: Configuration passed to ``language_choices`` to compute the
            values accepted for the ``lang`` positional. When ``None``,
            ``load_config()`` is called without arguments to obtain one.

    Returns:
        A parser with the global ``--config`` option and the required
        subcommands ``audio``, ``words``, ``compare-words``, ``youtube`` and
        ``import``. The selected subcommand name is stored as ``command`` on
        the parsed namespace.
    """
    # Compare against None explicitly: a config supplied by the caller must be
    # used even if the object happens to evaluate as falsy.
    if config is None:
        config = load_config()
    choices = language_choices(config)

    parser = argparse.ArgumentParser(description="Saiki: sentence mining and listening tools for Anki.")
    add_config_arg(parser)
    sub = parser.add_subparsers(dest="command", required=True)

    # --- audio ---------------------------------------------------------
    audio = sub.add_parser("audio", help="Extract Anki audio into playlists.")
    audio.add_argument("lang", choices=choices)
    audio.add_argument("--concat", action="store_true")
    audio.add_argument("--outdir")
    audio.add_argument("--media-dir")
    audio.add_argument("--copy-only-new", action="store_true")

    # --- words ---------------------------------------------------------
    words = sub.add_parser("words", help="Extract frequent words from Anki.")
    words.add_argument("lang", choices=choices)
    # --query and --deck are mutually exclusive; --deck may be repeated.
    group = words.add_mutually_exclusive_group()
    group.add_argument("--query")
    group.add_argument("--deck", action="append")
    words.add_argument("--field")
    words.add_argument("--min-freq", type=int, default=2)
    words.add_argument("--outdir")
    words.add_argument("--out")
    words.add_argument("--full-field", action="store_true")
    words.add_argument("--spacy-model")

    # --- compare-words -------------------------------------------------
    compare = sub.add_parser("compare-words", help="Print words in source that are not in known.")
    compare.add_argument("source")
    compare.add_argument("known")

    # --- youtube -------------------------------------------------------
    youtube = sub.add_parser("youtube", help="Mine a YouTube transcript.")
    youtube.add_argument("lang", choices=choices)
    youtube.add_argument("video")
    youtube.add_argument("--mode", choices=["vocab", "sentences"], default="vocab")
    youtube.add_argument("--top", type=int)
    youtube.add_argument("--no-stopwords", action="store_true")
    youtube.add_argument("--raw", action="store_true")
    youtube.add_argument("--out")
    youtube.add_argument("--format", choices=["tsv", "csv"], default="tsv")
    youtube.add_argument("--known-words", help="Word list to filter vocab_guess against.")
    youtube.add_argument("--only-new", action="store_true", help="Only export sentences with unknown vocab.")

    # --- import --------------------------------------------------------
    importer = sub.add_parser("import", help="Generate TTS and import sentence cards.")
    importer.add_argument("lang", choices=choices)
    # The sentence file is optional on the command line (nargs="?").
    importer.add_argument("sentence_file", nargs="?")
    importer.add_argument("--tags", help="Comma-separated tags. text-to-speech is always included.")

    return parser
|
|
|
|
|
|
def _cmd_audio(config: Config, args: argparse.Namespace) -> int:
    """Run ``audio``: call ``extract_audio`` and print a summary of its result."""
    result = extract_audio(config, args.lang, args.outdir, args.media_dir, args.copy_only_new, args.concat)
    print(f"Copied {result['copied']} files")
    print(f"Playlist: {result['playlist']}")
    print(f"Output directory: {result['outdir']}")
    # Only mention the concatenated file when the result carries a truthy value for it.
    if result["concat"]:
        print(f"Concatenated file: {result['concat']}")
    return 0


def _cmd_words(config: Config, args: argparse.Namespace) -> int:
    """Run ``words``: call ``extract_words`` and print a summary of its result."""
    result = extract_words(
        config, args.lang, args.query, args.deck, args.field, args.min_freq,
        args.outdir, args.out, args.full_field, args.spacy_model,
    )
    print(f"Query: {result['query']}")
    print(f"Found {result['notes']} notes")
    print(f"Extracted {result['unique']} unique entries")
    print(f"Wrote {result['written']} entries to: {result['out']}")
    return 0


def _cmd_compare_words(config: Config, args: argparse.Namespace) -> int:
    """Run ``compare-words``: print each line yielded by ``compare_word_files``.

    *config* is unused; it is accepted so all handlers share one signature.
    """
    for line in compare_word_files(args.source, args.known):
        print(line)
    return 0


def _cmd_youtube(config: Config, args: argparse.Namespace) -> int:
    """Run ``youtube``: call ``run_youtube`` and print output suited to the mode."""
    result = run_youtube(
        config, args.lang, args.video, args.mode, args.top, args.no_stopwords,
        args.raw, args.out, args.format, args.known_words, args.only_new,
    )
    if args.mode == "sentences" and not args.out:
        # Sentences mode without --out: print the lines themselves.
        for line in result["lines"]:
            print(f"[{line.start:.2f}s] {line.text}")
    elif args.mode == "sentences":
        # Sentences mode with --out: report the written row count and path.
        print(f"Wrote {result['written']} rows to: {result['out']}")
    else:
        for word, count in result["items"]:
            print(f"{word}: {count}")
    return 0


def _cmd_import(config: Config, args: argparse.Namespace) -> int:
    """Run ``import``: call ``import_sentences``; return 1 if any card failed."""
    result = import_sentences(config, args.lang, args.sentence_file, args.tags)
    print(f"Done. Added {result.added}/{result.processed} cards. Failed: {result.failed}")
    return 0 if result.failed == 0 else 1


def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run the selected subcommand, return an exit code.

    Args:
        argv: Argument list without the program name. ``None`` is passed
            through to argparse, which then reads ``sys.argv[1:]``.

    Returns:
        The handler's exit code: 0 on success, 1 when ``import`` reports
        failed cards, or 2 if no handler matches the parsed command.
    """
    # First pass: parse only --config and ignore everything else. The config
    # must be loaded before the real parser is built because the accepted
    # ``lang`` values are derived from it.
    pre = argparse.ArgumentParser(add_help=False)
    add_config_arg(pre)
    known, _ = pre.parse_known_args(argv)
    config = load_config(known.config)

    # Second pass: full parse against the config-dependent parser.
    parser = build_parser(config)
    args = parser.parse_args(argv)

    handlers = {
        "audio": _cmd_audio,
        "words": _cmd_words,
        "compare-words": _cmd_compare_words,
        "youtube": _cmd_youtube,
        "import": _cmd_import,
    }
    handler = handlers.get(args.command)
    if handler is None:
        # Defensive fallback for a command with no registered handler.
        parser.print_help()
        return 2
    return handler(config, args)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the CLI on the process arguments (program
    # name stripped) and use main()'s return value as the exit status.
    sys.exit(main(sys.argv[1:]))
|