"""Transcribe a WAV file to text (and optionally SRT subtitles) with faster-whisper."""

import argparse
import os
import platform
import sys
from types import SimpleNamespace

from faster_whisper import WhisperModel
from tqdm import tqdm


def detect_best_compute_type():
    """Pick a reasonable default compute type for the current machine."""
    machine = platform.machine()
    system = platform.system()
    if system == "Darwin" and machine in ("arm64", "arm"):
        # Probe with the tiny model: CTranslate2 raises ValueError when a
        # compute type is unsupported on this hardware.
        try:
            test_model = WhisperModel("tiny", compute_type="int8_float16")
            del test_model
            return "int8_float16"
        except ValueError:
            return "int8"
    return "int8"


def parse_args():
    parser = argparse.ArgumentParser(
        description="Transcribe WAV to text using faster-whisper."
    )
    parser.add_argument("input_file", help="Path to input WAV file")
    parser.add_argument("output_file", help="Path to output TXT file")
    parser.add_argument(
        "--model",
        default="medium",
        choices=["tiny", "small", "small.en", "medium", "medium.en",
                 "large-v1", "large-v2"],
        help="Model size to use (default: medium)",
    )
    parser.add_argument("--chunk", type=int, default=None,
                        help="Optional chunk length in seconds")
    parser.add_argument("--compute_type",
                        choices=["int8", "int8_float16", "float16", "float32"],
                        help="Override compute type (default: auto)")
    parser.add_argument("--srt", action="store_true",
                        help="Also save SRT subtitle file alongside TXT")
    return parser.parse_args()


def load_model(model_size, compute_type):
    print(f"Loading model '{model_size}' with compute_type='{compute_type}'...")
    return WhisperModel(model_size, compute_type=compute_type)


def format_srt(segments):
    """Render segments as SRT blocks: index, 'HH:MM:SS,mmm --> HH:MM:SS,mmm', text."""

    def srt_timestamp(seconds):
        h = int(seconds // 3600)
        m = int((seconds % 3600) // 60)
        s = int(seconds % 60)
        ms = int((seconds - int(seconds)) * 1000)
        return f"{h:02}:{m:02}:{s:02},{ms:03}"

    lines = []
    for i, segment in enumerate(segments, start=1):
        start = srt_timestamp(segment.start)
        end = srt_timestamp(segment.end)
        text = segment.text.strip()
        lines.extend([str(i), f"{start} --> {end}", text, ""])
    return "\n".join(lines)


def split_audio(input_file, chunk_length_sec):
    """Split the WAV into fixed-length chunk files; returns the chunk file names."""
    from pydub import AudioSegment  # optional dependency, only needed for --chunk

    audio = AudioSegment.from_wav(input_file)
    chunk_length_ms = chunk_length_sec * 1000
    chunks = []
    for i in range(0, len(audio), chunk_length_ms):  # pydub slices by milliseconds
        chunk = audio[i:i + chunk_length_ms]
        chunk_filename = f"chunk_{i // 1000}.wav"  # named by start offset in seconds
        chunk.export(chunk_filename, format="wav")
        chunks.append(chunk_filename)
    return chunks


def transcribe_audio(model, file_path, time_offset=0.0):
    """Transcribe one file; returns (joined text, segments shifted by time_offset)."""
    segments, _ = model.transcribe(file_path)
    texts = []
    collected = []
    # transcribe() returns a lazy generator, so tqdm shows progress without a total.
    for segment in tqdm(segments, desc=f"Transcribing {file_path}"):
        texts.append(segment.text.strip())
        # Shift timestamps so chunked transcriptions stay aligned with the
        # original file; otherwise every chunk's SRT times would restart at 0.
        collected.append(SimpleNamespace(
            start=segment.start + time_offset,
            end=segment.end + time_offset,
            text=segment.text,
        ))
    return " ".join(texts), collected


def main():
    args = parse_args()

    if not os.path.isfile(args.input_file):
        print(f"Error: Input file '{args.input_file}' does not exist.")
        sys.exit(1)

    compute_type = args.compute_type or detect_best_compute_type()
    model = load_model(args.model, compute_type)

    full_text = ""
    full_segments = []

    if args.chunk:
        print(f"Splitting audio into {args.chunk} second chunks...")
        chunks = split_audio(args.input_file, args.chunk)
        for idx, chunk_file in enumerate(chunks):
            # Offset each chunk's timestamps by its position in the original audio.
            text, segments = transcribe_audio(model, chunk_file,
                                              time_offset=idx * args.chunk)
            full_text += text + "\n"
            full_segments.extend(segments)
            os.remove(chunk_file)
    else:
        full_text, full_segments = transcribe_audio(model, args.input_file)

    with open(args.output_file, "w", encoding="utf-8") as f:
        f.write(full_text)

    if args.srt:
        srt_path = os.path.splitext(args.output_file)[0] + ".srt"
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(format_srt(full_segments))
        print(f"āœ… Subtitle saved to '{srt_path}'")

    print(f"\nāœ… Transcription saved to '{args.output_file}'")


if __name__ == "__main__":
    main()
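
# Example invocations (hypothetical file names; assumes this script is saved
# as transcribe.py):
#   python transcribe.py interview.wav interview.txt
#   python transcribe.py lecture.wav lecture.txt --model small.en --srt
#   python transcribe.py long_recording.wav notes.txt --chunk 300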