import xml.etree.ElementTree as ET


# -----------------------------
# FPS / TIME CONVERSION
# -----------------------------

def get_fps(root):
    """Return the project frame rate declared by the ``<profile>`` element.

    Args:
        root: Root element of the parsed MLT document.

    Returns:
        ``frame_rate_num / frame_rate_den`` as a float. Falls back to 25.0
        when there is no ``<profile>`` child, when the denominator is zero,
        or when the resulting rate is zero or negative.

    Raises:
        ValueError: If either attribute is present but not numeric.
    """
    default_fps = 25.0

    profile = root.find("profile")
    if profile is None:
        return default_fps

    num = float(profile.attrib.get("frame_rate_num", 25))
    den = float(profile.attrib.get("frame_rate_den", 1))

    # A zero denominator would raise ZeroDivisionError, and a non-positive
    # rate would break every later frame <-> time conversion.
    if den == 0 or num / den <= 0:
        return default_fps

    return num / den


def time_to_frames(t, fps):
    """Convert a time value to a frame count at ``fps``.

    Accepted forms of ``t``:

    * ``None`` or an empty/blank string -> 0
    * a clock string with up to three colon-separated fields
      (``HH:MM:SS.mmm``, ``MM:SS.mmm``)
    * a bare number, interpreted as seconds

    A comma is accepted as the decimal separator (``00:00:01,500``).

    Args:
        t: Time value as described above.
        fps: Frames per second used for the conversion.

    Returns:
        The frame count as an int, rounded to the nearest frame.

    Raises:
        ValueError: If a field is not numeric or there are more than three
            colon-separated fields.
    """
    if t is None:
        return 0

    text = str(t).strip().replace(",", ".")
    if not text:
        return 0

    fields = text.split(":")
    if len(fields) > 3:
        raise ValueError(f"unsupported time value: {t!r}")

    # NOTE(review): a bare number is scaled by fps, i.e. treated as seconds.
    # MLT itself appears to treat a bare integer as a frame count -- confirm
    # which interpretation the input files need.
    total_seconds = 0.0
    for field in fields:
        # Each field to the left is worth 60x the one after it.
        total_seconds = total_seconds * 60 + float(field)

    return int(round(total_seconds * fps))


def frames_to_srt_time(frames, fps):
    """Format a frame position as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        frames: Frame position; negative values are clamped to zero because
            SRT has no representation for negative times.
        fps: Frames per second used for the conversion.

    Returns:
        The timestamp string, with a comma before the milliseconds.

    Raises:
        ZeroDivisionError: If ``fps`` is zero.
    """
    # Round once, on whole milliseconds, so that a fraction such as 0.9996 s
    # carries into the seconds field instead of being printed as ",1000".
    total_ms = max(0, int(round(frames * 1000 / fps)))

    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)

    return f"{h:02}:{m:02}:{s:02},{ms:03}"


# -----------------------------
# XML COLLECTION
# -----------------------------

def collect_nodes(root, tag):
    """Index the direct ``tag`` children of ``root`` by their ``id`` attribute.

    Args:
        root: Element whose direct children are searched.
        tag: Tag name to collect (e.g. ``"producer"``).

    Returns:
        A dict mapping each ``id`` to its element, in document order.
        Elements without an ``id`` are skipped: nothing can reference them
        by id, so they should not abort the whole parse with a KeyError.
        If an ``id`` repeats, the last element wins.
    """
    return {
        node.attrib["id"]: node
        for node in root.findall(tag)
        if "id" in node.attrib
    }


# -----------------------------
# FILTER EXTRACTION
# -----------------------------

def extract_from_node(node, timeline_pos_frames, clip_in_frames, fps, debug=False):
    """Collect the ``dynamictext`` filters of one clip as timeline subtitles.

    Only direct ``<filter>`` children of ``node`` whose ``mlt_service``
    property is ``dynamictext`` are considered; the subtitle text is taken
    from the filter's ``argument`` property.

    Args:
        node: The element (producer or chain) referenced by a playlist entry.
        timeline_pos_frames: Timeline frame at which the clip starts.
        clip_in_frames: The entry's in-point, in frames. Filter times are
            shifted by this amount before being placed on the timeline.
        fps: Frames per second used to parse the filter's ``in``/``out``.
        debug: When true, print a trace for every subtitle found.

    Returns:
        A list of ``(start_frame, end_frame, text)`` tuples in document
        order. A filter without ``in``/``out`` attributes is treated as
        having the value ``"0"`` for the missing one.
    """
    subs = []

    for filt in node.findall("filter"):
        service = filt.find("./property[@name='mlt_service']")
        if service is None or service.text != "dynamictext":
            continue

        text_prop = filt.find("./property[@name='argument']")
        if text_prop is None:
            continue

        # Strip before testing: whitespace-only text is truthy but would
        # still produce an empty (invalid) subtitle cue.
        text = (text_prop.text or "").strip()
        if not text:
            continue

        f_in_frames = time_to_frames(filt.attrib.get("in", "0"), fps)
        f_out_frames = time_to_frames(filt.attrib.get("out", "0"), fps)

        # ⚠️ For chains this might already be timeline-relative
        abs_start = timeline_pos_frames + (f_in_frames - clip_in_frames)
        abs_end = timeline_pos_frames + (f_out_frames - clip_in_frames)

        if debug:
            print("---- TEXT FOUND ----")
            print(f"Node: {node.attrib.get('id')}")
            print(f"Text: {text}")
            print(f"Filter frames: {f_in_frames} → {f_out_frames}")
            print(f"Clip in (frames): {clip_in_frames}")
            print(f"Timeline pos (frames): {timeline_pos_frames}")
            print(f"Absolute frames: {abs_start} → {abs_end}")
            print("--------------------\n")

        subs.append((abs_start, abs_end, text))

    return subs


# -----------------------------
# MAIN PARSER
# -----------------------------

def parse_mlt(file_path, debug=False):
    """Extract dynamictext subtitles from every playlist of an MLT XML file.

    Each ``<playlist>`` directly under the root is walked from position 0.
    ``<blank>`` and ``<entry>`` children advance the running timeline
    position; entries that reference a ``<producer>`` or ``<chain>`` are
    additionally scanned for text filters. Other child elements are ignored.

    Args:
        file_path: Path of the MLT XML project file.
        debug: When true, print a trace of the walk and the final result.

    Returns:
        A tuple ``(subs, fps)``: ``subs`` is a list of
        ``(start_frame, end_frame, text)`` tuples sorted by start frame, and
        ``fps`` is the project frame rate used for all conversions.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    fps = get_fps(root)

    if debug:
        print(f"\n=== PROJECT FPS: {fps} ===")

    producers = collect_nodes(root, "producer")
    chains = collect_nodes(root, "chain")
    playlists = collect_nodes(root, "playlist")

    all_subs = []

    for playlist_id, playlist in playlists.items():
        # Every playlist has its own clock starting at frame 0.
        timeline_pos_frames = 0

        if debug:
            print(f"\n=== PLAYLIST: {playlist_id} ===")

        for element in playlist:
            # BLANK (gap)
            if element.tag == "blank":
                length_frames = time_to_frames(element.attrib.get("length", "0"), fps)

                if debug:
                    print(f"[BLANK] +{length_frames} frames")

                timeline_pos_frames += length_frames
                continue

            # ENTRY
            if element.tag != "entry":
                continue

            ref_id = element.attrib.get("producer")

            # "out" is inclusive, hence the +1.
            # NOTE(review): a missing in/out is read as "0" here -- confirm
            # whether such entries should use the referenced clip's range.
            clip_in_frames = time_to_frames(element.attrib.get("in", "0"), fps)
            clip_out_frames = time_to_frames(element.attrib.get("out", "0"), fps)
            clip_duration_frames = clip_out_frames - clip_in_frames + 1

            # Explicit None checks: an Element without children is falsy,
            # so "producers.get(...) or chains.get(...)" would be wrong.
            node = producers.get(ref_id)
            if node is None:
                node = chains.get(ref_id)

            if node is None:
                # The entry points at something that is neither a producer
                # nor a chain. It has no text to extract, but it still
                # occupies timeline space, so the position must advance or
                # every later clip in this playlist is timed too early.
                if debug:
                    print(f"\nEntry → {ref_id} (unresolved, +{clip_duration_frames} frames)")

                timeline_pos_frames += clip_duration_frames
                continue

            if debug:
                print(f"\nEntry → {ref_id}")
                print(f"Clip frames: {clip_in_frames} → {clip_out_frames}")
                print(f"Timeline BEFORE: {timeline_pos_frames}")

            subs = extract_from_node(
                node,
                timeline_pos_frames,
                clip_in_frames,
                fps,
                debug
            )

            all_subs.extend(subs)

            timeline_pos_frames += clip_duration_frames

            if debug:
                print(f"Timeline AFTER: {timeline_pos_frames}")

    # Sort globally (stable, so equal start frames keep discovery order)
    all_subs.sort(key=lambda x: x[0])

    if debug:
        print("\n=== FINAL SUBS ===")
        for i, (s, e, t) in enumerate(all_subs, 1):
            print(f"{i}: {frames_to_srt_time(s, fps)} → {frames_to_srt_time(e, fps)} | {t}")

    return all_subs, fps


# -----------------------------
# SRT OUTPUT
# -----------------------------

def write_srt(subs, fps, output_file):
    """Write subtitles to ``output_file`` as numbered SRT cues.

    Args:
        subs: Iterable of ``(start_frame, end_frame, text)`` tuples, written
            in the order given and numbered from 1.
        fps: Frames per second used to turn frames into timestamps.
        output_file: Destination path; created or overwritten, UTF-8 encoded.
    """
    with open(output_file, "w", encoding="utf-8") as srt:
        emit = srt.write
        for number, cue in enumerate(subs, start=1):
            first_frame, last_frame, caption = cue

            # Cue layout: index line, time range line, text, blank separator.
            emit(f"{number}\n")
            time_range = " --> ".join(
                frames_to_srt_time(position, fps)
                for position in (first_frame, last_frame)
            )
            emit(f"{time_range}\n")
            emit(f"{caption}\n\n")


# -----------------------------
# ENTRY POINT
# -----------------------------

if __name__ == "__main__":
    import sys

    # Separate the optional flag from the positional paths so "--debug" is
    # never mistaken for a file name, wherever it appears on the command line.
    cli_args = sys.argv[1:]
    debug = "--debug" in cli_args
    paths = [arg for arg in cli_args if arg != "--debug"]

    if len(paths) < 2:
        print("Usage: python shotcut_to_srt.py input.mlt output.srt [--debug]")
        sys.exit(1)

    # Any further positional arguments are ignored.
    input_file, output_file = paths[:2]

    subs, fps = parse_mlt(input_file, debug)
    write_srt(subs, fps, output_file)

    print(f"\nExtracted {len(subs)} subtitles → {output_file}")