"""Convert Shotcut / MLT XML text filters ("dynamictext") into an SRT file."""

import sys
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional, Tuple

# Frame rate assumed when the project has no usable <profile>.
DEFAULT_FPS = 25.0

# (start_frame, end_frame, text) on the timeline.
Subtitle = Tuple[int, int, str]


# -----------------------------
# FPS / TIME CONVERSION
# -----------------------------

def get_fps(root: ET.Element) -> float:
    """Return the project frame rate read from the ``<profile>`` child.

    Falls back to ``DEFAULT_FPS`` when the profile is missing, its frame-rate
    attributes are not numeric, or either of them is not positive (which
    would otherwise divide by zero or yield a meaningless rate).
    """
    profile = root.find("profile")
    if profile is None:
        return DEFAULT_FPS
    try:
        num = float(profile.attrib.get("frame_rate_num", 25))
        den = float(profile.attrib.get("frame_rate_den", 1))
    except ValueError:
        return DEFAULT_FPS
    if num <= 0 or den <= 0:
        return DEFAULT_FPS
    return num / den


def time_to_frames(t: Optional[str], fps: float) -> int:
    """Convert an MLT time attribute to a frame count.

    Accepted forms:

    * ``None`` / empty string -> 0
    * bare number (``"120"``) -> a frame count, as MLT defines it
    * clock value ``[hh:]mm:ss[.fraction]`` (``,`` also accepted as the
      decimal separator) -> seconds multiplied by ``fps`` and rounded
    * non-drop SMPTE ``hh:mm:ss:ff`` -> whole seconds converted plus ``ff``

    Raises:
        ValueError: if the value matches none of the forms above.
    """
    if t is None:
        return 0
    text = str(t).strip()
    if not text:
        return 0

    if ":" not in text:
        # MLT treats a colon-less value as frames, not seconds.
        return int(float(text))

    parts = text.replace(",", ".").split(":")
    if len(parts) > 4:
        raise ValueError(f"Unsupported time value: {t!r}")

    if len(parts) == 4:
        hours, minutes, secs, frame_part = parts
        whole_seconds = int(hours) * 3600 + int(minutes) * 60 + int(secs)
        return int(round(whole_seconds * fps)) + int(frame_part)

    # Left-pad so "mm:ss" is read as "0:mm:ss".
    hours, minutes, secs = ["0"] * (3 - len(parts)) + parts
    total_seconds = int(hours) * 3600 + int(minutes) * 60 + float(secs)
    return int(round(total_seconds * fps))


def frames_to_srt_time(frames: int, fps: float) -> str:
    """Format a frame position as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded once, in milliseconds, so the millisecond field can
    never reach 1000. Negative positions are clamped to zero because SRT has
    no representation for them.
    """
    total_ms = max(0, int(round(frames * 1000 / fps)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"


# -----------------------------
# XML COLLECTION
# -----------------------------

def collect_nodes(root: ET.Element, tag: str) -> Dict[str, ET.Element]:
    """Map ``id`` -> element for the direct children of *root* named *tag*.

    Elements without an ``id`` attribute are ignored; they cannot be
    referenced by a playlist entry anyway.
    """
    return {
        node.attrib["id"]: node
        for node in root.findall(tag)
        if "id" in node.attrib
    }


# -----------------------------
# FILTER EXTRACTION
# -----------------------------

def extract_from_node(
    node: ET.Element,
    timeline_pos_frames: int,
    clip_in_frames: int,
    fps: float,
    debug: bool = False,
    clip_out_frames: Optional[int] = None,
) -> List[Subtitle]:
    """Collect the ``dynamictext`` filters of *node* as timeline subtitles.

    Args:
        node: producer or chain element whose ``<filter>`` children are read.
        timeline_pos_frames: timeline frame at which the clip starts.
        clip_in_frames: in point of the playlist entry, in source frames.
        fps: project frame rate used to parse the filter times.
        debug: print details of every text filter found.
        clip_out_frames: out point of the playlist entry, in source frames.
            When given, a filter ``out`` of 0 (open-ended in MLT) is resolved
            to it, filter ranges are clipped to the part of the source that
            the entry plays, and filters entirely outside it are dropped.
            When ``None`` the filter range is used as is.

    Returns:
        A list of ``(start_frame, end_frame, text)`` tuples.
    """
    subs: List[Subtitle] = []

    for filt in node.findall("filter"):
        service = filt.find("./property[@name='mlt_service']")
        if service is None or service.text != "dynamictext":
            continue

        text_prop = filt.find("./property[@name='argument']")
        if text_prop is None or not text_prop.text:
            continue

        text = text_prop.text.strip()
        if not text:
            continue

        f_in_frames = time_to_frames(filt.attrib.get("in", "0"), fps)
        f_out_frames = time_to_frames(filt.attrib.get("out", "0"), fps)

        if clip_out_frames is not None:
            if f_out_frames <= 0:
                # No explicit end: the filter lasts as long as the clip.
                f_out_frames = clip_out_frames
            f_in_frames = max(f_in_frames, clip_in_frames)
            f_out_frames = min(f_out_frames, clip_out_frames)
            if f_out_frames < f_in_frames:
                continue  # filter lies outside the played part of the clip

        # NOTE(review): assumes filter in/out are expressed in source frames
        # like the entry's in/out; for chains this might already be
        # timeline-relative - confirm against real project files.
        abs_start = timeline_pos_frames + (f_in_frames - clip_in_frames)
        abs_end = timeline_pos_frames + (f_out_frames - clip_in_frames)

        if debug:
            print("---- TEXT FOUND ----")
            print(f"Node: {node.attrib.get('id')}")
            print(f"Text: {text}")
            print(f"Filter frames: {f_in_frames} → {f_out_frames}")
            print(f"Clip in (frames): {clip_in_frames}")
            print(f"Timeline pos (frames): {timeline_pos_frames}")
            print(f"Absolute frames: {abs_start} → {abs_end}")
            print("--------------------\n")

        subs.append((abs_start, abs_end, text))

    return subs


# -----------------------------
# MAIN PARSER
# -----------------------------

def _entry_span(
    entry: ET.Element, source: Optional[ET.Element], fps: float
) -> Tuple[int, Optional[int]]:
    """Return ``(in, out)`` frames of a playlist entry.

    Attributes missing on the entry are taken from *source* (the element the
    entry refers to). ``out`` is ``None`` when neither of them declares it.
    """
    def lookup(name: str) -> Optional[str]:
        value = entry.attrib.get(name)
        if value is None and source is not None:
            value = source.attrib.get(name)
        return value

    raw_out = lookup("out")
    clip_in = time_to_frames(lookup("in"), fps)
    clip_out = None if raw_out is None else time_to_frames(raw_out, fps)
    return clip_in, clip_out


def parse_mlt(file_path, debug: bool = False) -> Tuple[List[Subtitle], float]:
    """Parse an MLT project and return its text subtitles and frame rate.

    Every playlist is walked from frame 0, accumulating the length of blanks
    and entries; ``dynamictext`` filters of the referenced producers/chains
    are converted to timeline positions. When a ``<tractor>`` references
    playlists as tracks, only those playlists are processed, so that
    non-timeline playlists (such as Shotcut's ``main_bin``) do not produce
    subtitles.

    Args:
        file_path: path (or file object) accepted by ``ET.parse``.
        debug: print a trace of the walk.

    Returns:
        ``(subs, fps)`` where ``subs`` is a list of
        ``(start_frame, end_frame, text)`` sorted by start frame.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    fps = get_fps(root)

    if debug:
        print(f"\n=== PROJECT FPS: {fps} ===")

    producers = collect_nodes(root, "producer")
    chains = collect_nodes(root, "chain")
    playlists = collect_nodes(root, "playlist")

    # Any top-level element can be the target of an entry (e.g. a transition
    # tractor); keep them all to look up default in/out values.
    top_level = {
        child.attrib["id"]: child for child in root if "id" in child.attrib
    }

    track_refs = {
        track.attrib.get("producer")
        for track in root.findall("./tractor/track")
    }
    timeline_playlists = {
        pid: playlist for pid, playlist in playlists.items()
        if pid in track_refs
    }
    if timeline_playlists:
        playlists = timeline_playlists

    all_subs: List[Subtitle] = []

    for playlist_id, playlist in playlists.items():
        timeline_pos_frames = 0

        if debug:
            print(f"\n=== PLAYLIST: {playlist_id} ===")

        for element in playlist:
            # BLANK (gap)
            if element.tag == "blank":
                length_frames = time_to_frames(
                    element.attrib.get("length", "0"), fps
                )
                if debug:
                    print(f"[BLANK] +{length_frames} frames")
                timeline_pos_frames += length_frames
                continue

            # ENTRY
            if element.tag != "entry":
                continue

            ref_id = element.attrib.get("producer")
            node = producers.get(ref_id)
            if node is None:
                node = chains.get(ref_id)

            clip_in_frames, clip_out_frames = _entry_span(
                element, top_level.get(ref_id), fps
            )

            if node is None and clip_out_frames is None:
                # Nothing is known about this entry; it cannot be measured.
                continue

            # in/out are inclusive frame numbers, hence the +1.
            clip_duration_frames = max(
                0, (clip_out_frames or 0) - clip_in_frames + 1
            )

            if node is None:
                # Not a producer/chain (e.g. a transition): no text to read,
                # but it still occupies time on the track.
                if debug:
                    print(f"\nEntry → {ref_id} (skipped)")
                    print(f"[SKIP] +{clip_duration_frames} frames")
                timeline_pos_frames += clip_duration_frames
                continue

            if debug:
                print(f"\nEntry → {ref_id}")
                print(f"Clip frames: {clip_in_frames} → {clip_out_frames or 0}")
                print(f"Timeline BEFORE: {timeline_pos_frames}")

            subs = extract_from_node(
                node,
                timeline_pos_frames,
                clip_in_frames,
                fps,
                debug,
                clip_out_frames=clip_out_frames,
            )
            all_subs.extend(subs)

            timeline_pos_frames += clip_duration_frames

            if debug:
                print(f"Timeline AFTER: {timeline_pos_frames}")

    # Sort globally
    all_subs.sort(key=lambda x: x[0])

    if debug:
        print("\n=== FINAL SUBS ===")
        for i, (s, e, t) in enumerate(all_subs, 1):
            print(
                f"{i}: {frames_to_srt_time(s, fps)} → "
                f"{frames_to_srt_time(e, fps)} | {t}"
            )

    return all_subs, fps


# -----------------------------
# SRT OUTPUT
# -----------------------------

def write_srt(subs, fps, output_file):
    """Write *subs* to *output_file* as UTF-8 SubRip text.

    Args:
        subs: iterable of ``(start_frame, end_frame, text)``.
        fps: frame rate used to convert frames to timestamps.
        output_file: path of the file to create or overwrite.

    Blank lines inside a subtitle text are removed because a blank line
    terminates a cue in the SRT format.
    """
    with open(output_file, "w", encoding="utf-8") as f:
        for i, (start_f, end_f, text) in enumerate(subs, 1):
            cue_text = "\n".join(
                line for line in str(text).splitlines() if line.strip()
            )
            f.write(f"{i}\n")
            f.write(
                f"{frames_to_srt_time(start_f, fps)} --> "
                f"{frames_to_srt_time(end_f, fps)}\n"
            )
            f.write(f"{cue_text}\n\n")


# -----------------------------
# ENTRY POINT
# -----------------------------

def main(argv=None):
    """Command-line entry point; returns the process exit status.

    ``--debug`` may appear anywhere on the command line and is never mistaken
    for the input or output path.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    debug = "--debug" in args
    positional = [arg for arg in args if arg != "--debug"]

    if len(positional) < 2:
        print("Usage: python shotcut_to_srt.py input.mlt output.srt [--debug]")
        return 1

    input_file, output_file = positional[:2]

    subs, fps = parse_mlt(input_file, debug)
    write_srt(subs, fps, output_file)

    print(f"\nExtracted {len(subs)} subtitles → {output_file}")
    return 0


if __name__ == "__main__":
    sys.exit(main())