#!/usr/bin/env python3
"""
split songs from an album contained in one file into their own files
"""

# provenance: initial commit 89c4a068a991a3a4072bd0b93e23eb5f6a39366a
# (akspecs, Thu, 4 Nov 2021) -- WIP script that accepts an input text file
# with timestamps and titles, as well as the path of the multimedia file
# to clip/split from.

# TODO: multi-processing (maybe with concurrent.futures?)
# TODO: include a mechanism to update file tags
#       with relevant song title / album / genre info

import argparse
import subprocess
import sys


def clip(input_file, output_file, start_time, end_time):
    """
    clip/trim a multimedia file to a given time range using ffmpeg

    input_file:  path of the multimedia file to cut from
    output_file: path of the file ffmpeg should write
    start_time:  timestamp passed to ffmpeg's -ss option
    end_time:    timestamp passed to ffmpeg's -to option

    the audio and video streams are copied, not re-encoded.
    """
    command = [
        'ffmpeg',
        '-i', input_file,
        '-ss', start_time,
        '-to', end_time,
        '-c:v', 'copy',
        '-c:a', 'copy',
        output_file
    ]
    # the exit status is deliberately not checked: a track that fails to
    # clip does not stop the remaining tracks from being written
    subprocess.run(command)


def get_duration(multimedia_file):
    """
    get the duration of a multimedia file using ffprobe

    returns the duration as a sexagesimal string with the fractional
    seconds removed. exits the program with status 1 when ffprobe is not
    installed or reports an error for the file.
    """
    # adapted from this example:
    # ffprobe -i <file> -show_entries format=duration -v quiet -of csv="p=0" -sexagesimal
    command = [
        'ffprobe',
        '-v', 'quiet',
        '-of', 'csv=p=0',
        '-show_entries', 'format=duration',
        '-sexagesimal',
        '-i', multimedia_file
    ]
    try:
        output = subprocess.check_output(command)
    except FileNotFoundError:
        # the ffprobe executable itself could not be found
        print('ffprobe not found: install ffmpeg and make sure it is on PATH',
              file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as error:
        # '-v quiet' silences ffprobe, so report the failure ourselves
        print(f'ffprobe exited with status {error.returncode} '
              f'for {multimedia_file!r}', file=sys.stderr)
        sys.exit(1)
    # split at decimal; strip the trailing newline in case there is none
    return output.decode('utf-8').split('.')[0].strip()


def iterate_clip(input_list, multimedia_file):
    """
    iterate over a list of titles and timestamp ranges, and write
    files with their index and song title

    input_list:      list of [title, start_time, end_time] as returned
                     by parse_input
    multimedia_file: path of the multimedia file to cut from
    """
    for number, (title, start_time, end_time) in enumerate(input_list, start=1):
        # a '/' in the title would make the output path point into a
        # directory that does not exist
        safe_title = title.replace('/', '-')
        clip(
            input_file=multimedia_file,
            output_file=f'{number:02} - {safe_title}.mp3',
            start_time=start_time,
            end_time=end_time
        )


def _read_entries(input_file):
    """
    return a (timestamp, title) pair for every non-blank line of input_file
    """
    entries = []
    with open(input_file, encoding='utf-8') as open_file:
        for line_number, line in enumerate(open_file, start=1):
            # split on the first run of whitespace only, so the title
            # keeps its own spaces
            fields = line.split(None, 1)
            if not fields:
                continue  # blank line
            if len(fields) != 2:
                raise ValueError(
                    f'{input_file}: line {line_number}: '
                    f'expected "<timestamp> <title>", got {line.strip()!r}'
                )
            timestamp, title = fields
            entries.append((timestamp, title.strip()))
    return entries


def parse_input(input_file, multimedia_file):
    """
    parse a text input file containing timestamps and song titles
    return a list of [title, start_time, end_time] lists.

    each non-blank line holds a timestamp, whitespace, then the title.
    a track ends where the next one starts; the last track ends at the
    duration of multimedia_file. blank lines are ignored and a line
    without a title raises ValueError.

    e.g.
    --- start file ---
    00:00  Молчат Дома - Клетка
    04:40  Кино - Раньше в твоих глазах
    07:02  Chernikovskaya Hata - Владимирский централ
    ---- end file ----
    """
    entries = _read_entries(input_file)
    if not entries:
        # nothing to split, so there is no need to run ffprobe
        return []
    start_times = [timestamp for timestamp, _ in entries]
    end_times = start_times[1:] + [get_duration(multimedia_file)]
    return [
        [title, start_time, end_time]
        for (start_time, title), end_time in zip(entries, end_times)
    ]


def main():
    """
    parse the command line, then split the album into one file per song
    """
    arg_parser = argparse.ArgumentParser(
        description="split songs from an album contained in one file into their own files"
    )
    arg_parser.add_argument(
        'input_txt',
        help="input text file to parse for timestamps and titles"
    )
    arg_parser.add_argument(
        'multimedia_file',
        help="input file containing album to cut/split from"
    )
    args = arg_parser.parse_args()
    album = args.multimedia_file
    title_range = parse_input(
        input_file=args.input_txt,
        multimedia_file=album
    )
    iterate_clip(title_range, album)


if __name__ == '__main__':
    main()