summary refs log tree commit diff stats
diff options
context:
space:
mode:
author akspecs <akspecs@gmail.com> 2021-11-04 23:10:56 -0700
committer akspecs <akspecs@gmail.com> 2021-11-06 22:18:57 -0700
commit 89c4a068a991a3a4072bd0b93e23eb5f6a39366a (patch)
tree d152218a930680a869456521063c90e565faa5b6
download split-album-89c4a068a991a3a4072bd0b93e23eb5f6a39366a.tar.gz
initial commit
adds WIP script, split_album.py, that accepts a pair of arguments in the
form of an input text file with timestamps and titles, as well as the
path of the multimedia file to clip/split from.
-rw-r--r-- split_album.py 158
1 files changed, 158 insertions, 0 deletions
diff --git a/split_album.py b/split_album.py
new file mode 100644
index 0000000..a2ca0ab
--- /dev/null
+++ b/split_album.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+split songs from an album contained in one file into their own files
+"""
+
+# TODO: multi-processing (maybe with concurrent.futures?)
+# TODO: include a mechanism to update file tags
+#       with relevant song title / album / genre info
+
+import argparse
+import subprocess
+import sys
+
+
+def clip(input_file, output_file, start_time, end_time):
+    """
+    cut the span between start_time and end_time out of input_file and
+    write it to output_file by invoking ffmpeg
+
+    both the video and the audio stream are requested with the 'copy'
+    codec. the result of the ffmpeg run is not inspected.
+    """
+    source_args = ['-i', input_file]
+    range_args = ['-ss', start_time, '-to', end_time]
+    codec_args = ['-c:v', 'copy', '-c:a', 'copy']
+    subprocess.run(
+        ['ffmpeg', *source_args, *range_args, *codec_args, output_file]
+    )
+
+
+def get_duration(multimedia_file):
+    """
+    get the duration of a multimedia file using ffprobe
+
+    returns the duration string printed by ffprobe (run with
+    -sexagesimal), with the fractional seconds and any surrounding
+    whitespace removed.
+
+    if ffprobe exits with a non-zero status, a message is written to
+    stderr and the program exits with status 1.
+    """
+    # adapted from this example:
+    #  ffprobe -i <file> -show_entries format=duration -v quiet -of csv="p=0" -sexagesimal
+    # note: without -sexagesimal the duration is reported in seconds
+    command = [
+        'ffprobe',
+        '-v', 'quiet',
+        '-of', 'csv=p=0',
+        '-show_entries', 'format=duration',
+        '-sexagesimal',
+        '-i', multimedia_file
+    ]
+    try:
+        output = subprocess.check_output(command)
+    except subprocess.CalledProcessError as error:
+        # '-v quiet' silences ffprobe and its stderr is not captured, so
+        # error.output is normally empty bytes; report the exit status
+        # and the offending file instead
+        print(
+            f'ffprobe failed with exit status {error.returncode} '
+            f'for {multimedia_file!r}',
+            file=sys.stderr
+        )
+        sys.exit(1)
+    # drop the fractional seconds; strip() also removes the trailing
+    # newline that would survive when the output has no decimal point
+    return output.decode('utf-8').split('.')[0].strip()
+
+
+def iterate_clip(input_list, multimedia_file):
+    """
+    write one clip of multimedia_file per entry of input_list
+
+    each entry is indexed as [title, start time, end time]. the output
+    file is named '<NN> - <title>.mp3', where NN is the 1-based position
+    of the entry, zero-padded to two digits.
+    """
+    for track_number, entry in enumerate(input_list, start=1):
+        title, start, end = entry[0], entry[1], entry[2]
+        target = f'{track_number:02} - {title}.mp3'
+        clip(
+            input_file=multimedia_file,
+            output_file=target,
+            start_time=start,
+            end_time=end
+        )
+
+
+def parse_input(input_file, multimedia_file):
+    """
+    parse a text input file containing timestamps and song titles
+    return a list of [title, start_time, end_time] lists.
+
+    each non-blank line holds a timestamp and a title separated by two
+    (or more) spaces. a song ends where the next one starts; the last
+    song ends at the duration of multimedia_file.
+
+    e.g.
+    --- start file ---
+    00:00  Молчат Дома - Клетка
+    04:40  Кино - Раньше в твоих глазах
+    07:02  Chernikovskaya Hata - Владимирский централ
+    ---- end file ----
+
+    blank lines are skipped and an empty file yields an empty list.
+    raises ValueError for a non-blank line that lacks the two-space
+    delimiter.
+    """
+    starts_titles = []
+    # read as utf-8 explicitly: titles are often non-ascii and the
+    # platform default encoding is not utf-8 everywhere
+    with open(input_file, encoding='utf-8') as open_file:
+        for line_number, line in enumerate(open_file, start=1):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            # split at the first delimiter only, so a title may itself
+            # contain two consecutive spaces
+            timestamp, delimiter, title = stripped.partition('  ')
+            if not delimiter:
+                raise ValueError(
+                    f'{input_file}:{line_number}: expected '
+                    f'"<timestamp>  <title>", got {stripped!r}'
+                )
+            starts_titles.append((timestamp.strip(), title.strip()))
+    if not starts_titles:
+        return []
+    # every song ends where the following one starts; only the last one
+    # needs the total duration, so get_duration is called exactly once
+    end_times = [start for start, _ in starts_titles[1:]]
+    end_times.append(get_duration(multimedia_file))
+    return [
+        [title, start, end]
+        for (start, title), end in zip(starts_titles, end_times)
+    ]
+
+
+def main():
+    """
+    command-line entry point: read the two positional arguments, build
+    the list of title / time ranges and write one clip per entry
+    """
+    parser = argparse.ArgumentParser(
+        description="split songs from an album contained in one file into their own files"
+    )
+    positionals = (
+        ('input_txt', "input text file to parse for timestamps and titles"),
+        ('multimedia_file', "input file containing album to cut/split from"),
+    )
+    for name, help_text in positionals:
+        parser.add_argument(name, help=help_text)
+    options = parser.parse_args()
+    ranges = parse_input(
+        input_file=options.input_txt,
+        multimedia_file=options.multimedia_file
+    )
+    iterate_clip(ranges, options.multimedia_file)
+
+
+# run main() only when this file is executed as a script
+if __name__ == '__main__':
+    main()
+
+
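+# notes: scratch copy of the loops used in parse_input() and iterate_clip(),
+#        kept inside a string literal so it is not run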
+'''
+title_range = []
+for index, item in enumerate(timestamp_title):
+    try:
+        title_range.append([item[1],
+                            item[0],
+                            timestamp_title[index+1][0]])
+    except IndexError:
+        title_range.append([timestamp_title[-1][1],
+                            timestamp_title[-1][0],
+                            get_duration(multimedia_file)])
+
+for index, item in enumerate(title_range):
+    clip(
+        input_file=multimedia_file,
+        output_file=f'{index+1:02} - {item[0]}.mp3',
+        start_time=item[1],
+        end_time=item[2]
+    )
+'''