summary refs log tree commit diff stats
path: root/split_album.py
blob: d76c04e141da03d5f2ebb4ca72bfc68e2eb3e1a1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
"""
split songs from an album or playlist contained in one file into their own files
"""

# TODO: sanity checks, check if the inputs make sense
#
# TODO: ability to truly split from most multi-media
#       formats and convert to any reasonable file format
#
# TODO: multi-processing (maybe with concurrent.futures?)
#
# TODO: include a mechanism to update file tags
#       with relevant song title / album / genre info
#
# TODO: automate the entire process of downloading a
#       multimedia file, check/scrape the page being
#       downloaded for timestamp/title in description or
#       comment, and let the rest of the script perform
#       its magic with the help of ffmpeg!
#       (maybe outside the scope of this standalone
#        script?)

import argparse
import subprocess
import sys


def clip(input_file, output_file, start_time, end_time):
    """
    cut the range start_time..end_time out of input_file with ffmpeg
    and write it to output_file

    the audio and video streams are passed with the 'copy' codec.
    ffmpeg's exit status is not inspected.
    """
    source = ['-i', input_file]
    time_window = ['-ss', start_time, '-to', end_time]
    stream_copy = ['-c:v', 'copy', '-c:a', 'copy']
    subprocess.run(
        ['ffmpeg', *source, *time_window, *stream_copy, output_file]
    )


def get_duration(multimedia_file):
    """
    get the duration of a multimedia file using ffprobe

    returns the duration as text in ffprobe's sexagesimal notation
    (hours:minutes:seconds) with the fractional seconds cut off.

    if ffprobe cannot be started or exits with an error, a message is
    written to stderr and the script exits with status 1.
    """
    # adapted from this example:
    #  ffprobe -i <file> -show_entries format=duration -v quiet -of csv="p=0" -sexagesimal
    #
    # an alternative is '-of default=noprint_wrappers=1:nokey=1' without
    # '-sexagesimal', which reports the duration in seconds instead; it
    # seemed less clean than the form used here
    command = [
        'ffprobe',
        '-v', 'quiet',
        '-of', 'csv=p=0',
        '-show_entries', 'format=duration',
        '-sexagesimal',
        '-i', multimedia_file
    ]
    try:
        output = subprocess.check_output(command)
    except OSError as error:
        # ffprobe is not installed / not executable
        print(f'could not run ffprobe: {error}', file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError as error:
        # '-v quiet' silences ffprobe itself, so say what went wrong here
        print(
            f'ffprobe exited with status {error.returncode} '
            f'for {multimedia_file!r}',
            file=sys.stderr
        )
        sys.exit(1)
    # drop the trailing newline, then everything from the decimal point on
    return output.decode('utf-8').strip().partition('.')[0]


def iterate_clip(input_list, multimedia_file):
    """
    cut every entry of input_list out of multimedia_file

    each entry is indexed as [title, start time, end time]. the output
    file is named '<NN> - <title>.mp3', where NN is the zero-padded
    1-based position of the entry in the list.
    """
    for track_number, entry in enumerate(input_list, start=1):
        track_file = f'{track_number:02} - {entry[0]}.mp3'
        begin, finish = entry[1], entry[2]
        clip(
            input_file=multimedia_file,
            output_file=track_file,
            start_time=begin,
            end_time=finish,
        )


def parse_input(input_txt_file, multimedia_file):
    """
    parse a text input file containing timestamps and song titles
    return a list of [title, start time, end time] lists.

    each non-blank line holds a timestamp, whitespace (one or more
    spaces or tabs), and then the title. blank lines are skipped. the
    file is read as UTF-8.

    a song ends where the next one starts; the last song ends at the
    duration of multimedia_file as reported by get_duration(). for an
    input without any entries an empty list is returned and
    get_duration() is not called.

    raises ValueError if a non-blank line has no title after the
    timestamp.

    e.g.
    --- start file ---
    00:00  Молчат Дома - Клетка
    04:40  Кино - Раньше в твоих глазах
    07:02  Chernikovskaya Hata - Владимирский централ
    ---- end file ----
    """
    entries = []  # (start time, title) pairs in file order
    with open(input_txt_file, encoding='utf-8') as open_file:
        for line_number, line in enumerate(open_file, start=1):
            # split once so whitespace inside the title is kept
            fields = line.strip().split(maxsplit=1)
            if not fields:
                continue  # blank line
            if len(fields) < 2:
                raise ValueError(
                    f'{input_txt_file}:{line_number}: expected '
                    f'"<timestamp>  <title>", got {line.strip()!r}'
                )
            entries.append((fields[0], fields[1]))

    if not entries:
        return []

    start_times = [start for start, _ in entries]
    # every song ends at the next start; only the last needs ffprobe
    end_times = start_times[1:] + [get_duration(multimedia_file)]
    return [
        [title, start, end]
        for (start, title), end in zip(entries, end_times)
    ]


def main():
    """
    command-line entry point: read the timestamp file and the album
    file from the arguments, then write one clip per listed song
    """
    parser = argparse.ArgumentParser(
        description="split songs from an album or playlist contained in one file into their own files"
    )
    parser.add_argument(
        'input_txt',
        help="input text file to parse for timestamps and titles",
    )
    parser.add_argument(
        'multimedia_file',
        help="input file containing album to cut/split from",
    )
    options = parser.parse_args()

    tracks = parse_input(
        input_txt_file=options.input_txt,
        multimedia_file=options.multimedia_file,
    )
    iterate_clip(tracks, options.multimedia_file)


# run the command-line interface only when this file is executed as a
# script, not when it is imported
if __name__ == '__main__':
    main()


# notes:
'''
title_range = []
for index, item in enumerate(timestamp_title):
    try:
        title_range.append([item[1],
                            item[0],
                            timestamp_title[index+1][0]])
    except IndexError:
        title_range.append([timestamp_title[-1][1],
                            timestamp_title[-1][0],
                            get_duration(multimedia_file)])

for index, item in enumerate(title_range):
    clip(
        input_file=multimedia_file,
        output_file=f'{index+1:02} - {item[0]}.mp3',
        start_time=item[1],
        end_time=item[2]
    )
'''