path: root/split_album.py


                      
#!/usr/bin/env python3
"""
split songs from an album or playlist contained in one file into their own files
"""

# TODO: sanity checks, check if the inputs make sense
#
# TODO: ability to truly split from most multi-media
#       formats and convert to any reasonable file format
#
# TODO: multi-processing (maybe with concurrent.futures?)
#
# TODO: include a mechanism to update file tags
#       with relevant song title / album / genre info
#
# TODO: automate the entire process of downloading a
#       multimedia file, check/scrape the page being
#       downloaded for timestamp/title in description or
#       comment, and let the rest of the script perform
#       its magic with the help of ffmpeg!
#       (maybe outside the scope of this standalone
#        script?)

import argparse
import subprocess
import sys


def clip(input_file, output_file, start_time, end_time):
    """
    cut the span between start_time and end_time out of input_file with
    ffmpeg and write it to output_file

    the audio and video codecs are both set to 'copy'.  the outcome of
    the ffmpeg run is not inspected.
    """
    source_args = ['-i', input_file]
    range_args = ['-ss', start_time, '-to', end_time]
    codec_args = ['-c:v', 'copy', '-c:a', 'copy']
    subprocess.run(
        ['ffmpeg', *source_args, *range_args, *codec_args, output_file]
    )


def get_duration(multimedia_file):
    """
    ask ffprobe for the length of a multimedia file

    returns the sexagesimal duration printed by ffprobe with everything
    from the first '.' onwards dropped.  if ffprobe exits with an error,
    its captured output is printed and the script exits with status 1.
    """
    # equivalent shell command:
    #  ffprobe -v quiet -of csv="p=0" -show_entries format=duration -sexagesimal -i <file>
    #
    # (asking for "-of default=noprint_wrappers=1:nokey=1" without
    #  -sexagesimal would return plain seconds instead, which seemed less
    #  clean than the form used here)
    probe_command = [
        'ffprobe',
        '-v', 'quiet',
        '-of', 'csv=p=0',
        '-show_entries', 'format=duration',
        '-sexagesimal',
        '-i', multimedia_file,
    ]
    try:
        raw_output = subprocess.check_output(probe_command)
    except subprocess.CalledProcessError as error:
        print(error.output)
        sys.exit(1)
    # keep only what precedes the decimal point
    whole_part, _, _ = raw_output.decode('utf-8').partition('.')
    return whole_part


def iterate_clip(input_list, multimedia_file):
    """
    cut every entry of input_list out of multimedia_file

    each entry is indexed as [title, start, end]; its clip is written to
    '<NN> - <title>.mp3', where NN is the 1-based, zero-padded position
    of the entry in the list.
    """
    for track_number, entry in enumerate(input_list, start=1):
        clip(
            input_file=multimedia_file,
            output_file=f'{track_number:02} - {entry[0]}.mp3',
            start_time=entry[1],
            end_time=entry[2],
        )


def parse_input(input_txt_file, multimedia_file):
    """
    parse a text input file containing timestamps and song titles
    return a list of [title, start_time, end_time] lists.

    every non-blank line must hold a timestamp, then whitespace (the
    usual format is two spaces), then the title.  blank lines are
    ignored.  a title ends where the next one starts; the last title
    ends at the duration of multimedia_file as reported by
    get_duration().  a file without any entries yields an empty list
    and get_duration() is not called.

    raises ValueError for a non-blank line that has no title.

    e.g.
    --- start file ---
    00:00  Молчат Дома - Клетка
    04:40  Кино - Раньше в твоих глазах
    07:02  Chernikovskaya Hata - Владимирский централ
    ---- end file ----
    """
    entries = []
    # utf-8-sig: titles are frequently non-ASCII, so do not depend on the
    # locale's default encoding; the -sig variant also drops a leading BOM
    with open(input_txt_file, encoding='utf-8-sig') as open_file:
        for line_number, line in enumerate(open_file, start=1):
            # split once only, so spaces inside the title are preserved
            fields = line.split(maxsplit=1)
            if not fields:
                continue  # blank line (e.g. a trailing newline)
            if len(fields) != 2:
                raise ValueError(
                    f'{input_txt_file}:{line_number}: expected '
                    f'"<timestamp>  <title>", got {line!r}'
                )
            timestamp, title = fields
            entries.append((timestamp, title.strip()))

    if not entries:
        return []

    # every track ends where the following one begins; only the last one
    # needs the total duration of the multimedia file
    end_times = [timestamp for timestamp, _ in entries[1:]]
    end_times.append(get_duration(multimedia_file))
    return [
        [title, start, end]
        for (start, title), end in zip(entries, end_times)
    ]


def main():
    """
    command-line entry point: take the timestamp file and the album file
    from the arguments, work out the time range of every title, and clip
    each one into its own file
    """
    cli = argparse.ArgumentParser(
        description="split songs from an album or playlist contained in one file into their own files"
    )
    positionals = (
        ('input_txt', "input text file to parse for timestamps and titles"),
        ('multimedia_file', "input file containing album to cut/split from"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    parsed = cli.parse_args()
    ranges = parse_input(
        input_txt_file=parsed.input_txt,
        multimedia_file=parsed.multimedia_file,
    )
    iterate_clip(ranges, parsed.multimedia_file)


# run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module
# NOTE(review): the script's contents appear a second time further down
#               in this file, including another guard like this one, so
#               main() would run twice when executed -- confirm and
#               remove the duplicate copy
if __name__ == '__main__':
    main()


# notes:
'''
title_range = []
for index, item in enumerate(timestamp_title):
    try:
        title_range.append([item[1],
                            item[0],
                            timestamp_title[index+1][0]])
    except IndexError:
        title_range.append([timestamp_title[-1][1],
                            timestamp_title[-1][0],
                            get_duration(multimedia_file)])

for index, item in enumerate(title_range):
    clip(
        input_file=multimedia_file,
        output_file=f'{index+1:02} - {item[0]}.mp3',
        start_time=item[1],
        end_time=item[2]
    )
'''
#!/usr/bin/env python3
"""
split songs from an album or playlist contained in one file into their own files
"""

# TODO: sanity checks, check if the inputs make sense
#
# TODO: ability to truly split from most multi-media
#       formats and convert to any reasonable file format
#
# TODO: multi-processing (maybe with concurrent.futures?)
#
# TODO: include a mechanism to update file tags
#       with relevant song title / album / genre info
#
# TODO: automate the entire process of downloading a
#       multimedia file, check/scrape the page being
#       downloaded for timestamp/title in description or
#       comment, and let the rest of the script perform
#       its magic with the help of ffmpeg!
#       (maybe outside the scope of this standalone
#        script?)

import argparse
import subprocess
import sys


def clip(input_file, output_file, start_time, end_time):
    """
    cut the span between start_time and end_time out of input_file with
    ffmpeg and write it to output_file

    the audio and video codecs are both set to 'copy'.  the outcome of
    the ffmpeg run is not inspected.
    """
    source_args = ['-i', input_file]
    range_args = ['-ss', start_time, '-to', end_time]
    codec_args = ['-c:v', 'copy', '-c:a', 'copy']
    subprocess.run(
        ['ffmpeg', *source_args, *range_args, *codec_args, output_file]
    )


def get_duration(multimedia_file):
    """
    ask ffprobe for the length of a multimedia file

    returns the sexagesimal duration printed by ffprobe with everything
    from the first '.' onwards dropped.  if ffprobe exits with an error,
    its captured output is printed and the script exits with status 1.
    """
    # equivalent shell command:
    #  ffprobe -v quiet -of csv="p=0" -show_entries format=duration -sexagesimal -i <file>
    #
    # (asking for "-of default=noprint_wrappers=1:nokey=1" without
    #  -sexagesimal would return plain seconds instead, which seemed less
    #  clean than the form used here)
    probe_command = [
        'ffprobe',
        '-v', 'quiet',
        '-of', 'csv=p=0',
        '-show_entries', 'format=duration',
        '-sexagesimal',
        '-i', multimedia_file,
    ]
    try:
        raw_output = subprocess.check_output(probe_command)
    except subprocess.CalledProcessError as error:
        print(error.output)
        sys.exit(1)
    # keep only what precedes the decimal point
    whole_part, _, _ = raw_output.decode('utf-8').partition('.')
    return whole_part


def iterate_clip(input_list, multimedia_file):
    """
    cut every entry of input_list out of multimedia_file

    each entry is indexed as [title, start, end]; its clip is written to
    '<NN> - <title>.mp3', where NN is the 1-based, zero-padded position
    of the entry in the list.
    """
    for track_number, entry in enumerate(input_list, start=1):
        clip(
            input_file=multimedia_file,
            output_file=f'{track_number:02} - {entry[0]}.mp3',
            start_time=entry[1],
            end_time=entry[2],
        )


def parse_input(input_txt_file, multimedia_file):
    """
    parse a text input file containing timestamps and song titles
    return a list of [title, start_time, end_time] lists.

    every non-blank line must hold a timestamp, then whitespace (the
    usual format is two spaces), then the title.  blank lines are
    ignored.  a title ends where the next one starts; the last title
    ends at the duration of multimedia_file as reported by
    get_duration().  a file without any entries yields an empty list
    and get_duration() is not called.

    raises ValueError for a non-blank line that has no title.

    e.g.
    --- start file ---
    00:00  Молчат Дома - Клетка
    04:40  Кино - Раньше в твоих глазах
    07:02  Chernikovskaya Hata - Владимирский централ
    ---- end file ----
    """
    entries = []
    # utf-8-sig: titles are frequently non-ASCII, so do not depend on the
    # locale's default encoding; the -sig variant also drops a leading BOM
    with open(input_txt_file, encoding='utf-8-sig') as open_file:
        for line_number, line in enumerate(open_file, start=1):
            # split once only, so spaces inside the title are preserved
            fields = line.split(maxsplit=1)
            if not fields:
                continue  # blank line (e.g. a trailing newline)
            if len(fields) != 2:
                raise ValueError(
                    f'{input_txt_file}:{line_number}: expected '
                    f'"<timestamp>  <title>", got {line!r}'
                )
            timestamp, title = fields
            entries.append((timestamp, title.strip()))

    if not entries:
        return []

    # every track ends where the following one begins; only the last one
    # needs the total duration of the multimedia file
    end_times = [timestamp for timestamp, _ in entries[1:]]
    end_times.append(get_duration(multimedia_file))
    return [
        [title, start, end]
        for (start, title), end in zip(entries, end_times)
    ]


def main():
    """
    command-line entry point: take the timestamp file and the album file
    from the arguments, work out the time range of every title, and clip
    each one into its own file
    """
    cli = argparse.ArgumentParser(
        description="split songs from an album or playlist contained in one file into their own files"
    )
    positionals = (
        ('input_txt', "input text file to parse for timestamps and titles"),
        ('multimedia_file', "input file containing album to cut/split from"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    parsed = cli.parse_args()
    ranges = parse_input(
        input_txt_file=parsed.input_txt,
        multimedia_file=parsed.multimedia_file,
    )
    iterate_clip(ranges, parsed.multimedia_file)


# run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module
# NOTE(review): an identical guard appears earlier in this file (the
#               script's contents seem to be duplicated), so main() would
#               run twice when executed -- confirm and remove the
#               duplicate copy
if __name__ == '__main__':
    main()


# notes:
'''
title_range = []
for index, item in enumerate(timestamp_title):
    try:
        title_range.append([item[1],
                            item[0],
                            timestamp_title[index+1][0]])
    except IndexError:
        title_range.append([timestamp_title[-1][1],
                            timestamp_title[-1][0],
                            get_duration(multimedia_file)])

for index, item in enumerate(title_range):
    clip(
        input_file=multimedia_file,
        output_file=f'{index+1:02} - {item[0]}.mp3',
        start_time=item[1],
        end_time=item[2]
    )
'''