diff --git a/.gitignore b/.gitignore index 6cda75d..75e5899 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ /vinetrimmer/Cache/ /vinetrimmer/Cookies/ /vinetrimmer/Logs/ - +.DS_Store # Created by https://www.toptal.com/developers/gitignore/api/python # Edit at https://www.toptal.com/developers/gitignore?templates=python diff --git a/packaging/make.ps1 b/packaging/make.ps1 new file mode 100644 index 0000000..00cc73a --- /dev/null +++ b/packaging/make.ps1 @@ -0,0 +1,14 @@ +# Tip: add argument `run` to directly run after build for fast testing + +Write-Output 'Creating Python Wheel package via Poetry' +& 'poetry' build -f wheel + +Write-Output 'Building to self-contained folder/app via PyInstaller' +& 'poetry' run python pyinstaller.py + +if ($args[0] -eq 'run') { + & 'dist/vinetrimmer/vinetrimmer.exe' ($args | Select-Object -Skip 1) + exit +} + +Write-Output 'Done! See /dist for output files.' diff --git a/packaging/make.sh b/packaging/make.sh new file mode 100644 index 0000000..f63eb43 --- /dev/null +++ b/packaging/make.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# Tip: add argument `run` to directly run after build for fast testing + +echo 'Creating Python Wheel package via Poetry' +poetry build -f wheel + +echo 'Building to self-contained folder/app via PyInstaller' +poetry run python pyinstaller.py + +if [ "$1" = 'run' ]; then + shift + ./dist/vinetrimmer/vinetrimmer "$@" + exit +fi + +echo 'Done! See /dist for output files.' 
diff --git a/packaging/pyinstaller.py b/packaging/pyinstaller.py new file mode 100644 index 0000000..2df0f45 --- /dev/null +++ b/packaging/pyinstaller.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +import itertools +import os +import shutil +import sys + +import toml +from PyInstaller.__main__ import run + +if sys.platform == "win32": + from PyInstaller.utils.win32.versioninfo import (FixedFileInfo, SetVersion, StringFileInfo, StringStruct, + StringTable, VarFileInfo, VarStruct, VSVersionInfo) + +SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) + +"""Load pyproject.toml information.""" +project = toml.load(os.path.join(SCRIPT_PATH, "pyproject.toml")) +poetry = project["tool"]["poetry"] + +"""Configuration options that may be changed or referenced often.""" +DEBUG = False # When False, removes un-needed data after build has finished +NAME = poetry["name"] +AUTHOR = "vinetrimmer contributors" +VERSION = poetry["version"] +ICON_FILE = "assets/icon.ico" # pass None to use default icon +ONE_FILE = False # Must be False if using setup.iss +CONSOLE = True # If build is intended for GUI, set to False +ADDITIONAL_DATA = [ + # (local file path, destination in build output) +] +HIDDEN_IMPORTS = [] +EXTRA_ARGS = [ + "-y", "--win-private-assemblies", "--win-no-prefer-redirects" +] + +"""Prepare environment to ensure output data is fresh.""" +shutil.rmtree("build", ignore_errors=True) +shutil.rmtree("dist/vinetrimmer", ignore_errors=True) +# we don't want to use any spec, only the configuration set in this file +try: + os.unlink(f"{NAME}.spec") +except FileNotFoundError: + pass + +"""Run PyInstaller with the provided configuration.""" +run([ + "vinetrimmer/vinetrimmer.py", + "-n", NAME, + "-i", ["NONE", ICON_FILE][bool(ICON_FILE)], + ["-D", "-F"][ONE_FILE], + ["-w", "-c"][CONSOLE], + *itertools.chain(*[["--add-data", os.pathsep.join(x)] for x in ADDITIONAL_DATA]), + *itertools.chain(*[["--hidden-import", x] for x in HIDDEN_IMPORTS]), + *EXTRA_ARGS +]) + +if sys.platform 
== "win32": + """Set Version Info Structure.""" + VERSION_4_TUP = tuple(map(int, f"{VERSION}.0".split("."))) + VERSION_4_STR = ".".join(map(str, VERSION_4_TUP)) + SetVersion( + "dist/{0}/{0}.exe".format(NAME), + VSVersionInfo( + ffi=FixedFileInfo( + filevers=VERSION_4_TUP, + prodvers=VERSION_4_TUP + ), + kids=[ + StringFileInfo([StringTable( + "040904B0", # ? + [ + StringStruct("Comments", NAME), + StringStruct("CompanyName", AUTHOR), + StringStruct("FileDescription", "Widevine DRM downloader and decrypter"), + StringStruct("FileVersion", VERSION_4_STR), + StringStruct("InternalName", NAME), + StringStruct("LegalCopyright", f"Copyright (C) 2019-2021 {AUTHOR}"), + StringStruct("OriginalFilename", ""), + StringStruct("ProductName", NAME), + StringStruct("ProductVersion", VERSION_4_STR) + ] + )]), + VarFileInfo([VarStruct("Translation", [0, 1200])]) # ? + ] + ) + ) + +if not DEBUG: + shutil.rmtree("build", ignore_errors=True) + # we don't want to keep the generated spec + try: + os.unlink(f"{NAME}.spec") + except FileNotFoundError: + pass diff --git a/vinetrimmer/config/Services/disneyplus.yml b/vinetrimmer/config/Services/disneyplus.yml new file mode 100644 index 0000000..10ce247 --- /dev/null +++ b/vinetrimmer/config/Services/disneyplus.yml @@ -0,0 +1,50 @@ +certificate: | + CAUSugUKtAIIAxIQbj3s4jO5oUyWjDWqjfr9WRjA2afZBSKOAjCCAQoCggEBALhKWfnyA+FGn5P3tl6ffDjoGq2Oq86hKGl6aZIaGaF7XHPO5mIk7Q35ml + ZIgg1A458Udb4eXRws1n+kJFqtZXCY5S1yElLP0Om1WQsoEY2stpl+PZTGnVv/CsOJGKQ8K4KMr7rKjZem9lA9BrBoxgfXY3tbwlnSf3wTEohyANb5Qfpa + xsU4v8tQDA8PcjzzV9ICodl6crcFZhAy4QMNXfbWOv/ZrGFx5blSXrzP1sMQ64IY8bjUYw4coZM34NDhu8aCA692g8k2mTz2494x7u3Is8v7RKC9ZNiETE + K5/4oeVclXPpelNQokR4uvggnCD1L2EULG/pp6wnk1yWNNLxcCAwEAAToHYmFtdGVjaBKAA2FqHlqkE7EUmdOLiCi0hy5jRgBDJrU1CWNHfH6r2i6s5T5k + 6LK7ZfD65Tv6uyqq1k82PsDz4++kxbpfJDZaypFbae4XPc6lZxRCc5X0toX/x9TftOQQ4N82l5Hxoha569EPRkrnNy7rO7xrRILa3ZVj1alttEnEEjxEuw + 
SV8usdlUg8/LvLA2C59T/HA2I77k7yVbTrVdy0f81r2l+E2SslivCy1JD3xKlgoaKl4xBnRxItWt8+DCw1Xm2lemYl2LGoh1Wk9gvlXQvr2Jv2+dFX3RNs + i5sd00KS9sePszfjoTkQ6fmpRd7ZgFCGFWYB9JZ92aGUFQRE14OTST2uwSf32YCfsoATDNs4V6dB8YDoTGKFGrcoc4gtHPKySGNt7z/fOW4/01ZGzKqoVY + Fp3jPq7R0qyt5P6fU5NshbLh5VKcnQvwg62BuKsdwV9u4NV36b2a546hGRl/3GBneQ+QDA7NRrgITR33Sz02Oq8yJr3sy24GfZRTbtLJ4qiWkjtw== + +# taken from web application, firefox, linux +## +# firefox, linux: ZGlzbmV5JmJyb3dzZXImMS4wLjA.Cu56AgSfBTDag5NiRA81oLHkDZfu5L3CKadnefEAY84 +# android phones, tv, and fire tv stick: ZGlzbmV5JmFuZHJvaWQmMS4wLjA.bkeb0m230uUhv8qrAXuNu39tbE_mD5EEhM_NAcohjyA +# . e.g. for browser: disney&browser&1.0.0 +## +device_api_key: 'ZGlzbmV5JmFuZHJvaWQmMS4wLjA.bkeb0m230uUhv8qrAXuNu39tbE_mD5EEhM_NAcohjyA' + +bamsdk: + ## + # Browser (Windows, Chrome), isUhdAllowed: false. + # https://bam-sdk-configs.bamgrid.com/bam-sdk/v3.0/disney-svod-3d9324fc/browser/v6.1/windows/chrome/prod.json + # Android Phone, isUhdAllowed: false. + # https://bam-sdk-configs.bamgrid.com/bam-sdk/v3.0/disney-svod-3d9324fc/android/v5.1.0/google/handset/prod.json + # Android TV, isUhdAllowed: TRUE! + # https://bam-sdk-configs.bamgrid.com/bam-sdk/v3.0/disney-svod-3d9324fc/android/v5.1.0/google/tv/prod.json + # Amazon Fire TV, isUhdAllowed: TRUE! + # https://bam-sdk-configs.bamgrid.com/bam-sdk/v3.0/disney-svod-3d9324fc/android/v5.1.0/amazon/tv/prod.json + ## + season: https://disney.api.edge.bamgrid.com/explore/v1.0/season/{id} + page: https://disney.api.edge.bamgrid.com/explore/v1.2/page/{id} + config: 'https://bam-sdk-configs.bamgrid.com/bam-sdk/v3.0/disney-svod-3d9324fc/android/v6.0.0/google/tv/prod.json' + family: 'browser' + profile: 'tv' + ## + # android phone: handset + # android tv: tv? android? 
+ ## + applicationRuntime: 'android' + ## + # android phone: android-phone + # android tv: android-tv + ## + platform: 'android/google/tv' + version: '9.7.1' + ## + # samsung s8: BAMSDK/v4.18.2 (disney-svod-3d9324fc 1.7.2.0; v2.0/v4.18.0; android; phone) samsung SM-G950F (PPR1.180610.011.G950FXXS8DTC1; Linux; 9; API 28) + # android tv: BAMSDK/v4.18.2 (disney-svod-3d9324fc 1.7.2.0; v2.0/v4.18.0; android; tv) google Nexus Player (OPR2.170623.027; Linux; 8.0.0; API 26) + ## + user_agent: 'BAMSDK/v9.7.1 (disney-svod-3d9324fc 2.26.3-rc2.0; v5.0/v9.7.0; android; tv) SDMC S905X4 OTT Box (RTM6.230109.097; Linux; 11; API 30)' diff --git a/vinetrimmer/config/Services/hulu.yml b/vinetrimmer/config/Services/hulu.yml new file mode 100644 index 0000000..6ef3ab5 --- /dev/null +++ b/vinetrimmer/config/Services/hulu.yml @@ -0,0 +1,68 @@ +user_agent: 'Mozilla/5.0 (Fire OS 6.2.7.6; Amazon AFTMM; armeabi-v7a) AppleWebKit/604.1.38 (KHTML, like Gecko) Neutron/1.3.37 Hulu/0 TV Safari/604.1.38' + +endpoints: + movie: 'https://discover.hulu.com/content/v5/hubs/movie/{id}?limit=999&schema=9&referralHost=production' + series: 'https://discover.hulu.com/content/v5/hubs/series/{id}?limit=999&schema=9&referralHost=production' + season: 'https://discover.hulu.com/content/v5/hubs/series/{id}/season/{season}?limit=999&schema=9&referralHost=production' + +device: + PC: + code: '159' # Referred to as just "PC", but what specifically is it + key: '6ebfc84f3c304217b20fd9a66cb5957f' + + Chrome: + code: '190' # Same key as "PC", but chrome specific? code just changed? 
+ key: '6ebfc84f3c304217b20fd9a66cb5957f' + + FireTV: + code: '188' + key: 'ca8d311a734854871623d906b968a073' + + FireTV4K: + code: '208' + key: 'fa49ca06261fe41b6e56fa2d24b4f295' + + Shield: + code: '109' + key: 'd0f4adc1d8a774256acb00c0fff46f5f' + + Shield2: + code: '142' # TODO: Might not be a shield device, was referred to as "shield2" + key: 'd6bdf1f49c73db36f465536162ccc830' + +codecs: + video_selection: 'ONE' + audio_selection: 'ALL' + + video: + - type: 'H265' + profile: 'MAIN_10' + width: 3840 + height: 2160 + framerate: 60 + level: '5.1' + tier: 'MAIN' + + - type: 'H264' + profile: 'HIGH' + width: 1920 + height: 1080 + framerate: 60 + level: '5.2' + + audio: + - type: 'AAC' + + - type: 'EC3' + +drm: + selection_mode: 'ONE' + hdcp: true + + schemas: + - type: 'WIDEVINE' + version: 'MODULAR' + security_level: 'L1' + - type: 'PLAYREADY' + version: 'V2' + security_level: 'SL3000' diff --git a/vinetrimmer/config/Services/paramountplus.yml b/vinetrimmer/config/Services/paramountplus.yml new file mode 100644 index 0000000..9840960 --- /dev/null +++ b/vinetrimmer/config/Services/paramountplus.yml @@ -0,0 +1,49 @@ +US: + device_link: True + base_url: 'https://www.paramountplus.com' + at_token: 'ABC+2JjrOUYWbaaqKmzwPdppq0RDB2WdufcFmIsSnJDmDEQpVgyAjQpqpEDksKZNMKQ=' + login: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/auth/login.json' + status: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/app/status.json' + movie: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/movies/{title_id}.json' + shows: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/shows/slug/{title}.json' + section: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/shows/{showId}/videos/config/{config}.json' + seasons: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/videos/section/{section}.json' + show: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/shows/{}.json' + video_items: 
'https://www.paramountplus.com/apps-api/v2.0/{device}/video/cid/{content_id}.json' + barrearUrl: 'https://www.paramountplus.com/apps-api/v3.1/androidphone/irdeto-control/anonymous-session-token.json' + +FR: + device_link: True + base_url: 'https://www.paramountplus.com' + at_token: 'ABAS/G30Pp6tJuNOlZ1OEE6Rf5goS0KjICkGkBVIapVuxemiiASyWVfW4v7SUeAkogc=' + login: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/auth/login.json' + status: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/login/status.json' + movie: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/movies/{title_id}.json' + shows: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/shows/slug/{title}.json' + section: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/shows/{showId}/videos/config/{config}.json' + seasons: 'https://www.paramountplus.com/apps-api/v2.0/androidphone/videos/section/{section}.json' + show: 'https://www.paramountplus.com/apps-api/v3.0/androidphone/shows/{}.json' + video_items: 'https://www.paramountplus.com/apps-api/v2.0/{device}/video/cid/{content_id}.json' + barrearUrl: 'https://www.paramountplus.com/apps-api/v3.1/androidphone/irdeto-control/anonymous-session-token.json' + +INTL: + device_link: False + rows: 50 # Int number between 1 and 50 + base_url: 'https://www.intl.paramountplus.com' + at_token: 'ABAS/G30Pp6tJuNOlZ1OEE6Rf5goS0KjICkGkBVIapVuxemiiASyWVfW4v7SUeAkogc=' + login: 'https://www.intl.paramountplus.com/apps-api/v2.0/androidphone/auth/login.json' + status: 'https://www.intl.paramountplus.com/apps-api/v3.0/androidphone/login/status.json' + movie: 'https://www.intl.paramountplus.com/apps-api/v3.0/androidphone/movies/{title_id}.json' + shows: 'https://www.intl.paramountplus.com/apps-api/v3.0/androidphone/shows/slug/{title}.json' + section: 'https://www.intl.paramountplus.com/apps-api/v2.0/androidphone/shows/{showId}/videos/config/{config}.json' + seasons: 
'https://www.intl.paramountplus.com/apps-api/v2.0/androidphone/videos/section/{section}.json' + show: 'https://www.intl.paramountplus.com/apps-api/v3.0/androidphone/shows/{}.json' + video_items: 'https://www.intl.paramountplus.com/apps-api/v2.0/{device}/video/cid/{content_id}.json' + barrearUrl: 'https://www.intl.paramountplus.com/apps-api/v3.1/androidphone/irdeto-control/session-token.json' + +Android: + UserAgent: 'Mozilla/5.0 (Linux; Android 13; SM-A536E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36' + +LINK_PLATFORM_URL: 'http://link.theplatform.com/s/dJ5BDC/media/guid/2198311517/{video_id}' +license: 'https://cbsi.live.ott.irdeto.com/widevine/getlicense' +license_pr: 'https://cbsi.live.ott.irdeto.com/playready/rightsmanager.asmx' \ No newline at end of file diff --git a/vinetrimmer/objects/tracks.py b/vinetrimmer/objects/tracks.py index 9ca7419..160d333 100644 --- a/vinetrimmer/objects/tracks.py +++ b/vinetrimmer/objects/tracks.py @@ -31,1371 +31,1286 @@ from vinetrimmer.utils.xml import load_xml from vinetrimmer.vendor.pymp4.parser import Box, MP4 CODEC_MAP = { - # Video - "avc1": "H.264", - "avc3": "H.264", - "hev1": "H.265", - "hvc1": "H.265", - "dvh1": "H.265", - "dvhe": "H.265", - # Audio - "aac": "AAC", - "mp4a": "AAC", - "stereo": "AAC", - "HE": "HE-AAC", - "ac3": "AC3", - "ac-3": "AC3", - "eac": "E-AC3", - "eac-3": "E-AC3", - "ec-3": "E-AC3", - "atmos": "E-AC3", - # Subtitles - "srt": "SRT", - "vtt": "VTT", - "wvtt": "VTT", - "dfxp": "TTML", - "stpp": "TTML", - "ttml": "TTML", - "tt": "TTML", + # Video + "avc1": "H.264", + "avc3": "H.264", + "hev1": "H.265", + "hvc1": "H.265", + "dvh1": "H.265", + "dvhe": "H.265", + # Audio + "aac": "AAC", + "mp4a": "AAC", + "stereo": "AAC", + "HE": "HE-AAC", + "ac3": "AC3", + "ac-3": "AC3", + "eac": "E-AC3", + "eac-3": "E-AC3", + "ec-3": "E-AC3", + "atmos": "E-AC3", + # Subtitles + "srt": "SRT", + "vtt": "VTT", + "wvtt": "VTT", + "dfxp": "TTML", + "stpp": "TTML", + "ttml": "TTML", + 
"tt": "TTML", } class Track: - class Descriptor(Enum): - URL = 1 # Direct URL, nothing fancy - M3U = 2 # https://en.wikipedia.org/wiki/M3U (and M3U8) - MPD = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP - ISM = 4 # https://bitmovin.com/blog/microsoft-smooth-streaming-mss/ + class Descriptor(Enum): + URL = 1 # Direct URL, nothing fancy + M3U = 2 # https://en.wikipedia.org/wiki/M3U (and M3U8) + MPD = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP + ISM = 4 # https://bitmovin.com/blog/microsoft-smooth-streaming-mss/ - def __init__(self, id_, source, url, codec, language=None, descriptor=Descriptor.URL, - needs_proxy=False, needs_repack=False, encrypted=False, psshWV=None, psshPR=None, note=None, kid=None, key=None, extra=None): - self.id = id_ - self.source = source - self.url = url - # required basic metadata - self.note= note - self.codec = codec - #self.language = Language.get(language or "none") - self.language = Language.get(language or "en") - self.is_original_lang = False # will be set later - # optional io metadata - self.descriptor = descriptor - self.needs_proxy = bool(needs_proxy) - self.needs_repack = bool(needs_repack) - # decryption - self.encrypted = bool(encrypted) - self.psshWV = psshWV - self.psshPR = psshPR - self.kid = kid - self.key = key - # extra data - self.extra = extra or {} # allow anything for extra, but default to a dict + def __init__(self, id_, source, url, codec, language=None, descriptor=Descriptor.URL, + needs_proxy=False, needs_repack=False, encrypted=False, psshWV=None, psshPR=None, note=None, kid=None, key=None, extra=None): + self.id = id_ + self.source = source + self.url = url + # required basic metadata + self.note= note + self.codec = codec + #self.language = Language.get(language or "none") + self.language = Language.get(language or "en") + self.is_original_lang = False # will be set later + # optional io metadata + self.descriptor = descriptor + self.needs_proxy = 
bool(needs_proxy) + self.needs_repack = bool(needs_repack) + # decryption + self.encrypted = bool(encrypted) + self.psshWV = psshWV + self.psshPR = psshPR + self.kid = kid + self.key = key + # extra data + self.extra = extra or {} # allow anything for extra, but default to a dict - # should only be set internally - self._location = None + # should only be set internally + self._location = None - def __repr__(self): - return "{name}({items})".format( - name=self.__class__.__name__, - items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) - ) + def __repr__(self): + return "{name}({items})".format( + name=self.__class__.__name__, + items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) + ) - def __eq__(self, other): - return isinstance(other, Track) and self.id == other.id + def __eq__(self, other): + return isinstance(other, Track) and self.id == other.id - def get_track_name(self): - """Return the base track name. This may be enhanced in subclasses.""" - if self.language is None: - self.language = Language.get("en") - if ((self.language.language or "").lower() == (self.language.territory or "").lower() - and self.language.territory not in TERRITORY_MAP): - self.language.territory = None # e.g. de-DE - if self.language.territory == "US": - self.language.territory = None - language = self.language.simplify_script() - extra_parts = [] - if language.script is not None: - extra_parts.append(language.script_name()) - if language.territory is not None: - territory = language.territory_name() - extra_parts.append(TERRITORY_MAP.get(language.territory, territory)) - return ", ".join(extra_parts) or None + def get_track_name(self): + """Return the base track name. This may be enhanced in subclasses.""" + if self.language is None: + self.language = Language.get("en") + if ((self.language.language or "").lower() == (self.language.territory or "").lower() + and self.language.territory not in TERRITORY_MAP): + self.language.territory = None # e.g. 
de-DE + if self.language.territory == "US": + self.language.territory = None + language = self.language.simplify_script() + extra_parts = [] + if language.script is not None: + extra_parts.append(language.script_name()) + if language.territory is not None: + territory = language.territory_name() + extra_parts.append(TERRITORY_MAP.get(language.territory, territory)) + return ", ".join(extra_parts) or None - def get_data_chunk(self, session=None): - """Get the data chunk from the track's stream.""" - if not session: - session = Session() + def get_data_chunk(self, session=None): + """Get the data chunk from the track's stream.""" + if not session: + session = Session() - url = None + url = None - if self.descriptor == self.Descriptor.M3U: - master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) - for segment in master.segments: - if not segment.init_section: - continue - if self.source in ["DSNP", "STRP"] and re.match(r"^[a-zA-Z0-9]{4}-(BUMPER|DUB_CARD)/", segment.init_section.uri): - continue - url = ("" if re.match("^https?://", segment.init_section.uri) else segment.init_section.base_uri) - url += segment.init_section.uri - break + if self.descriptor == self.Descriptor.M3U: + master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) + for segment in master.segments: + if not segment.init_section: + continue + if self.source in ["DSNP", "STRP"] and re.match(r"^[a-zA-Z0-9]{4}-(BUMPER|DUB_CARD)/", segment.init_section.uri): + continue + url = ("" if re.match("^https?://", segment.init_section.uri) else segment.init_section.base_uri) + url += segment.init_section.uri + break - if not url: - url = as_list(self.url)[0] + if not url: + url = as_list(self.url)[0] - with session.get(url, stream=True) as s: - # assuming enough to contain the pssh/kid - for chunk in s.iter_content(20000): - # we only want the first chunk - return chunk + with session.get(url, stream=True) as s: + # assuming enough to contain the pssh/kid + for chunk in 
s.iter_content(20000): + # we only want the first chunk + return chunk - # assuming 20000 bytes is enough to contain the pssh/kid box - return download_range(url, 20000, proxy=proxy) + # assuming 20000 bytes is enough to contain the pssh/kid box + return download_range(url, 20000, proxy=proxy) - def get_pssh(self, session=None): - """ - Get the PSSH of the track. + def get_pssh(self, session=None): + """ + Get the PSSH of the track. - Parameters: - session: Requests Session, best to provide one if cookies/headers/proxies are needed. + Parameters: + session: Requests Session, best to provide one if cookies/headers/proxies are needed. - Returns: - True if PSSH is now available, False otherwise. PSSH will be stored in Track.pssh - automatically. - """ - - if self.psshWV or self.psshPR or not self.encrypted: - return True + Returns: + True if PSSH is now available, False otherwise. PSSH will be stored in Track.pssh + automatically. + """ + + if self.psshWV or self.psshPR or not self.encrypted: + return True - if self.descriptor == self.Descriptor.M3U: - # if an m3u, try get from playlist - master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) - for x in master.session_keys: - if x and x.keyformat.lower == "com.microsoft.playready": - self.psshPR = x.uri.split(",")[-1] - break - for x in master.keys: - if x and "com.microsoft.playready" in str(x): - self.psshPR = str(x).split("\"")[1].split(",")[-1] - break - boxes.extend([ - Box.parse(base64.b64decode(x.uri.split(",")[-1])) - for x in (master.session_keys or master.keys) - if x and x.keyformat.lower() == f"urn:uuid:{uuid.UUID('edef8ba979d64acea3c827dcd51d21ed')}" - ]) + if self.descriptor == self.Descriptor.M3U: + # if an m3u, try get from playlist + master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) + for x in master.session_keys: + if x and x.keyformat.lower() == "com.microsoft.playready": + self.psshPR = x.uri.split(",")[-1] + break + elif x and x.keyformat.lower() == 
f"urn:uuid:{uuid.UUID('edef8ba979d64acea3c827dcd51d21ed')}": + self.psshWV = x.uri.split(",")[-1] + break + for x in master.keys: + if x and "com.microsoft.playready" in str(x): + self.psshPR = str(x).split("\"")[1].split(",")[-1] + break + elif x and f"urn:uuid:{uuid.UUID('edef8ba979d64acea3c827dcd51d21ed')}" in str(x): + self.psshWV = str(x).split("\"")[1].split(",")[-1] + break + # Below converts PlayReady PSSH to WideVine PSSH + try: + xml_str = base64.b64decode(self.psshPR).decode("utf-16-le", "ignore") + xml_str = xml_str[xml_str.index("<"):] + xml = load_xml(xml_str).find("DATA") # root: WRMHEADER - data = self.get_data_chunk(session) - if data: - boxes.extend(list(get_boxes(data, b"pssh"))) + kid = xml.findtext("KID") # v4.0.0.0 + if not kid: # v4.1.0.0 + kid = next(iter(xml.xpath("PROTECTINFO/KID/@VALUE")), None) + if not kid: # v4.3.0.0 + kid = next(iter(xml.xpath("PROTECTINFO/KIDS/KID/@VALUE")), None) # can be multiple? + self.kid = uuid.UUID(base64.b64decode(kid).hex()).bytes_le.hex() + if not self.psshWV: + self.psshWV = Box.parse(Box.build(dict( + type=b"pssh", + version=0, + flags=0, + system_ID="9a04f079-9840-4286-ab92-e65be0885f95", + init_data=b"\x12\x10" + base64.b64decode(kid) + ))) + return True + except: pass - for box in boxes: - if box.system_ID == uuid.UUID("edef8ba979d64acea3c827dcd51d21ed"): - self.psshWV = box - return True + return False - # Below converts PlayReady PSSH to WideVine PSSH - for box in boxes: - if box.system_ID == uuid.UUID("9a04f07998404286ab92e65be0885f95") and not self.PSSHWV: - xml_str = Box.build(box) - xml_str = xml_str.decode("utf-16-le", "ignore") - xml_str = xml_str[xml_str.index("<"):] + def get_kid(self, session=None): + """ + Get the KID (encryption key id) of the Track. + The KID corresponds to the Encrypted segments of an encrypted Track. - xml = load_xml(xml_str).find("DATA") # root: WRMHEADER + Parameters: + session: Requests Session, best to provide one if cookies/headers/proxies are needed. 
- kid = xml.findtext("KID") # v4.0.0.0 - if not kid: # v4.1.0.0 - kid = next(iter(xml.xpath("PROTECTINFO/KID/@VALUE")), None) - if not kid: # v4.3.0.0 - kid = next(iter(xml.xpath("PROTECTINFO/KIDS/KID/@VALUE")), None) # can be multiple? + Returns: + True if KID is now available, False otherwise. KID will be stored in Track.kid + automatically. + """ + if self.encrypted and self.source == "DSNP": + log = logging.getLogger("Tracks") + log.info("+ Replacing KID with correct track KID (DSNP workaround)") + if self.descriptor == self.Descriptor.M3U: + # if an m3u, try get from playlist + master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) + for x in master.session_keys: + if x and x.keyformat.lower() == "com.microsoft.playready" and not self.psshPR: + self.psshPR = x.uri.split(",")[-1] + break + for x in master.keys: + if x and "com.microsoft.playready" in str(x) and not self.psshPR: + self.psshPR = str(x).split("\"")[1].split(",")[-1] + break - self.kid = uuid.UUID(base64.b64decode(self.kid).hex()).bytes_le.hex() + try: + xml_str = base64.b64decode(self.psshPR).decode("utf-16-le", "ignore") + xml_str = xml_str[xml_str.index("<"):] - self.psshWV = Box.parse(Box.build(dict( - type=b"pssh", - version=0, - flags=0, - system_ID="9a04f079-9840-4286-ab92-e65be0885f95", - init_data=b"\x12\x10" + base64.b64decode(kid) - ))) - return True + xml = load_xml(xml_str).find("DATA") # root: WRMHEADER - # boxes = [] + self.kid = xml.findtext("KID") # v4.0.0.0 + if not self.kid: # v4.1.0.0 + self.kid = next(iter(xml.xpath("PROTECTINFO/KID/@VALUE")), None) + if not self.kid: # v4.3.0.0 + self.kid = next(iter(xml.xpath("PROTECTINFO/KIDS/KID/@VALUE")), None) # can be multiple? 
- # if self.descriptor == self.Descriptor.M3U: - # # if an m3u, try get from playlist - # master = m3u8.loads(session.get(as_list(self.url)[0]).text, uri=self.url) - # boxes.extend([ - # Box.parse(base64.b64decode(x.uri.split(",")[-1])) - # for x in (master.session_keys or master.keys) - # if x and x.keyformat.lower() == Cdm.urn - # ]) + self.kid = uuid.UUID(base64.b64decode(self.kid).hex()).bytes_le.hex() - # data = self.get_data_chunk(session) - # if data: - # boxes.extend(list(get_boxes(data, b"pssh"))) - - # for box in boxes: - # if box.system_ID == Cdm.uuid: - # print(box.system_ID) - # self.pssh = box - # return True - - # for box in boxes: - # if box.system_ID == uuid.UUID("{9a04f079-9840-4286-ab92-e65be0885f95}"): - # xml_str = Box.build(box) - # xml_str = xml_str.decode("utf-16-le", "ignore") - # xml_str = xml_str[xml_str.index("<"):] - - # xml = load_xml(xml_str).find("DATA") # root: WRMHEADER - - # kid = xml.findtext("KID") # v4.0.0.0 - # if not kid: # v4.1.0.0 - # kid = next(iter(xml.xpath("PROTECTINFO/KID/@VALUE")), None) - # if not kid: # v4.3.0.0 - # kid = next(iter(xml.xpath("PROTECTINFO/KIDS/KID/@VALUE")), None) # can be multiple? - - # # self.pssh = Box.parse(Box.build(dict( - # # type=b"pssh", - # # version=0, - # # flags=0, - # # system_ID="9a04f079-9840-4286-ab92-e65be0885f95", - # # init_data=b"\x12\x10" + base64.b64decode(kid) - # # ))) - # return True - - return False - - def get_kid(self, session=None): - """ - Get the KID (encryption key id) of the Track. - The KID corresponds to the Encrypted segments of an encrypted Track. - - Parameters: - session: Requests Session, best to provide one if cookies/headers/proxies are needed. - - Returns: - True if KID is now available, False otherwise. KID will be stored in Track.kid - automatically. 
- """ - - try: - xml_str = base64.b64decode(self.pssh).decode("utf-16-le", "ignore") - xml_str = xml_str[xml_str.index("<"):] - - xml = load_xml(xml_str).find("DATA") # root: WRMHEADER - - self.kid = xml.findtext("KID") # v4.0.0.0 - if not self.kid: # v4.1.0.0 - self.kid = next(iter(xml.xpath("PROTECTINFO/KID/@VALUE")), None) - if not self.kid: # v4.3.0.0 - self.kid = next(iter(xml.xpath("PROTECTINFO/KIDS/KID/@VALUE")), None) # can be multiple? - - self.kid = uuid.UUID(base64.b64decode(self.kid).hex()).bytes_le.hex() - - except: pass - - if self.kid or not self.encrypted: - return True - - # boxes = [] - - # data = self.get_data_chunk(session) - - # if data: - # # try get via ffprobe, needed for non mp4 data e.g. WEBM from Google Play - # probe = ffprobe(data) - # if probe: - # kid = try_get(probe, lambda x: x["streams"]["tags"]["enc_key_id"]) - # if kid: - # kid = base64.b64decode(kid).hex() - # if kid != "00" * 16: - # self.kid = kid - # return True - # # get tenc and pssh boxes if available - # boxes.extend(list(get_boxes(data, b"tenc"))) - # boxes.extend(list(get_boxes(data, b"pssh"))) - - # # get the track's pssh box if available - # if self.get_pssh(): - # boxes.append(self.pssh) - - # # loop all found boxes and try find a KID - # for box in sorted(boxes, key=lambda b: b.type == b"tenc", reverse=True): - # if box.type == b"tenc": - # kid = box.key_ID.hex - # if kid != "00" * 16: - # self.kid = kid - # return True - # if box.type == b"pssh": - # if box.system_ID == Cdm.uuid: - # # Note: assumes only the first KID of a list is wanted - # if getattr(box, "key_IDs", None): - # kid = box.key_IDs[0].hex - # if kid != "00" * 16: - # self.kid = kid - # return True - # cenc_header = WidevineCencHeader() - # cenc_header.ParseFromString(box.init_data) - # if getattr(cenc_header, "key_id", None): - # kid = cenc_header.key_id[0] - # try: - # int(kid, 16) # KID may be already hex - # except ValueError: - # kid = kid.hex() - # else: - # kid = kid.decode() - # if kid != "00" 
* 16: - # self.kid = kid - # return True - - return False - - def download(self, out, name=None, headers=None, proxy=None): - """ - Download the Track and apply any necessary post-edits like Subtitle conversion. - - Parameters: - out: Output Directory Path for the downloaded track. - name: Override the default filename format. - Expects to contain `{type}`, `{id}`, and `{enc}`. All of them must be used. - headers: Headers to use when downloading. - proxy: Proxy to use when downloading. - - Returns: - Where the file was saved. - """ - if os.path.isfile(out): - raise ValueError("Path must be to a directory and not a file") - - os.makedirs(out, exist_ok=True) - - name = (name or "{type}_{id}_{enc}").format( - type=self.__class__.__name__, - id=self.id, - enc="enc" if self.encrypted else "dec" - ) + ".mp4" - save_path = os.path.join(out, name) - - if self.descriptor == self.Descriptor.M3U: - master = m3u8.loads( - requests.get( - as_list(self.url)[0], - headers=headers, - proxies={"all": proxy} if self.needs_proxy and proxy else None - ).text, - uri=as_list(self.url)[0] + except: pass + + if self.source == "NF": + self.kid = "{}{}{}".format( + self.kid[:8], + "".join([ self.kid[8:16][i] for i in [2, 3, 0, 1, 6, 7, 4, 5]]), + self.kid[16:] ) - # Keys may be [] or [None] if unencrypted - if any(master.keys + master.session_keys): - self.encrypted = True - self.get_kid() - self.get_pssh() + if self.kid or not self.encrypted: + return True - durations = [] - duration = 0 - for segment in master.segments: - if segment.discontinuity: - durations.append(duration) - duration = 0 - duration += segment.duration - durations.append(duration) - largest_continuity = durations.index(max(durations)) - discontinuity = 0 - has_init = False - segments = [] - for segment in master.segments: - if segment.discontinuity: - discontinuity += 1 - has_init = False - if self.source in ["DSNP", "STRP"] and re.search( - r"[a-zA-Z0-9]{4}-(BUMPER|DUB_CARD)/", - segment.uri + (segment.init_section.uri 
if segment.init_section else '') - ): - continue - if self.source == "ATVP" and discontinuity != largest_continuity: - # the amount of pre and post-roll sections change all the time - # only way to know which section to get is by getting the largest - continue - if segment.init_section and not has_init: - segments.append( - ("" if re.match("^https?://", segment.init_section.uri) else segment.init_section.base_uri) + - segment.init_section.uri - ) - has_init = True - segments.append( - ("" if re.match("^https?://", segment.uri) else segment.base_uri) + - segment.uri - ) - self.url = segments + return False - if self.source == "CORE": - asyncio.run(saldl( - self.url, - save_path, - headers, - proxy if self.needs_proxy else None - )) - elif (self.descriptor == self.Descriptor.ISM) or (self.source == "HS" and self.__class__.__name__ != "TextTrack"): - asyncio.run(m3u8dl( - self.url, - save_path, - self, - headers, - proxy if self.needs_proxy else None - )) - if self.__class__.__name__ == "AudioTrack": - save_path_orig = save_path - save_path = save_path_orig.replace(".mp4", f".m4a") - if not Path(save_path).is_file(): - save_path = save_path_orig.replace(".mp4", f".{str(self.language)[:2]}.m4a") - if not Path(save_path).is_file(): - save_path = save_path_orig - if not Path(save_path).is_file(): - raise - else: - asyncio.run(aria2c( - self.url, - save_path, - headers, - proxy if self.needs_proxy else None - )) + def download(self, out, name=None, headers=None, proxy=None): + """ + Download the Track and apply any necessary post-edits like Subtitle conversion. - if os.stat(save_path).st_size <= 3: # Empty UTF-8 BOM == 3 bytes - raise IOError( - "Download failed, the downloaded file is empty. " - f"This {'was' if self.needs_proxy else 'was not'} downloaded with a proxy." + - ( - " Perhaps you need to set `needs_proxy` as True to use the proxy for this track." - if not self.needs_proxy else "" - ) - ) + Parameters: + out: Output Directory Path for the downloaded track. 
+ name: Override the default filename format. + Expects to contain `{type}`, `{id}`, and `{enc}`. All of them must be used. + headers: Headers to use when downloading. + proxy: Proxy to use when downloading. - self._location = save_path - return save_path + Returns: + Where the file was saved. + """ + if os.path.isfile(out): + raise ValueError("Path must be to a directory and not a file") - def delete(self): - if self._location: - os.unlink(self._location) - self._location = None + os.makedirs(out, exist_ok=True) - def repackage(self): - if not self._location: - raise ValueError("Cannot repackage a Track that has not been downloaded.") - fixed_file = f"{self._location}_fixed.mkv" - try: - subprocess.run([ - "ffmpeg", "-hide_banner", - "-loglevel", "panic", - "-i", self._location, - # Following are very important! - "-map_metadata", "-1", # don't transfer metadata to output file - "-fflags", "bitexact", # only have minimal tag data, reproducible mux - "-codec", "copy", - fixed_file - ], check=True) - self.swap(fixed_file) - except subprocess.CalledProcessError: - pass + name = (name or "{type}_{id}_{enc}").format( + type=self.__class__.__name__, + id=self.id, + enc="enc" if self.encrypted else "dec" + ) + ".mp4" + save_path = os.path.join(out, name) - def locate(self): - return self._location + if self.descriptor == self.Descriptor.M3U: + master = m3u8.loads( + requests.get( + as_list(self.url)[0], + headers=headers, + proxies={"all": proxy} if self.needs_proxy and proxy else None + ).text, + uri=as_list(self.url)[0] + ) - def move(self, target): - if not self._location: - return False - ok = os.path.realpath(shutil.move(self._location, target)) == os.path.realpath(target) - if ok: - self._location = target - return ok + # Keys may be [] or [None] if unencrypted + if any(master.keys + master.session_keys): + self.encrypted = True + self.get_kid() + self.get_pssh() - def swap(self, target): - if not os.path.exists(target) or not self._location: - return False - 
os.unlink(self._location) - os.rename(target, self._location) - return True + durations = [] + duration = 0 + for segment in master.segments: + if segment.discontinuity: + durations.append(duration) + duration = 0 + duration += segment.duration + durations.append(duration) + largest_continuity = durations.index(max(durations)) - @staticmethod - def pt_to_sec(d): - if isinstance(d, float): - return d - if d[0:2] == "P0": - d = d.replace("P0Y0M0DT", "PT") - if d[0:2] != "PT": - raise ValueError("Input data is not a valid time string.") - d = d[2:].upper() # skip `PT` - m = re.findall(r"([\d.]+.)", d) - return sum( - float(x[0:-1]) * {"H": 60 * 60, "M": 60, "S": 1}[x[-1].upper()] - for x in m - ) + discontinuity = 0 + has_init = False + segments = [] + for segment in master.segments: + if segment.discontinuity: + discontinuity += 1 + has_init = False + if self.source in ["DSNP", "STRP"] and re.search( + r"[a-zA-Z0-9]{4}-(BUMPER|DUB_CARD)/", + segment.uri + (segment.init_section.uri if segment.init_section else '') + ): + continue + if self.source == "ATVP" and discontinuity != largest_continuity: + # the amount of pre and post-roll sections change all the time + # only way to know which section to get is by getting the largest + continue + if segment.init_section and not has_init: + segments.append( + ("" if re.match("^https?://", segment.init_section.uri) else segment.init_section.base_uri) + + segment.init_section.uri + ) + has_init = True + segments.append( + ("" if re.match("^https?://", segment.uri) else segment.base_uri) + + segment.uri + ) + self.url = segments + + if self.source == "CORE": + asyncio.run(saldl( + self.url, + save_path, + headers, + proxy if self.needs_proxy else None + )) + elif (self.descriptor == self.Descriptor.ISM) or (self.source == "HS" and self.__class__.__name__ != "TextTrack"): + asyncio.run(m3u8dl( + self.url, + save_path, + self, + headers, + proxy if self.needs_proxy else None + )) + if self.__class__.__name__ == "AudioTrack": + 
save_path_orig = save_path + save_path = save_path_orig.replace(".mp4", f".m4a") + if not Path(save_path).is_file(): + save_path = save_path_orig.replace(".mp4", f".{str(self.language)[:2]}.m4a") + if not Path(save_path).is_file(): + save_path = save_path_orig + if not Path(save_path).is_file(): + raise + else: + asyncio.run(aria2c( + self.url, + save_path, + headers, + proxy if self.needs_proxy else None + )) + + if os.stat(save_path).st_size <= 3: # Empty UTF-8 BOM == 3 bytes + raise IOError( + "Download failed, the downloaded file is empty. " + f"This {'was' if self.needs_proxy else 'was not'} downloaded with a proxy." + + ( + " Perhaps you need to set `needs_proxy` as True to use the proxy for this track." + if not self.needs_proxy else "" + ) + ) + + self._location = save_path + return save_path + + def delete(self): + if self._location: + os.unlink(self._location) + self._location = None + + def repackage(self): + if not self._location: + raise ValueError("Cannot repackage a Track that has not been downloaded.") + fixed_file = f"{self._location}_fixed.mkv" + try: + subprocess.run([ + "ffmpeg", "-hide_banner", + "-loglevel", "panic", + "-i", self._location, + # Following are very important! 
+ "-map_metadata", "-1", # don't transfer metadata to output file + "-fflags", "bitexact", # only have minimal tag data, reproducible mux + "-codec", "copy", + fixed_file + ], check=True) + self.swap(fixed_file) + except subprocess.CalledProcessError: + pass + + def locate(self): + return self._location + + def move(self, target): + if not self._location: + return False + ok = os.path.realpath(shutil.move(self._location, target)) == os.path.realpath(target) + if ok: + self._location = target + return ok + + def swap(self, target): + if not os.path.exists(target) or not self._location: + return False + os.unlink(self._location) + os.rename(target, self._location) + return True + + @staticmethod + def pt_to_sec(d): + if isinstance(d, float): + return d + if d[0:2] == "P0": + d = d.replace("P0Y0M0DT", "PT") + if d[0:2] != "PT": + raise ValueError("Input data is not a valid time string.") + d = d[2:].upper() # skip `PT` + m = re.findall(r"([\d.]+.)", d) + return sum( + float(x[0:-1]) * {"H": 60 * 60, "M": 60, "S": 1}[x[-1].upper()] + for x in m + ) class VideoTrack(Track): - def __init__(self, *args, bitrate, width, size=None, height, fps=None, hdr10=False, hlg=False, dv=False, - needs_ccextractor=False, needs_ccextractor_first=False, **kwargs): - super().__init__(*args, **kwargs) - # required - self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None - self.width = int(width) - self.height = int(height) - # optional - if "/" in str(fps): - num, den = fps.split("/") - self.fps = int(num) / int(den) - elif fps: - self.fps = float(fps) - else: - self.fps = None - self.size = size if size else None - self.hdr10 = bool(hdr10) - self.hlg = bool(hlg) - self.dv = bool(dv) - self.needs_ccextractor = needs_ccextractor - self.needs_ccextractor_first = needs_ccextractor_first + def __init__(self, *args, bitrate, width, size=None, height, fps=None, hdr10=False, hlg=False, dv=False, + needs_ccextractor=False, needs_ccextractor_first=False, **kwargs): + 
super().__init__(*args, **kwargs) + # required + self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None + self.width = int(width) + self.height = int(height) + # optional + if "/" in str(fps): + num, den = fps.split("/") + self.fps = int(num) / int(den) + elif fps: + self.fps = float(fps) + else: + self.fps = None + self.size = size if size else None + self.hdr10 = bool(hdr10) + self.hlg = bool(hlg) + self.dv = bool(dv) + self.needs_ccextractor = needs_ccextractor + self.needs_ccextractor_first = needs_ccextractor_first - def __str__(self): - codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) - fps = f"{self.fps:.3f}" if self.fps else "Unknown" - size = f" ({humanfriendly.format_size(self.size, binary=True)})" if self.size else "" - return " | ".join([ - "├─ VID", - f"[{codec}, {'HDR10' if self.hdr10 else 'HLG' if self.hlg else 'DV' if self.dv else 'SDR'}]", - f"{self.width}x{self.height} @ {self.bitrate // 1000 if self.bitrate else '?'} kb/s{size}, {fps} FPS" - ]) + def __str__(self): + codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) + fps = f"{self.fps:.3f}" if self.fps else "Unknown" + size = f" ({humanfriendly.format_size(self.size, binary=True)})" if self.size else "" + return " | ".join([ + "├─ VID", + f"[{codec}, {'HDR10' if self.hdr10 else 'HLG' if self.hlg else 'DV' if self.dv else 'SDR'}]", + f"{self.width}x{self.height} @ {self.bitrate // 1000 if self.bitrate else '?'} kb/s{size}, {fps} FPS" + ]) - def ccextractor(self, track_id, out_path, language, original=False): - """Return a TextTrack object representing CC track extracted by CCExtractor.""" - if not self._location: - raise ValueError("You must download the track first.") + def ccextractor(self, track_id, out_path, language, original=False): + """Return a TextTrack object representing CC track extracted by CCExtractor.""" + if not self._location: + raise ValueError("You must download the track 
first.") - executable = shutil.which("ccextractor") or shutil.which("ccextractorwin") - if not executable: - raise EnvironmentError("ccextractor executable was not found.") + executable = shutil.which("ccextractor") or shutil.which("ccextractorwin") + if not executable: + raise EnvironmentError("ccextractor executable was not found.") - p = subprocess.Popen([ - executable, - "-quiet", "-trim", "-noru", "-ru1", - self._location, "-o", out_path - ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - for line in TextIOWrapper(p.stdout, encoding="utf-8"): - if "[iso file] Unknown box type ID32" not in line: - sys.stdout.write(line) - returncode = p.wait() - if returncode and returncode != 10: - raise self.log.exit(f" - ccextractor exited with return code {returncode}") + p = subprocess.Popen([ + executable, + "-quiet", "-trim", "-noru", "-ru1", + self._location, "-o", out_path + ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + for line in TextIOWrapper(p.stdout, encoding="utf-8"): + if "[iso file] Unknown box type ID32" not in line: + sys.stdout.write(line) + returncode = p.wait() + if returncode and returncode != 10: + raise self.log.exit(f" - ccextractor exited with return code {returncode}") - if os.path.exists(out_path): - if os.stat(out_path).st_size <= 3: - # An empty UTF-8 file with BOM is 3 bytes. - # If the subtitle file is empty, mkvmerge will fail to mux. - os.unlink(out_path) - return None - cc_track = TextTrack( - id_=track_id, - source=self.source, - url="", # doesn't need to be downloaded - codec="srt", - language=language, - is_original_lang=original, # TODO: Figure out if this is the original title language - cc=True - ) - cc_track._location = out_path - return cc_track + if os.path.exists(out_path): + if os.stat(out_path).st_size <= 3: + # An empty UTF-8 file with BOM is 3 bytes. + # If the subtitle file is empty, mkvmerge will fail to mux. 
+ os.unlink(out_path) + return None + cc_track = TextTrack( + id_=track_id, + source=self.source, + url="", # doesn't need to be downloaded + codec="srt", + language=language, + is_original_lang=original, # TODO: Figure out if this is the original title language + cc=True + ) + cc_track._location = out_path + return cc_track - return None + return None class AudioTrack(Track): - #def __init__(self, *args, bitrate, channels=None, descriptive=False, **kwargs): - def __init__(self, *args, bitrate, size=None, channels=None, - descriptive: bool = False, atmos: bool = False, **kwargs): - super().__init__(*args, **kwargs) - # required - self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None - self.size = size if size else None - self.channels = self.parse_channels(channels) if channels else None - self.atmos = bool(atmos) - # optional - self.descriptive = bool(descriptive) + #def __init__(self, *args, bitrate, channels=None, descriptive=False, **kwargs): + def __init__(self, *args, bitrate, size=None, channels=None, + descriptive: bool = False, atmos: bool = False, **kwargs): + super().__init__(*args, **kwargs) + # required + self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None + self.size = size if size else None + self.channels = self.parse_channels(channels) if channels else None + self.atmos = bool(atmos) + # optional + self.descriptive = bool(descriptive) - @staticmethod - def parse_channels(channels): - """ - Converts a string to a float-like string which represents audio channels. - E.g. "2" -> "2.0", "6" -> "5.1". - """ - # TODO: Support all possible DASH channel configurations (https://datatracker.ietf.org/doc/html/rfc8216) - if channels == "A000": - return "2.0" - if channels == "F801": - return "5.1" + @staticmethod + def parse_channels(channels): + """ + Converts a string to a float-like string which represents audio channels. + E.g. "2" -> "2.0", "6" -> "5.1". 
+ """ + # TODO: Support all possible DASH channel configurations (https://datatracker.ietf.org/doc/html/rfc8216) + if channels == "A000": + return "2.0" + if channels == "F801": + return "5.1" - try: - channels = str(float(channels)) - except ValueError: - channels = str(channels) + try: + channels = str(float(channels)) + except ValueError: + channels = str(channels) - if channels == "6.0": - return "5.1" + if channels == "6.0": + return "5.1" - return channels + return channels - def get_track_name(self): - """Return the base Track Name.""" - track_name = super().get_track_name() or "" - flag = self.descriptive and "Descriptive" - if flag: - if track_name: - flag = f" ({flag})" - track_name += flag - return track_name or None + def get_track_name(self): + """Return the base Track Name.""" + track_name = super().get_track_name() or "" + flag = self.descriptive and "Descriptive" + if flag: + if track_name: + flag = f" ({flag})" + track_name += flag + return track_name or None - def __str__(self): - size = f" ({humanfriendly.format_size(self.size, binary=True)})" if self.size else "" - codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) - return " | ".join([x for x in [ - "├─ AUD", - f"[{codec}]", - f"[{self.codec}{', atmos' if self.atmos else ''}]", - f"{self.channels}" if self.channels else None, - f"{self.bitrate // 1000 if self.bitrate else '?'} kb/s{size}", - f"{self.language}", - " ".join([self.get_track_name() or "", "[Original]" if self.is_original_lang else ""]).strip() - ] if x]) + def __str__(self): + size = f" ({humanfriendly.format_size(self.size, binary=True)})" if self.size else "" + codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) + return " | ".join([x for x in [ + "├─ AUD", + f"[{codec}]", + f"[{self.codec}{', atmos' if self.atmos else ''}]", + f"{self.channels}" if self.channels else None, + f"{self.bitrate // 1000 if self.bitrate else '?'} kb/s{size}", + 
f"{self.language}", + " ".join([self.get_track_name() or "", "[Original]" if self.is_original_lang else ""]).strip() + ] if x]) class TextTrack(Track): - def __init__(self, *args, cc=False, sdh=False, forced=False, **kwargs): - """ - Information on Subtitle Types: - https://bit.ly/2Oe4fLC (3PlayMedia Blog on SUB vs CC vs SDH). - However, I wouldn't pay much attention to the claims about SDH needing to - be in the original source language. It's logically not true. + def __init__(self, *args, cc=False, sdh=False, forced=False, **kwargs): + """ + Information on Subtitle Types: + https://bit.ly/2Oe4fLC (3PlayMedia Blog on SUB vs CC vs SDH). + However, I wouldn't pay much attention to the claims about SDH needing to + be in the original source language. It's logically not true. - CC == Closed Captions. Source: Basically every site. - SDH = Subtitles for the Deaf or Hard-of-Hearing. Source: Basically every site. - HOH = Exact same as SDH. Is a term used in the UK. Source: https://bit.ly/2PGJatz (ICO UK) + CC == Closed Captions. Source: Basically every site. + SDH = Subtitles for the Deaf or Hard-of-Hearing. Source: Basically every site. + HOH = Exact same as SDH. Is a term used in the UK. Source: https://bit.ly/2PGJatz (ICO UK) - More in-depth information, examples, and stuff to look for can be found in the Parameter - explanation list below. + More in-depth information, examples, and stuff to look for can be found in the Parameter + explanation list below. - Parameters: - cc: Closed Caption. - - Intended as if you couldn't hear the audio at all. - - Can have Sound as well as Dialogue, but doesn't have to. - - Original source would be from an EIA-CC encoded stream. Typically all - upper-case characters. 
- Indicators of it being CC without knowing original source: - - Extracted with CCExtractor, or - - >>> (or similar) being used at the start of some or all lines, or - - All text is uppercase or at least the majority, or - - Subtitles are Scrolling-text style (one line appears, oldest line - then disappears). - Just because you downloaded it as a SRT or VTT or such, doesn't mean it - isn't from an EIA-CC stream. And I wouldn't take the streaming services - (CC) as gospel either as they tend to get it wrong too. - sdh: Deaf or Hard-of-Hearing. Also known as HOH in the UK (EU?). - - Intended as if you couldn't hear the audio at all. - - MUST have Sound as well as Dialogue to be considered SDH. - - It has no "syntax" or "format" but is not transmitted using archaic - forms like EIA-CC streams, would be intended for transmission via - SubRip (SRT), WebVTT (VTT), TTML, etc. - If you can see important audio/sound transcriptions and not just dialogue - and it doesn't have the indicators of CC, then it's most likely SDH. - If it doesn't have important audio/sounds transcriptions it might just be - regular subtitling (you wouldn't mark as CC or SDH). This would be the - case for most translation subtitles. Like Anime for example. - forced: Typically used if there's important information at some point in time - like watching Dubbed content and an important Sign or Letter is shown - or someone talking in a different language. - Forced tracks are recommended by the Matroska Spec to be played if - the player's current playback audio language matches a subtitle - marked as "forced". - However, that doesn't mean every player works like this but there is - no other way to reliably work with Forced subtitles where multiple - forced subtitles may be in the output file. Just know what to expect - with "forced" subtitles. 
- """ - super().__init__(*args, **kwargs) - self.cc = bool(cc) - self.sdh = bool(sdh) - if self.cc and self.sdh: - raise ValueError("A text track cannot be both CC and SDH.") - self.forced = bool(forced) - if (self.cc or self.sdh) and self.forced: - raise ValueError("A text track cannot be CC/SDH as well as Forced.") + Parameters: + cc: Closed Caption. + - Intended as if you couldn't hear the audio at all. + - Can have Sound as well as Dialogue, but doesn't have to. + - Original source would be from an EIA-CC encoded stream. Typically all + upper-case characters. + Indicators of it being CC without knowing original source: + - Extracted with CCExtractor, or + - >>> (or similar) being used at the start of some or all lines, or + - All text is uppercase or at least the majority, or + - Subtitles are Scrolling-text style (one line appears, oldest line + then disappears). + Just because you downloaded it as a SRT or VTT or such, doesn't mean it + isn't from an EIA-CC stream. And I wouldn't take the streaming services + (CC) as gospel either as they tend to get it wrong too. + sdh: Deaf or Hard-of-Hearing. Also known as HOH in the UK (EU?). + - Intended as if you couldn't hear the audio at all. + - MUST have Sound as well as Dialogue to be considered SDH. + - It has no "syntax" or "format" but is not transmitted using archaic + forms like EIA-CC streams, would be intended for transmission via + SubRip (SRT), WebVTT (VTT), TTML, etc. + If you can see important audio/sound transcriptions and not just dialogue + and it doesn't have the indicators of CC, then it's most likely SDH. + If it doesn't have important audio/sounds transcriptions it might just be + regular subtitling (you wouldn't mark as CC or SDH). This would be the + case for most translation subtitles. Like Anime for example. 
+ forced: Typically used if there's important information at some point in time + like watching Dubbed content and an important Sign or Letter is shown + or someone talking in a different language. + Forced tracks are recommended by the Matroska Spec to be played if + the player's current playback audio language matches a subtitle + marked as "forced". + However, that doesn't mean every player works like this but there is + no other way to reliably work with Forced subtitles where multiple + forced subtitles may be in the output file. Just know what to expect + with "forced" subtitles. + """ + super().__init__(*args, **kwargs) + self.cc = bool(cc) + self.sdh = bool(sdh) + if self.cc and self.sdh: + raise ValueError("A text track cannot be both CC and SDH.") + self.forced = bool(forced) + if (self.cc or self.sdh) and self.forced: + raise ValueError("A text track cannot be CC/SDH as well as Forced.") - def get_track_name(self): - """Return the base Track Name.""" - track_name = super().get_track_name() or "" - flag = self.cc and "CC" or self.sdh and "SDH" or self.forced and "Forced" - if flag: - if track_name: - flag = f" ({flag})" - track_name += flag - return track_name or None + def get_track_name(self): + """Return the base Track Name.""" + track_name = super().get_track_name() or "" + flag = self.cc and "CC" or self.sdh and "SDH" or self.forced and "Forced" + if flag: + if track_name: + flag = f" ({flag})" + track_name += flag + return track_name or None - @staticmethod - def parse(data, codec): - # TODO: Use an "enum" for subtitle codecs - if not isinstance(data, bytes): - raise ValueError(f"Subtitle data must be parsed as bytes data, not {data.__class__.__name__}") - try: - if codec.startswith("stpp"): - captions = defaultdict(list) - for segment in ( - TextTrack.parse(box.data, "ttml") - for box in MP4.parse_stream(BytesIO(data)) if box.type == b"mdat" - ): - lang = segment.get_languages()[0] - for caption in segment.get_captions(lang): - prev_caption = 
captions and captions[lang][-1] + @staticmethod + def parse(data, codec): + # TODO: Use an "enum" for subtitle codecs + if not isinstance(data, bytes): + raise ValueError(f"Subtitle data must be parsed as bytes data, not {data.__class__.__name__}") + try: + if codec.startswith("stpp"): + captions = defaultdict(list) + for segment in ( + TextTrack.parse(box.data, "ttml") + for box in MP4.parse_stream(BytesIO(data)) if box.type == b"mdat" + ): + lang = segment.get_languages()[0] + for caption in segment.get_captions(lang): + prev_caption = captions and captions[lang][-1] - if prev_caption and (prev_caption.start, prev_caption.end) == (caption.start, caption.end): - # Merge cues with equal start and end timestamps. - # - # pycaption normally does this itself, but we need to do it manually here - # for the next merge to work properly. - prev_caption.nodes += [pycaption.CaptionNode.create_break(), *caption.nodes] - elif prev_caption and caption.start <= prev_caption.end: - # If the previous cue's end timestamp is less or equal to the current cue's start timestamp, - # just extend the previous one's end timestamp to the current one's end timestamp. - # This is to get rid of duplicates, as STPP may duplicate cues at segment boundaries. - prev_caption.end = caption.end - else: - captions[lang].append(caption) + if prev_caption and (prev_caption.start, prev_caption.end) == (caption.start, caption.end): + # Merge cues with equal start and end timestamps. + # + # pycaption normally does this itself, but we need to do it manually here + # for the next merge to work properly. + prev_caption.nodes += [pycaption.CaptionNode.create_break(), *caption.nodes] + elif prev_caption and caption.start <= prev_caption.end: + # If the previous cue's end timestamp is less or equal to the current cue's start timestamp, + # just extend the previous one's end timestamp to the current one's end timestamp. + # This is to get rid of duplicates, as STPP may duplicate cues at segment boundaries. 
+ prev_caption.end = caption.end + else: + captions[lang].append(caption) - return pycaption.CaptionSet(captions) - if codec in ["dfxp", "ttml", "tt"]: - text = data.decode("utf-8").replace("tt:", "") - return pycaption.DFXPReader().read(text) - if codec in ["vtt", "webvtt", "wvtt"] or codec.startswith("webvtt"): - text = data.decode("utf-8").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<") - text = re.sub(r"‏", "\u202B", text) - return pycaption.WebVTTReader().read(text) - if codec.lower() == "ass": - try: - subs = pysubs2.load(data.decode('utf-8')) - captions = {} - for line in subs: - if line.start is not None and line.end is not None and line.text: - caption = pycaption.Caption( - start=line.start.to_time().total_seconds(), - end=line.end.to_time().total_seconds(), - nodes=[pycaption.CaptionNode.create_text(line.text)] - ) - if line.style: - caption.style = line.style.name # Optionally include the style name - if line.actor: - caption.actor = line.actor # Optionally include the actor name - if line.effect: - caption.effect = line.effect # Optionally include the effect - captions[line.style.name] = captions.get(line.style.name, []) + [caption] + return pycaption.CaptionSet(captions) + if codec in ["dfxp", "ttml", "tt"]: + text = data.decode("utf-8").replace("tt:", "") + return pycaption.DFXPReader().read(text) + if codec in ["vtt", "webvtt", "wvtt"] or codec.startswith("webvtt"): + text = data.decode("utf-8").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<") + text = re.sub(r"‏", "\u202B", text) + return pycaption.WebVTTReader().read(text) + if codec.lower() == "ass": + try: + subs = pysubs2.load(data.decode('utf-8')) + captions = {} + for line in subs: + if line.start is not None and line.end is not None and line.text: + caption = pycaption.Caption( + start=line.start.to_time().total_seconds(), + end=line.end.to_time().total_seconds(), + nodes=[pycaption.CaptionNode.create_text(line.text)] + ) + if line.style: + 
caption.style = line.style.name # Optionally include the style name + if line.actor: + caption.actor = line.actor # Optionally include the actor name + if line.effect: + caption.effect = line.effect # Optionally include the effect + captions[line.style.name] = captions.get(line.style.name, []) + [caption] - return pycaption.CaptionSet(captions) - except Exception as e: - raise ValueError(f"Failed to parse .ass subtitle: {str(e)}") - except pycaption.exceptions.CaptionReadSyntaxError: - raise SyntaxError(f"A syntax error has occurred when reading the \"{codec}\" subtitle") - except pycaption.exceptions.CaptionReadNoCaptions: - return pycaption.CaptionSet({"en": []}) + return pycaption.CaptionSet(captions) + except Exception as e: + raise ValueError(f"Failed to parse .ass subtitle: {str(e)}") + except pycaption.exceptions.CaptionReadSyntaxError: + raise SyntaxError(f"A syntax error has occurred when reading the \"{codec}\" subtitle") + except pycaption.exceptions.CaptionReadNoCaptions: + return pycaption.CaptionSet({"en": []}) - raise ValueError(f"Unknown subtitle format: {codec!r}") + raise ValueError(f"Unknown subtitle format: {codec!r}") - @staticmethod - def convert_to_srt(data, codec): - if isinstance(data, bytes): - data = data.decode() + @staticmethod + def convert_to_srt(data, codec): + if isinstance(data, bytes): + data = data.decode() - from vinetrimmer.utils.ttml2ssa import Ttml2Ssa - ttml = Ttml2Ssa() - if codec in ["dfxp", "ttml", "tt"] or codec.startswith("ttml"): - ttml.parse_ttml_from_string(data) - else: # codec in ["vtt", "webvtt", "wvtt"] or codec.startswith("webvtt"): - ttml.parse_vtt_from_string(data) + from vinetrimmer.utils.ttml2ssa import Ttml2Ssa + ttml = Ttml2Ssa() + if codec in ["dfxp", "ttml", "tt"] or codec.startswith("ttml"): + ttml.parse_ttml_from_string(data) + else: # codec in ["vtt", "webvtt", "wvtt"] or codec.startswith("webvtt"): + ttml.parse_vtt_from_string(data) - for entry in ttml.entries: - text = str(entry['text']) - 
line_split = text.splitlines() - if len(line_split) == 3: - text = f"{line_split[0]}\n" \ - f"{line_split[1]} {line_split[2]}" - if len(line_split) == 4: - text = f"{line_split[0]} {line_split[1]}\n" \ - f"{line_split[2]} {line_split[3]}" - entry['text'] = text - - # return pycaption.SRTWriter().write(TextTrack.parse(data, codec)) - return ttml.generate_srt() - - @staticmethod - def convert_to_srt2(data, codec): - return pycaption.SRTWriter().write(TextTrack.parse(data, codec)) + for entry in ttml.entries: + text = str(entry['text']) + line_split = text.splitlines() + if len(line_split) == 3: + text = f"{line_split[0]}\n" \ + f"{line_split[1]} {line_split[2]}" + if len(line_split) == 4: + text = f"{line_split[0]} {line_split[1]}\n" \ + f"{line_split[2]} {line_split[3]}" + entry['text'] = text + + # return pycaption.SRTWriter().write(TextTrack.parse(data, codec)) + return ttml.generate_srt() + + @staticmethod + def convert_to_srt2(data, codec): + return pycaption.SRTWriter().write(TextTrack.parse(data, codec)) - def download(self, out, name=None, headers=None, proxy=None): - save_path = super().download(out, name, headers, proxy) - if self.codec.lower() == "ass": - return save_path # Return the .ass file as-is without any conversion - elif self.source == "iP": - with open(save_path, "r+b") as fd: - data = fd.read() - fd.seek(0) - fd.truncate() - fd.write(self.convert_to_srt2(data, self.codec).encode("utf-8")) - self.codec = "srt" - return save_path - elif self.codec.lower() != "srt": - with open(save_path, "r+b") as fd: - data = fd.read() - fd.seek(0) - fd.truncate() - fd.write(self.convert_to_srt(data, self.codec).encode("utf-8")) - self.codec = "srt" - return save_path + def download(self, out, name=None, headers=None, proxy=None): + save_path = super().download(out, name, headers, proxy) + if self.codec.lower() == "ass": + return save_path # Return the .ass file as-is without any conversion + elif self.source == "iP": + with open(save_path, "r+b") as fd: + data = 
fd.read() + fd.seek(0) + fd.truncate() + fd.write(self.convert_to_srt2(data, self.codec).encode("utf-8")) + self.codec = "srt" + return save_path + elif self.codec.lower() != "srt": + with open(save_path, "r+b") as fd: + data = fd.read() + fd.seek(0) + fd.truncate() + fd.write(self.convert_to_srt(data, self.codec).encode("utf-8")) + self.codec = "srt" + return save_path - def __str__(self): - codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) - return " | ".join([x for x in [ - "├─ SUB", - f"[{codec}]", - f"{self.language}", - " ".join([self.get_track_name() or "", "[Original]" if self.is_original_lang else ""]).strip() - ] if x]) + def __str__(self): + codec = next((CODEC_MAP[x] for x in CODEC_MAP if (self.codec or "").startswith(x)), self.codec) + return " | ".join([x for x in [ + "├─ SUB", + f"[{codec}]", + f"{self.language}", + " ".join([self.get_track_name() or "", "[Original]" if self.is_original_lang else ""]).strip() + ] if x]) class MenuTrack: - line_1 = re.compile(r"^CHAPTER(?P\d+)=(?P[\d\\.]+)$") - line_2 = re.compile(r"^CHAPTER(?P\d+)NAME=(?P[\d\\.]+)$") + line_1 = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timecode>[\d\\.]+)$") + line_2 = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<title>[\d\\.]+)$") - def __init__(self, number, title, timecode): - self.id = f"chapter-{number}" - self.number = number - self.title = title - if "." not in timecode: - timecode += ".000" - self.timecode = timecode + def __init__(self, number, title, timecode): + self.id = f"chapter-{number}" + self.number = number + self.title = title + if "." not in timecode: + timecode += ".000" + self.timecode = timecode - def __bool__(self): - return bool( - self.number and self.number >= 0 and - self.title and - self.timecode - ) + def __bool__(self): + return bool( + self.number and self.number >= 0 and + self.title and + self.timecode + ) - def __repr__(self): - """ - OGM-based Simple Chapter Format intended for use with MKVToolNix. 
+ def __repr__(self): + """ + OGM-based Simple Chapter Format intended for use with MKVToolNix. - This format is not officially part of the Matroska spec. This was a format - designed for OGM tools that MKVToolNix has since re-used. More Information: - https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple - """ - return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format( - num=f"{self.number:02}", - time=self.timecode, - name=self.title - ) + This format is not officially part of the Matroska spec. This was a format + designed for OGM tools that MKVToolNix has since re-used. More Information: + https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple + """ + return "CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format( + num=f"{self.number:02}", + time=self.timecode, + name=self.title + ) - def __str__(self): - return " | ".join([ - "├─ CHP", - f"[{self.number:02}]", - self.timecode, - self.title - ]) + def __str__(self): + return " | ".join([ + "├─ CHP", + f"[{self.number:02}]", + self.timecode, + self.title + ]) - @classmethod - def loads(cls, data): - """Load chapter data from a string.""" - lines = [x.strip() for x in data.strip().splitlines(keepends=False)] - if len(lines) > 2: - return MenuTrack.loads("\n".join(lines)) - one, two = lines + @classmethod + def loads(cls, data): + """Load chapter data from a string.""" + lines = [x.strip() for x in data.strip().splitlines(keepends=False)] + if len(lines) > 2: + return MenuTrack.loads("\n".join(lines)) + one, two = lines - one_m = cls.line_1.match(one) - two_m = cls.line_2.match(two) - if not one_m or not two_m: - raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}") + one_m = cls.line_1.match(one) + two_m = cls.line_2.match(two) + if not one_m or not two_m: + raise SyntaxError(f"An unexpected syntax error near:\n{one}\n{two}") - one_str, timecode = one_m.groups() - two_str, title = two_m.groups() - one_num, two_num = int(one_str.lstrip("0")), 
int(two_str.lstrip("0")) + one_str, timecode = one_m.groups() + two_str, title = two_m.groups() + one_num, two_num = int(one_str.lstrip("0")), int(two_str.lstrip("0")) - if one_num != two_num: - raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.") - if not timecode: - raise SyntaxError("The timecode is missing.") - if not title: - raise SyntaxError("The title is missing.") + if one_num != two_num: + raise SyntaxError(f"The chapter numbers ({one_num},{two_num}) does not match.") + if not timecode: + raise SyntaxError("The timecode is missing.") + if not title: + raise SyntaxError("The title is missing.") - return cls(number=one_num, title=title, timecode=timecode) + return cls(number=one_num, title=title, timecode=timecode) - @classmethod - def load(cls, path): - """Load chapter data from a file.""" - with open(path, encoding="utf-8") as fd: - return cls.loads(fd.read()) + @classmethod + def load(cls, path): + """Load chapter data from a file.""" + with open(path, encoding="utf-8") as fd: + return cls.loads(fd.read()) - def dumps(self): - """Return chapter data as a string.""" - return repr(self) + def dumps(self): + """Return chapter data as a string.""" + return repr(self) - def dump(self, path): - """Write chapter data to a file.""" - with open(path, "w", encoding="utf-8") as fd: - return fd.write(self.dumps()) + def dump(self, path): + """Write chapter data to a file.""" + with open(path, "w", encoding="utf-8") as fd: + return fd.write(self.dumps()) - @staticmethod - def format_duration(seconds): - minutes, seconds = divmod(seconds, 60) - hours, minutes = divmod(minutes, 60) - return f"{hours:02.0f}:{minutes:02.0f}:{seconds:06.3f}" + @staticmethod + def format_duration(seconds): + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + return f"{hours:02.0f}:{minutes:02.0f}:{seconds:06.3f}" class Tracks: - """ - Tracks. - Stores video, audio, and subtitle tracks. It also stores chapter/menu entries. 
- It provides convenience functions for listing, sorting, and selecting tracks. - """ + """ + Tracks. + Stores video, audio, and subtitle tracks. It also stores chapter/menu entries. + It provides convenience functions for listing, sorting, and selecting tracks. + """ - TRACK_ORDER_MAP = { - VideoTrack: 0, - AudioTrack: 1, - TextTrack: 2, - MenuTrack: 3 - } + TRACK_ORDER_MAP = { + VideoTrack: 0, + AudioTrack: 1, + TextTrack: 2, + MenuTrack: 3 + } - def __init__(self, *args): - self.videos = [] - self.audios = [] - self.subtitles = [] - self.chapters = [] + def __init__(self, *args): + self.videos = [] + self.audios = [] + self.subtitles = [] + self.chapters = [] - if args: - self.add(as_list(*args)) + if args: + self.add(as_list(*args)) - def __iter__(self): - return iter(as_list(self.videos, self.audios, self.subtitles)) + def __iter__(self): + return iter(as_list(self.videos, self.audios, self.subtitles)) - def __repr__(self): - return "{name}({items})".format( - name=self.__class__.__name__, - items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) - ) + def __repr__(self): + return "{name}({items})".format( + name=self.__class__.__name__, + items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()]) + ) - def __str__(self): - rep = "" - last_track_type = None - tracks = [*list(self), *self.chapters] - for track in sorted(tracks, key=lambda t: self.TRACK_ORDER_MAP[type(t)]): - if type(track) != last_track_type: - last_track_type = type(track) - count = sum(type(x) is type(track) for x in tracks) - rep += "{count} {type} Track{plural}{colon}\n".format( - count=count, - type=track.__class__.__name__.replace("Track", ""), - plural="s" if count != 1 else "", - colon=":" if count > 0 else "" - ) - rep += f"{track}\n" + def __str__(self): + rep = "" + last_track_type = None + tracks = [*list(self), *self.chapters] + for track in sorted(tracks, key=lambda t: self.TRACK_ORDER_MAP[type(t)]): + if type(track) != last_track_type: + last_track_type = 
type(track) + count = sum(type(x) is type(track) for x in tracks) + rep += "{count} {type} Track{plural}{colon}\n".format( + count=count, + type=track.__class__.__name__.replace("Track", ""), + plural="s" if count != 1 else "", + colon=":" if count > 0 else "" + ) + rep += f"{track}\n" - return rep.rstrip() + return rep.rstrip() - def exists(self, by_id=None, by_url=None): - """Check if a track already exists by various methods.""" - if by_id: # recommended - return any(x.id == by_id for x in self) - if by_url: - return any(x.url == by_url for x in self) - return False + def exists(self, by_id=None, by_url=None): + """Check if a track already exists by various methods.""" + if by_id: # recommended + return any(x.id == by_id for x in self) + if by_url: + return any(x.url == by_url for x in self) + return False - def add(self, tracks, warn_only=True): - """Add a provided track to its appropriate array and ensuring it's not a duplicate.""" - if isinstance(tracks, Tracks): - tracks = [*list(tracks), *tracks.chapters] + def add(self, tracks, warn_only=True): + """Add a provided track to its appropriate array and ensuring it's not a duplicate.""" + if isinstance(tracks, Tracks): + tracks = [*list(tracks), *tracks.chapters] - duplicates = 0 - for track in as_list(tracks): - if self.exists(by_id=track.id): - if not warn_only: - raise ValueError( - "One or more of the provided Tracks is a duplicate. " - "Track IDs must be unique but accurate using static values. The " - "value should stay the same no matter when you request the same " - "content. Use a value that has relation to the track content " - "itself and is static or permanent and not random/RNG data that " - "wont change each refresh or conflict in edge cases." - ) - duplicates += 1 - continue + duplicates = 0 + for track in as_list(tracks): + if self.exists(by_id=track.id): + if not warn_only: + raise ValueError( + "One or more of the provided Tracks is a duplicate. 
" + "Track IDs must be unique but accurate using static values. The " + "value should stay the same no matter when you request the same " + "content. Use a value that has relation to the track content " + "itself and is static or permanent and not random/RNG data that " + "wont change each refresh or conflict in edge cases." + ) + duplicates += 1 + continue - if isinstance(track, VideoTrack): - self.videos.append(track) - elif isinstance(track, AudioTrack): - self.audios.append(track) - elif isinstance(track, TextTrack): - self.subtitles.append(track) - elif isinstance(track, MenuTrack): - self.chapters.append(track) - else: - raise ValueError("Track type was not set or is invalid.") + if isinstance(track, VideoTrack): + self.videos.append(track) + elif isinstance(track, AudioTrack): + self.audios.append(track) + elif isinstance(track, TextTrack): + self.subtitles.append(track) + elif isinstance(track, MenuTrack): + self.chapters.append(track) + else: + raise ValueError("Track type was not set or is invalid.") - log = logging.getLogger("Tracks") + log = logging.getLogger("Tracks") - if duplicates: - log.warning(f" - Found and skipped {duplicates} duplicate tracks") + if duplicates: + log.warning(f" - Found and skipped {duplicates} duplicate tracks") - def print(self, level=logging.INFO): - """Print the __str__ to log at a specified level.""" - log = logging.getLogger("Tracks") - for line in str(self).splitlines(keepends=False): - log.log(level, line) + def print(self, level=logging.INFO): + """Print the __str__ to log at a specified level.""" + log = logging.getLogger("Tracks") + for line in str(self).splitlines(keepends=False): + log.log(level, line) - def sort_videos(self, by_language=None): - """Sort video tracks by bitrate, and optionally language.""" - if not self.videos: - return - # bitrate - self.videos = sorted(self.videos, key=lambda x: float(x.bitrate or 0.0), reverse=True) - # language - for language in reversed(by_language or []): - if str(language) == 
"all": - language = next((x.language for x in self.videos if x.is_original_lang), "") - if not language: - continue - self.videos = sorted( - self.videos, - key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) - ) + def sort_videos(self, by_language=None): + """Sort video tracks by bitrate, and optionally language.""" + if not self.videos: + return + # bitrate + self.videos = sorted(self.videos, key=lambda x: float(x.bitrate or 0.0), reverse=True) + # language + for language in reversed(by_language or []): + if str(language) == "all": + language = next((x.language for x in self.videos if x.is_original_lang), "") + if not language: + continue + self.videos = sorted( + self.videos, + key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) + ) - def sort_audios(self, by_language=None): - """Sort audio tracks by bitrate, descriptive, and optionally language.""" - if not self.audios: - return - # bitrate - self.audios = sorted(self.audios, key=lambda x: float(x.bitrate or 0.0), reverse=True) - # channels - self.audios = sorted(self.audios, key=lambda x: float(x.channels.replace("ch", "").replace("/JOC", "") if x.channels is not None else 0.0), reverse=True) - # descriptive - self.audios = sorted(self.audios, key=lambda x: str(x.language) if x.descriptive else "") - # language - for language in reversed(by_language or []): - if str(language) == "all": - language = next((x.language for x in self.audios if x.is_original_lang), "") - if not language: - continue - self.audios = sorted( - self.audios, - key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) - ) + def sort_audios(self, by_language=None): + """Sort audio tracks by bitrate, descriptive, and optionally language.""" + if not self.audios: + return + # bitrate + self.audios = sorted(self.audios, key=lambda x: float(x.bitrate or 0.0), reverse=True) + # channels + self.audios = sorted(self.audios, key=lambda x: float(x.channels.replace("ch", 
"").replace("/JOC", "") if x.channels is not None else 0.0), reverse=True) + # descriptive + self.audios = sorted(self.audios, key=lambda x: str(x.language) if x.descriptive else "") + # language + for language in reversed(by_language or []): + if str(language) == "all": + language = next((x.language for x in self.audios if x.is_original_lang), "") + if not language: + continue + self.audios = sorted( + self.audios, + key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) + ) - def sort_subtitles(self, by_language=None): - """Sort subtitle tracks by sdh, cc, forced, and optionally language.""" - if not self.subtitles: - return - # sdh/cc - self.subtitles = sorted( - self.subtitles, key=lambda x: str(x.language) + ("-cc" if x.cc else "") + ("-sdh" if x.sdh else "") - ) - # forced - self.subtitles = sorted(self.subtitles, key=lambda x: not x.forced) - # language - for language in reversed(by_language or []): - if str(language) == "all": - language = next((x.language for x in self.subtitles if x.is_original_lang), "") - if not language: - continue - self.subtitles = sorted( - self.subtitles, - key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) - ) + def sort_subtitles(self, by_language=None): + """Sort subtitle tracks by sdh, cc, forced, and optionally language.""" + if not self.subtitles: + return + # sdh/cc + self.subtitles = sorted( + self.subtitles, key=lambda x: str(x.language) + ("-cc" if x.cc else "") + ("-sdh" if x.sdh else "") + ) + # forced + self.subtitles = sorted(self.subtitles, key=lambda x: not x.forced) + # language + for language in reversed(by_language or []): + if str(language) == "all": + language = next((x.language for x in self.subtitles if x.is_original_lang), "") + if not language: + continue + self.subtitles = sorted( + self.subtitles, + key=lambda x: "" if is_close_match(language, [x.language]) else str(x.language) + ) - def sort_chapters(self): - """Sort chapter tracks by chapter 
number.""" - if not self.chapters: - return - # number - self.chapters = sorted(self.chapters, key=lambda x: x.number) + def sort_chapters(self): + """Sort chapter tracks by chapter number.""" + if not self.chapters: + return + # number + self.chapters = sorted(self.chapters, key=lambda x: x.number) - @staticmethod - def select_by_language(languages, tracks, one_per_lang=True): - """ - Filter a track list by language. + @staticmethod + def select_by_language(languages, tracks, one_per_lang=True): + """ + Filter a track list by language. - If one_per_lang is True, only the first matched track will be returned for - each language. It presumes the first match is what is wanted. + If one_per_lang is True, only the first matched track will be returned for + each language. It presumes the first match is what is wanted. - This means if you intend for it to return the best track per language, - then ensure the iterable is sorted in ascending order (first = best, last = worst). - """ - if "orig" in languages: - nonoriglangs = languages.remove("orig") - else: nonoriglangs = languages - if not tracks: - return - if "all" not in languages: - track_type = tracks[0].__class__.__name__.lower().replace("track", "").replace("text", "subtitle") - orig_tracks = tracks - tracks = [ - x for x in tracks - if is_close_match(x.language, languages) or (x.is_original_lang and "orig" in languages and not any(lang in x.language for lang in nonoriglangs)) - ] - if not tracks: - if languages == ["orig"]: - all_languages = set(x.language for x in orig_tracks) - if len(all_languages) == 1: - # If there's only one language available, take it - languages = list(all_languages) - tracks = [ - x for x in orig_tracks - if is_close_match(x.language, languages) or (x.is_original_lang and "orig" in languages) - ] - else: - raise ValueError( - f"There's no original {track_type} track. Please specify a language manually with " - f"{'-al' if track_type == 'audio' else '-sl'}." 
- ) - else: - raise ValueError( - f"There's no {track_type} tracks that match the language{'' if len(languages) == 1 else 's'}: " - f"{', '.join(languages)}" - ) - if one_per_lang: - if "all" in languages: - languages = list(sorted(set(x.language for x in tracks), key=str)) - for language in languages: - if language == "orig": - yield next(x for x in tracks if x.is_original_lang) - else: - match = get_closest_match(language, [x.language for x in tracks]) - if match: - yield next(x for x in tracks if x.language == match) - else: - for track in tracks: - yield track + This means if you intend for it to return the best track per language, + then ensure the iterable is sorted in ascending order (first = best, last = worst). + """ + if "orig" in languages: + nonoriglangs = languages.remove("orig") + else: nonoriglangs = languages + if not tracks: + return + if "all" not in languages: + track_type = tracks[0].__class__.__name__.lower().replace("track", "").replace("text", "subtitle") + orig_tracks = tracks + tracks = [ + x for x in tracks + if is_close_match(x.language, languages) or (x.is_original_lang and "orig" in languages and not any(lang in x.language for lang in nonoriglangs)) + ] + if not tracks: + if languages == ["orig"]: + all_languages = set(x.language for x in orig_tracks) + if len(all_languages) == 1: + # If there's only one language available, take it + languages = list(all_languages) + tracks = [ + x for x in orig_tracks + if is_close_match(x.language, languages) or (x.is_original_lang and "orig" in languages) + ] + else: + raise ValueError( + f"There's no original {track_type} track. Please specify a language manually with " + f"{'-al' if track_type == 'audio' else '-sl'}." 
+ ) + else: + raise ValueError( + f"There's no {track_type} tracks that match the language{'' if len(languages) == 1 else 's'}: " + f"{', '.join(languages)}" + ) + if one_per_lang: + if "all" in languages: + languages = list(sorted(set(x.language for x in tracks), key=str)) + for language in languages: + if language == "orig": + yield next(x for x in tracks if x.is_original_lang) + else: + match = get_closest_match(language, [x.language for x in tracks]) + if match: + yield next(x for x in tracks if x.language == match) + else: + for track in tracks: + yield track - def select_videos (self, by_language=None, by_vbitrate=None, by_quality=None, by_range=None, - one_only: bool = True, by_codec=None, - ) -> None: - """Filter video tracks by language and other criteria.""" - if by_quality: - # Note: Do not merge these list comprehensions. They must be done separately so the results - # from the 16:9 canvas check is only used if there's no exact height resolution match. - videos_quality = [x for x in self.videos if x.height == by_quality] - if not videos_quality: - videos_quality = [x for x in self.videos if int(x.width * (9 / 16)) == by_quality] - if not videos_quality: - # AMZN weird resolution (1248x520) - videos_quality = [x for x in self.videos if x.width == 1248 and by_quality == 720] - if not videos_quality: - videos_quality = [x for x in self.videos if (x.width, x.height) < (1024, 576) and by_quality == "SD"] - if not videos_quality: - videos_quality = [ - x for x in self.videos if isinstance(x.extra, dict) and x.extra.get("quality") == by_quality - ] - if not videos_quality: - raise ValueError(f"There's no {by_quality}p resolution video track. 
Aborting.") - self.videos = videos_quality - if by_vbitrate: - self.videos = [x for x in self.videos if int(x.bitrate) <= int(by_vbitrate * 1001)] - if by_codec: - codec_videos = list(filter(lambda x: any(y for y in self.VIDEO_CODEC_MAP[by_codec] if y in x.codec), self.videos)) - if not codec_videos and not should_fallback: - raise ValueError(f"There's no {by_codec} video tracks. Aborting.") - else: - self.videos = (codec_videos if codec_videos else self.videos) - if by_range: - self.videos = [x for x in self.videos if { - "HDR10": x.hdr10, - "HLG": x.hlg, - "DV": x.dv, - "SDR": not x.hdr10 and not x.dv - }.get((by_range or "").upper(), True)] - if not self.videos: - raise ValueError(f"There's no {by_range} video track. Aborting.") - if by_language: - self.videos = list(self.select_by_language(by_language, self.videos)) - if one_only and self.videos: - self.videos = [self.videos[0]] + def select_videos (self, by_language=None, by_vbitrate=None, by_quality=None, by_range=None, + one_only: bool = True, by_codec=None, + ) -> None: + """Filter video tracks by language and other criteria.""" + if by_quality: + # Note: Do not merge these list comprehensions. They must be done separately so the results + # from the 16:9 canvas check is only used if there's no exact height resolution match. + videos_quality = [x for x in self.videos if x.height == by_quality] + if not videos_quality: + videos_quality = [x for x in self.videos if int(x.width * (9 / 16)) == by_quality] + if not videos_quality: + # AMZN weird resolution (1248x520) + videos_quality = [x for x in self.videos if x.width == 1248 and by_quality == 720] + if not videos_quality: + videos_quality = [x for x in self.videos if (x.width, x.height) < (1024, 576) and by_quality == "SD"] + if not videos_quality: + videos_quality = [ + x for x in self.videos if isinstance(x.extra, dict) and x.extra.get("quality") == by_quality + ] + if not videos_quality: + raise ValueError(f"There's no {by_quality}p resolution video track. 
Aborting.") + self.videos = videos_quality + if by_vbitrate: + self.videos = [x for x in self.videos if int(x.bitrate) <= int(by_vbitrate * 1001)] + if by_codec: + codec_videos = list(filter(lambda x: any(y for y in self.VIDEO_CODEC_MAP[by_codec] if y in x.codec), self.videos)) + if not codec_videos and not should_fallback: + raise ValueError(f"There's no {by_codec} video tracks. Aborting.") + else: + self.videos = (codec_videos if codec_videos else self.videos) + if by_range: + self.videos = [x for x in self.videos if { + "HDR10": x.hdr10, + "HLG": x.hlg, + "DV": x.dv, + "SDR": not x.hdr10 and not x.dv + }.get((by_range or "").upper(), True)] + if not self.videos: + raise ValueError(f"There's no {by_range} video track. Aborting.") + if by_language: + self.videos = list(self.select_by_language(by_language, self.videos)) + if one_only and self.videos: + self.videos = [self.videos[0]] - def select_audios( - self, - with_descriptive: bool = True, - with_atmos: bool = False, - by_language=None, - by_bitrate=None, - by_channels=None, - by_codec=None, - should_fallback: bool = False - ) -> None: - """Filter audio tracks by language and other criteria.""" - if not with_descriptive: - self.audios = [x for x in self.audios if not x.descriptive] - if by_codec: - codec_audio = list(filter(lambda x: any(y for y in self.AUDIO_CODEC_MAP[by_codec] if y in x.codec), self.audio)) - if not codec_audio and not should_fallback: - raise ValueError(f"There's no {by_codec} audio tracks. Aborting.") - else: - self.audios = (codec_audio if codec_audio else self.audios) - if by_channels: - channels_audio = list(filter(lambda x: x.channels == by_channels, self.audios)) - if not channels_audio and not should_fallback: - raise ValueError(f"There's no {by_channels} {by_codec} audio tracks. 
Aborting.") - else: - self.audios = (channels_audio if channels_audio else self.audios) - if with_atmos: - atmos_audio = list(filter(lambda x: x.atmos, self.audios)) - self.audios = (atmos_audio if atmos_audio else self.audios) # Fallback if no atmos - if by_bitrate: - self.audios = [x for x in self.audios if int(x.bitrate) <= int(by_bitrate * 1000)] - if by_language: - # Todo: Optimize select_by_language - self.audios = list(self.select_by_language(by_language, self.audios, one_per_lang=True)) + \ - list(self.select_by_language(by_language, [x for x in self.audios if x.descriptive], one_per_lang=True)) + def select_audios( + self, + with_descriptive: bool = True, + with_atmos: bool = False, + by_language=None, + by_bitrate=None, + by_channels=None, + by_codec=None, + should_fallback: bool = False + ) -> None: + """Filter audio tracks by language and other criteria.""" + if not with_descriptive: + self.audios = [x for x in self.audios if not x.descriptive] + if by_codec: + codec_audio = list(filter(lambda x: any(y for y in self.AUDIO_CODEC_MAP[by_codec] if y in x.codec), self.audio)) + if not codec_audio and not should_fallback: + raise ValueError(f"There's no {by_codec} audio tracks. Aborting.") + else: + self.audios = (codec_audio if codec_audio else self.audios) + if by_channels: + channels_audio = list(filter(lambda x: x.channels == by_channels, self.audios)) + if not channels_audio and not should_fallback: + raise ValueError(f"There's no {by_channels} {by_codec} audio tracks. 
Aborting.") + else: + self.audios = (channels_audio if channels_audio else self.audios) + if with_atmos: + atmos_audio = list(filter(lambda x: x.atmos, self.audios)) + self.audios = (atmos_audio if atmos_audio else self.audios) # Fallback if no atmos + if by_bitrate: + self.audios = [x for x in self.audios if int(x.bitrate) <= int(by_bitrate * 1000)] + if by_language: + # Todo: Optimize select_by_language + self.audios = list(self.select_by_language(by_language, self.audios, one_per_lang=True)) + \ + list(self.select_by_language(by_language, [x for x in self.audios if x.descriptive], one_per_lang=True)) - def select_subtitles(self, by_language=None, with_cc=True, with_sdh=True, with_forced=True): - """Filter subtitle tracks by language and other criteria.""" - if not with_cc: - self.subtitles = [x for x in self.subtitles if not x.cc] - if not with_sdh: - self.subtitles = [x for x in self.subtitles if not x.sdh] - if isinstance(with_forced, list): - self.subtitles = [ - x for x in self.subtitles - if not x.forced or is_close_match(x.language, with_forced) - ] - if not with_forced: - self.subtitles = [x for x in self.subtitles if not x.forced] - if by_language: - self.subtitles = list(self.select_by_language(by_language, self.subtitles, one_per_lang=True)) + def select_subtitles(self, by_language=None, with_cc=True, with_sdh=True, with_forced=True): + """Filter subtitle tracks by language and other criteria.""" + if not with_cc: + self.subtitles = [x for x in self.subtitles if not x.cc] + if not with_sdh: + self.subtitles = [x for x in self.subtitles if not x.sdh] + if isinstance(with_forced, list): + self.subtitles = [ + x for x in self.subtitles + if not x.forced or is_close_match(x.language, with_forced) + ] + if not with_forced: + self.subtitles = [x for x in self.subtitles if not x.forced] + if by_language: + self.subtitles = list(self.select_by_language(by_language, self.subtitles, one_per_lang=True)) - def export_chapters(self, to_file=None): - """Export all 
chapters in order to a string or file.""" - self.sort_chapters() - data = "\n".join(map(repr, self.chapters)) - if to_file: - os.makedirs(os.path.dirname(to_file), exist_ok=True) - with open(to_file, "w", encoding="utf-8") as fd: - fd.write(data) - return data + def export_chapters(self, to_file=None): + """Export all chapters in order to a string or file.""" + self.sort_chapters() + data = "\n".join(map(repr, self.chapters)) + if to_file: + os.makedirs(os.path.dirname(to_file), exist_ok=True) + with open(to_file, "w", encoding="utf-8") as fd: + fd.write(data) + return data - # converter code + # converter code - @staticmethod - def from_m3u8(*args, **kwargs): - from vinetrimmer import parsers - return parsers.m3u8.parse(*args, **kwargs) + @staticmethod + def from_m3u8(*args, **kwargs): + from vinetrimmer import parsers + return parsers.m3u8.parse(*args, **kwargs) - @staticmethod - def from_mpd(*args, **kwargs): - from vinetrimmer import parsers - return parsers.mpd.parse(**kwargs) + @staticmethod + def from_mpd(*args, **kwargs): + from vinetrimmer import parsers + return parsers.mpd.parse(**kwargs) - @staticmethod - def from_ism(*args, **kwargs): - from vinetrimmer import parsers - return parsers.ism.parse(**kwargs) + @staticmethod + def from_ism(*args, **kwargs): + from vinetrimmer import parsers + return parsers.ism.parse(**kwargs) - def mux(self, prefix): - """ - Takes the Video, Audio and Subtitle Tracks, and muxes them into an MKV file. 
- It will attempt to detect Forced/Default tracks, and will try to parse the language codes of the Tracks - """ - if self.videos: - muxed_location = self.videos[0].locate() - if not muxed_location: - raise ValueError("The provided video track has not yet been downloaded.") - muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mkv" - elif self.audios: - muxed_location = self.audios[0].locate() - if not muxed_location: - raise ValueError("A provided audio track has not yet been downloaded.") - muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mka" - elif self.subtitles: - muxed_location = self.subtitles[0].locate() - if not muxed_location: - raise ValueError("A provided subtitle track has not yet been downloaded.") - muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mks" - elif self.chapters: - muxed_location = config.filenames.chapters.format(filename=prefix) - if not muxed_location: - raise ValueError("A provided chapter has not yet been downloaded.") - muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mks" - else: - raise ValueError("No tracks provided, at least one track must be provided.") + def mux(self, prefix): + """ + Takes the Video, Audio and Subtitle Tracks, and muxes them into an MKV file. 
+ It will attempt to detect Forced/Default tracks, and will try to parse the language codes of the Tracks + """ + if self.videos: + muxed_location = self.videos[0].locate() + if not muxed_location: + raise ValueError("The provided video track has not yet been downloaded.") + muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mkv" + elif self.audios: + muxed_location = self.audios[0].locate() + if not muxed_location: + raise ValueError("A provided audio track has not yet been downloaded.") + muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mka" + elif self.subtitles: + muxed_location = self.subtitles[0].locate() + if not muxed_location: + raise ValueError("A provided subtitle track has not yet been downloaded.") + muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mks" + elif self.chapters: + muxed_location = config.filenames.chapters.format(filename=prefix) + if not muxed_location: + raise ValueError("A provided chapter has not yet been downloaded.") + muxed_location = os.path.splitext(muxed_location)[0] + ".muxed.mks" + else: + raise ValueError("No tracks provided, at least one track must be provided.") - muxed_location = os.path.join(config.directories.downloads, os.path.basename(muxed_location)) + muxed_location = os.path.join(config.directories.downloads, os.path.basename(muxed_location)) - cl = [ - "mkvmerge", - "--output", - muxed_location - ] + cl = [ + "mkvmerge", + "--output", + muxed_location + ] - for i, vt in enumerate(self.videos): - location = vt.locate() - if not location: - raise ValueError("Somehow a Video Track was not downloaded before muxing...") - cl.extend([ - "--language", "0:und", - "--disable-language-ietf", - "--default-track", f"0:{i == 0}", - "--compression", "0:none", # disable extra compression - "(", location, ")" - ]) - for i, at in enumerate(self.audios): - location = at.locate() - if not location: - raise ValueError("Somehow an Audio Track was not downloaded before muxing...") - cl.extend([ - 
"--track-name", f"0:{at.get_track_name() or ''}", - "--language", "0:{}".format(LANGUAGE_MUX_MAP.get( - str(at.language), at.language.to_alpha3() - )), - "--disable-language-ietf", - "--default-track", f"0:{i == 0}", - "--compression", "0:none", # disable extra compression - "(", location, ")" - ]) - for st in self.subtitles: - location = st.locate() - if not location: - raise ValueError("Somehow a Text Track was not downloaded before muxing...") - default = bool(self.audios and is_close_match(st.language, [self.audios[0].language]) and st.forced) - cl.extend([ - "--track-name", f"0:{st.get_track_name() or ''}", - "--language", "0:{}".format(LANGUAGE_MUX_MAP.get( - str(st.language), st.language.to_alpha3() - )), - "--disable-language-ietf", - "--sub-charset", "0:UTF-8", - "--forced-track", f"0:{st.forced}", - "--default-track", f"0:{default}", - "--compression", "0:none", # disable extra compression (probably zlib) - "(", location, ")" - ]) - if self.chapters: - location = config.filenames.chapters.format(filename=prefix) - self.export_chapters(location) - cl.extend(["--chapters", location]) + for i, vt in enumerate(self.videos): + location = vt.locate() + if not location: + raise ValueError("Somehow a Video Track was not downloaded before muxing...") + cl.extend([ + "--language", "0:und", + "--disable-language-ietf", + "--default-track", f"0:{i == 0}", + "--compression", "0:none", # disable extra compression + "(", location, ")" + ]) + for i, at in enumerate(self.audios): + location = at.locate() + if not location: + raise ValueError("Somehow an Audio Track was not downloaded before muxing...") + cl.extend([ + "--track-name", f"0:{at.get_track_name() or ''}", + "--language", "0:{}".format(LANGUAGE_MUX_MAP.get( + str(at.language), at.language.to_alpha3() + )), + "--disable-language-ietf", + "--default-track", f"0:{i == 0}", + "--compression", "0:none", # disable extra compression + "(", location, ")" + ]) + for st in self.subtitles: + location = st.locate() + if 
not location: + raise ValueError("Somehow a Text Track was not downloaded before muxing...") + default = bool(self.audios and is_close_match(st.language, [self.audios[0].language]) and st.forced) + cl.extend([ + "--track-name", f"0:{st.get_track_name() or ''}", + "--language", "0:{}".format(LANGUAGE_MUX_MAP.get( + str(st.language), st.language.to_alpha3() + )), + "--disable-language-ietf", + "--sub-charset", "0:UTF-8", + "--forced-track", f"0:{st.forced}", + "--default-track", f"0:{default}", + "--compression", "0:none", # disable extra compression (probably zlib) + "(", location, ")" + ]) + if self.chapters: + location = config.filenames.chapters.format(filename=prefix) + self.export_chapters(location) + cl.extend(["--chapters", location]) - # let potential failures go to caller, caller should handle - p = subprocess.Popen(cl, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - in_progress = False - for line in TextIOWrapper(p.stdout, encoding="utf-8"): - if re.search(r"Using the (?:demultiplexer|output module) for the format", line): - continue - if line.startswith("Progress:"): - in_progress = True - sys.stdout.write("\r" + line.rstrip('\n')) - else: - if in_progress: - in_progress = False - sys.stdout.write("\n") - sys.stdout.write(line) - returncode = p.wait() - return muxed_location, returncode + # let potential failures go to caller, caller should handle + p = subprocess.Popen(cl, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + in_progress = False + for line in TextIOWrapper(p.stdout, encoding="utf-8"): + if re.search(r"Using the (?:demultiplexer|output module) for the format", line): + continue + if line.startswith("Progress:"): + in_progress = True + sys.stdout.write("\r" + line.rstrip('\n')) + else: + if in_progress: + in_progress = False + sys.stdout.write("\n") + sys.stdout.write(line) + returncode = p.wait() + return muxed_location, returncode diff --git a/vinetrimmer/services/__init__.py b/vinetrimmer/services/__init__.py index 9b934f8..b5ece3f 
100644 --- a/vinetrimmer/services/__init__.py +++ b/vinetrimmer/services/__init__.py @@ -15,7 +15,9 @@ from vinetrimmer.services.hotstar import Hotstar from vinetrimmer.services.jio import Jio from vinetrimmer.services.moviesanywhere import MoviesAnywhere from vinetrimmer.services.sonyliv import Sonyliv - +from vinetrimmer.services.disneyplus import DisneyPlus +from vinetrimmer.services.hulu import Hulu +from vinetrimmer.services.paramountplus import ParamountPlus # Above is necessary since dynamic imports like below fuck up nuitak diff --git a/vinetrimmer/services/disneyplus.py b/vinetrimmer/services/disneyplus.py new file mode 100644 index 0000000..d5d8bc2 --- /dev/null +++ b/vinetrimmer/services/disneyplus.py @@ -0,0 +1,567 @@ +import json +import os +import re +import time +import uuid +from datetime import datetime + +import base64 +import click +import m3u8 + +from vinetrimmer.objects import MenuTrack, Title, Tracks +from vinetrimmer.services.BaseService import BaseService +from vinetrimmer.utils.BamSDK import BamSdk +from vinetrimmer.utils.collections import as_list +from vinetrimmer.utils.io import get_ip_info +from vinetrimmer.utils.widevine.device import LocalDevice + + +class DisneyPlus(BaseService): + """ + Service code for Disney's Disney+ streaming service (https://disneyplus.com). + + \b + Authorization: Credentials + Security: UHD@L1 FHD@L1 HD@L3, HEAVILY monitors high-profit and newly released titles!! 
+ + \b + Tips: - Some titles offer a setting in its Details tab to prefer "Remastered" or Original format + - You can specify which profile is used for its preferences and such in the config file + """ + + ALIASES = ["DSNP", "disneyplus", "disney+"] + TITLE_RE = [ + r"^https?://(?:www\.)?disneyplus\.com(?:/[a-z0-9-]+)?(?:/[a-z0-9-]+)?/(?P<type>browse)/(?P<id>entity-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" + + ] + + AUDIO_CODEC_MAP = { + "AAC": ["aac"], + "EC3": ["eac", "atmos"] + } + + @staticmethod + @click.command(name="DisneyPlus", short_help="https://disneyplus.com") + @click.argument("title", type=str, required=False) + @click.option("-m", "--movie", is_flag=True, default=False, help="Title is a movie.") + @click.option("-s", "--scenario", default="tv-drm-ctr", type=str, + help="Capability profile that specifies compatible codecs, streams, bit-rates, resolutions and such.") + @click.pass_context + def cli(ctx, **kwargs): + return DisneyPlus(ctx, **kwargs) + + def __init__(self, ctx, title, movie, scenario): + super().__init__(ctx) + m = self.parse_title(ctx, title) + self.movie = movie #or m.get("type") == "movies" + #self.type = m.get("type") + self.scenario = scenario + + self.vcodec = ctx.parent.params["vcodec"] + self.acodec = ctx.parent.params["acodec"] + self.range = ctx.parent.params["range_"] + self.wanted = ctx.parent.params["wanted"] + + self.playready = True if "group_certificate" in dir(ctx.obj.cdm.device) else False # ctx.obj.cdm.device.type == LocalDevice.Types.PLAYREADY + + self.region = None + self.bamsdk = None + self.device_token = None + self.account_tokens = {} + + self.configure() + + def get_titles(self): + + if self.movie: + #original_lang = self.get_hulu_series(self.title)['originalLanguage'] + data = self.get_hulu_series(self.title)["data"]["page"] + movie = Title( + id_=self.title, + type_=Title.Types.MOVIE, + name=data['visuals']['title'], + 
year=data['visuals']['metastringParts']['releaseYearRange']['startYear'], + source=self.ALIASES[0], + original_lang="en", + service_data=data["containers"][-1]["visuals"] + ) + + movie.service_data = data["actions"][0]["resourceId"] + + return movie + + else: + data = self.get_hulu_series(self.title)["data"].get("page") + if not data: + raise self.log.exit(" - No data returned") + + season_len = len(data["containers"][0]["seasons"]) + if data["containers"][0].get("type") == "episodes": + if season_len == 0: + raise self.log.exit(" - No seasons available") + + seasons = list() + for x, season in enumerate( + reversed(data["containers"][0]["seasons"]), start=1 + ): + episodes = self.get_hulu_season(season["id"])["data"]["season"]["items"] + self.log.debug(episodes) + seasons += [ + Title( + id_=t2["id"], + type_=Title.Types.TV, + name=t2["visuals"]["title"], + season=t2["visuals"].get("seasonNumber"), + episode=t2["visuals"].get("episodeNumber"), + episode_name=t2["visuals"] + .get("episodeTitle") + .replace("(Sub) ", ""), + original_lang="en", + source=self.ALIASES[0], + service_data=t2, + ) + for t2 in episodes + ] + + # Get mediaId from decoded resourceId + for x in seasons: + x.service_data["mediaMetadata"] = {} + x.service_data["mediaMetadata"]["mediaId"] = x.service_data["actions"][0]["resourceId"] + + return seasons + + # get data for every episode in every season via looping due to the fact + # that the api doesn't provide ALL episodes in the initial bundle api call. 
+ # TODO: The season info returned might also be paged/limited + + + def get_tracks(self, title): + # Refresh token in case it expired + self.account_tokens = self.get_account_token( + credential=self.credentials, + device_family=self.config["bamsdk"]["family"], + device_token=self.device_token, + ) + if self.movie: + tracks = self.get_manifest_tracks( + self.get_manifest_url( + media_id=title.service_data, + scenario=self.scenario + ) + ) + + else: + tracks = self.get_manifest_tracks( + self.get_manifest_url( + media_id=title.service_data["mediaMetadata"]["mediaId"], + scenario=self.scenario + ) + ) + + if (not any((x.codec or "").startswith("atmos") for x in tracks.audios) + and not self.scenario.endswith(("-atmos", "~unlimited"))): + self.log.info(" + Attempting to get Atmos audio from H265 manifest") + try: + atmos_scenario = self.get_manifest_tracks( + self.get_manifest_url( + media_id=title.service_data["mediaMetadata"]["mediaId"], + scenario="tv-drm-ctr-h265-atmos" + ) + ) + except: + atmos_scenario = self.get_manifest_tracks( + self.get_manifest_url( + media_id=title.service_data, + scenario="tv-drm-ctr-h265-atmos" + ) + ) + tracks.audios.extend(atmos_scenario.audios) + tracks.subtitles.extend(atmos_scenario.subtitles) + + return tracks + + def get_chapters(self, title): + return [] + + def certificate(self, **_): + return None if self.playready else self.config["certificate"] + + def license(self, challenge, **_): + # Refresh token in case it expired + self.account_tokens = self.get_account_token( + credential=self.credentials, + device_family=self.config["bamsdk"]["family"], + device_token=self.device_token, + ) + + if self.playready: + res = self.bamsdk.drm.playreadyLicense( + licence=challenge.decode(), # expects XML + access_token=self.account_tokens["access_token"] + ) + res = base64.b64encode(res).decode() + else: + res = self.bamsdk.drm.widevineLicense( + licence=challenge, # expects bytes + access_token=self.account_tokens["access_token"] + ) + + 
return res + + # Service specific functions + + def configure(self): + self.session.headers.update({ + "Accept-Language": "en-US,en;q=0.5", + "User-Agent": self.config["bamsdk"]["user_agent"], + "Origin": "https://www.disneyplus.com" + }) + + self.log.info("Preparing") + if self.range != "SDR" and self.vcodec != "H265": + # vcodec must be H265 for High Dynamic Range + self.vcodec = "H265" + self.log.info(f" + Switched video codec to H265 to be able to get {self.range} dynamic range") + self.scenario = self.prepare_scenario(self.scenario, self.vcodec, self.range) + self.log.info(f" + Scenario: {self.scenario}") + + self.log.info("Getting BAMSDK Configuration") + + ip_info = get_ip_info(self.session, fresh=True) + self.region = ip_info["country_code"].upper() + self.config["location_x"] = ip_info["latitude"] + self.config["location_y"] = ip_info["longitude"] + self.log.info(f" + IP Location: {self.config['location_x']},{self.config['location_y']}") + + self.bamsdk = BamSdk(self.config["bamsdk"]["config"], self.session) + self.session.headers.update(dict(**{ + k.lower(): v.replace( + "{SDKPlatform}", self.config["bamsdk"]["platform"] + ).replace( + "{SDKVersion}", self.config["bamsdk"]["version"] + ) for k, v in self.bamsdk.commonHeaders.items() + }, **{ + "user-agent": self.config["bamsdk"]["user_agent"] + })) + + self.log.debug(" + Capabilities:") + for k, v in self.bamsdk.media.extras.items(): + self.log.debug(f" {k}: {v}") + + self.log.info("Logging into Disney+") + self.device_token, self.account_tokens = self.login(self.credentials) + + session_info = self.bamsdk.session.getInfo(self.account_tokens["access_token"]) + self.log.info(f" + Account ID: {session_info['account']['id']}") + self.log.info(f" + Profile ID: {session_info['profile']['id']}") + self.log.info(f" + Subscribed: {session_info['isSubscriber']}") + self.log.info(f" + Account Region: {session_info['home_location']['country_code']}") + self.log.info(f" + Detected Location: 
{session_info['location']['country_code']}") + self.log.info(f" + Supported Location: {session_info['inSupportedLocation']}") + self.log.info(f" + Device: {session_info['device']['platform']}") + + if not session_info["isSubscriber"]: + raise self.log.exit(" - Cannot continue, account is not subscribed to Disney+.") + + @staticmethod + def prepare_scenario(scenario, vcodec, range_): + """Prepare Disney+'s scenario based on other arguments and settings.""" + if scenario.endswith("~unlimited"): + # if unlimited scenario, nothing needs to be appended or changed. + # the scenario will return basically all streams it can. + return scenario + if vcodec == "H265": + scenario += "-h265" + if range_ == "HDR10": + scenario += "-hdr10" + elif range_ == "DV": + scenario += "-dovi" + return scenario + + def login(self, credential): + """Log into Disney+ and retrieve various authorisation keys.""" + device_token = self.create_device_token( + family=self.config["bamsdk"]["family"], + profile=self.config["bamsdk"]["profile"], + application=self.config["bamsdk"]["applicationRuntime"], + api_key=self.config["device_api_key"] + ) + self.log.info(" + Obtained Device Token") + account_tokens = self.get_account_token( + credential=credential, + device_family=self.config["bamsdk"]["family"], + device_token=device_token, + ) + self.log.info(" + Obtained Account Token") + return device_token, account_tokens + + def create_device_token(self, family, profile, application, api_key): + """ + Create a Device Token for a specified device type. + This tells the API's what is possible for your device. + :param family: Device Family. + :param profile: Device Profile. + :param application: Device Runtime, the use case of the device. + :param api_key: Device API Key. + :returns: Device Exchange Token. + """ + # create an initial assertion grant used to identify the kind of device profile-level. 
+ # TODO: cache this, it doesn't need to be obtained unless the values change + device_grant = self.bamsdk.device.createDeviceGrant( + json={ + "deviceFamily": family, + "applicationRuntime": application, + "deviceProfile": profile, + "attributes": {} + }, + api_key=api_key + ) + if "errors" in device_grant: + raise self.log.exit( + " - Failed to obtain the device assertion grant: " + f"{device_grant['errors']}" + ) + # exchange the assertion grant for a usable device token. + device_token = self.bamsdk.token.exchange( + data={ + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "platform": family, + "subject_token": device_grant["assertion"], + "subject_token_type": self.bamsdk.token.subject_tokens["device"] + }, + api_key=api_key + ) + if "error" in device_token: + raise self.log.exit( + " - Failed to exchange the assertion grant for a device token: " + f"{device_token['error_description']} [{device_token['error']}]" + ) + return device_token["access_token"] + + def get_account_token(self, credential, device_family, device_token): + """ + Get an Account Token using Account Credentials and a Device Token, using a Cache store. + It also refreshes the token if needed. 
+ """ + if not credential: + raise self.log.exit(" - No credentials provided, unable to log in.") + tokens_cache_path = self.get_cache(f"tokens_{self.region}_{credential.sha1}.json") + if os.path.isfile(tokens_cache_path): + self.log.info(" + Using cached tokens...") + with open(tokens_cache_path, encoding="utf-8") as fd: + tokens = json.load(fd) + if os.stat(tokens_cache_path).st_ctime > (time.time() - tokens["expires_in"]): + return tokens + # expired + self.log.info(" + Refreshing...") + tokens = self.refresh_token( + device_family=device_family, + refresh_token=tokens["refresh_token"], + api_key=self.config["device_api_key"] + ) + else: + # first time + self.log.info(" + Getting new tokens...") + tokens = self.create_account_token( + device_family=self.config["bamsdk"]["family"], + email=credential.username, + password=credential.password, + device_token=device_token, + api_key=self.config["device_api_key"] + ) + + os.makedirs(os.path.dirname(tokens_cache_path), exist_ok=True) + with open(tokens_cache_path, "w", encoding="utf-8") as fd: + json.dump(tokens, fd) + + return tokens + + def create_account_token(self, device_family, email, password, device_token, api_key): + """ + Create an Account Token using Account Credentials and a Device Token. + :param device_family: Device Family. + :param email: Account Email. + :param password: Account Password. + :param device_token: Device Token. + :param api_key: Device API Key. + :returns: Account Exchange Tokens. 
+ """ + # log in to the account via bamsdk using the device token + identity_token = self.bamsdk.bamIdentity.identityLogin( + email=email, + password=password, + access_token=device_token + ) + if "errors" in identity_token: + raise self.log.exit( + " - Failed to obtain the identity token: " + f"{identity_token['errors']}" + ) + # create an initial assertion grant used to identify the account + # this seems to tie the account to the device token + account_grant = self.bamsdk.account.createAccountGrant( + json={"id_token": identity_token["id_token"]}, + access_token=device_token + ) + # exchange the assertion grant for a usable account token. + account_tokens = self.bamsdk.token.exchange( + data={ + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "platform": device_family, + "subject_token": account_grant["assertion"], + "subject_token_type": self.bamsdk.token.subject_tokens["account"] + }, + api_key=api_key + ) + # change profile and re-exchange if provided + if self.config.get("profile"): + profile_grant = self.change_profile(self.config["profile"], account_tokens["access_token"]) + account_tokens = self.bamsdk.token.exchange( + data={ + "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange", + "platform": device_family, + "subject_token": profile_grant["assertion"], + "subject_token_type": self.bamsdk.token.subject_tokens["account"] + }, + api_key=api_key + ) + return account_tokens + + def refresh_token(self, device_family, refresh_token, api_key): + """ + Refresh a Token using its adjacent refresh token. + :param device_family: Device Family. + :param refresh_token: Refresh Token. + :param api_key: Device API Key. + :returns: Account Exchange Token. + """ + return self.bamsdk.token.exchange( + data={ + "grant_type": "refresh_token", + "platform": device_family, + "refresh_token": refresh_token + }, + api_key=api_key + ) + + def change_profile(self, profile, access_token): + """ + Change to a different account user profile. 
+ :param profile: profile by name, number, or directly by profile ID. + :param access_token: account access token. + :returns: profile grant tokens. + """ + if not profile: + raise self.log.exit(" - Profile cannot be empty") + try: + profile_id = uuid.UUID(str(profile)) + self.log.info(f" + Switching profile to {profile_id}") + # is UUID + except ValueError: + profiles = self.bamsdk.account.getUserProfiles(access_token) + if isinstance(profile, int): + if len(profiles) < profile: + raise self.log.exit( + " - There isn't a {}{} profile for this account".format( + profile, "tsnrhtdd"[(profile // 10 % 10 != 1) * (profile % 10 < 4) * profile % 10::4] + ) + ) + profile_data = profiles[profile - 1] + else: + profile_data = [x for x in profiles if x["profileName"] == profile] + if not profile_data: + raise self.log.exit(f" - Profile {profile!r} does not exist in this account") + profile_data = profile_data[0] + profile_id = profile_data["profileId"] + self.log.info(f" + Switching profile to {profile_data['profileName']!r} ({profile_id})") + res = self.bamsdk.account.setActiveUserProfile(str(profile_id), access_token) + if "errors" in res: + raise self.log.exit(f" - Failed! 
{res['errors'][0]['description']}") + return res + + def get_manifest_url(self, media_id, scenario): + self.log.info(f"Retrieving manifest for {scenario}") + self.session.headers['x-dss-feature-filtering'] = 'true' + self.session.headers['x-application-version'] = '1.1.2' + self.session.headers['x-bamsdk-client-id'] = 'disney-svod' + self.session.headers['x-bamsdk-platform'] = 'javascript/windows/chrome' + self.session.headers['x-bamsdk-version'] = '28.0' + resolution = "1280x720" if str(self.scenario).lower() == "browser" else "" + + json_data = { + 'playback': { + 'attributes': { + 'resolution': { + 'max': [ + f'{resolution}', + ], + }, + 'protocol': 'HTTPS', + 'assetInsertionStrategy': 'SGAI', + 'playbackInitiationContext': 'ONLINE', + 'frameRates': [ + 60, + ], + }, + }, + 'playbackId': media_id, + } + + manifest = self.session.post( + f'https://disney.playback.edge.bamgrid.com/v7/playback/{scenario}', + headers={"authorization": f"Bearer {self.account_tokens['access_token']}"}, + json=json_data + ).json() + + self.chaps = {} + self.chaps["editorial"] = manifest["stream"].get("editorial", {}) + + return manifest["stream"]["sources"][0]['complete']['url'] + + def get_manifest_tracks(self, url): + self.session.get(url) + tracks = Tracks.from_m3u8(m3u8.load(url), source=self.ALIASES[0]) + if self.acodec: + tracks.audios = [ + x for x in tracks.audios if (x.codec or "").split("-")[0] in self.AUDIO_CODEC_MAP[self.acodec] + ] + for video in tracks.videos: + # This is needed to remove weird glitchy NOP data at the end of stream + video.needs_repack = True + for audio in tracks.audios: + bitrate = re.search(r"(?<=r/composite_)\d+|\d+(?=_complete.m3u8)", as_list(audio.url)[0]) + if not bitrate: + raise self.log.exit(" - Unable to get bitrate for an audio track") + audio.bitrate = int(bitrate.group()) * 1000 + if audio.bitrate == 1000_000: + # DSNP lies about the Atmos bitrate + audio.bitrate = 768_000 + for subtitle in tracks.subtitles: + subtitle.codec = "vtt" + 
subtitle.forced = subtitle.forced or subtitle.extra.name.endswith("--forced--") + # sdh might not actually occur, either way DSNP CC == SDH :) + subtitle.sdh = "[cc]" in subtitle.extra.name.lower() or "[sdh]" in subtitle.extra.name.lower() + return tracks + + def get_hulu_series(self, content_id: str) -> dict: + r = self.session.get( + url=self.config["bamsdk"]["page"].format(id=content_id), + params={ + "disableSmartFocus": True, + "enhancedContainersLimit": 12, + "limit": 999, + }, + headers={"authorization": f"Bearer {self.account_tokens['access_token']}"}, + ).json() + + return r + + def get_hulu_season(self, season_id: str) -> dict: + r = self.session.get( + url=self.config["bamsdk"]["season"].format(id=season_id), + params={"limit": 999}, + headers={"authorization": f"Bearer {self.account_tokens['access_token']}"}, + ).json() + + return r diff --git a/vinetrimmer/services/hotstar1.py b/vinetrimmer/services/hotstar1.py deleted file mode 100644 index 2e47358..0000000 --- a/vinetrimmer/services/hotstar1.py +++ /dev/null @@ -1,456 +0,0 @@ -import base64 -import hashlib -import hmac -import json -import os -import time -import uuid -import re -import requests -from datetime import datetime -from urllib.parse import urlparse, parse_qs -from urllib.request import urlopen, Request -import http.cookiejar as cookiejar - -import click - -from vinetrimmer.objects import Title, Tracks -from vinetrimmer.services.BaseService import BaseService -from vinetrimmer.config import config, directories - -class Hotstar(BaseService): - """ - Service code for Star India's Hotstar (aka Disney+ Hotstar) streaming service (https://hotstar.com). - - \b - Authorization: Credentials - Security: UHD@L3, doesn't seem to care about releases. 
- - \b - Tips: - The library of contents can be viewed without logging in at https://hotstar.com - - The homepage hosts domestic programming; Disney+ content is at https://hotstar.com/in/disneyplus - """ - - ALIASES = ["HS", "hotstar"] - #GEOFENCE = ["in"] - TITLE_RE = r"^(?:https?://(?:www\.)?hotstar\.com/[a-z0-9/-]+/)(?P<id>\d+)" - - @staticmethod - @click.command(name="Hotstar", short_help="https://hotstar.com") - @click.argument("title", type=str, required=False) - @click.option("-q", "--quality", default="hd", - type=click.Choice(["4k", "fhd", "hd", "sd"], case_sensitive=False), - help="Manifest quality to request.") - @click.option("-c", "--channels", default="5.1", type=click.Choice(["5.1", "2.0", "atmos"], case_sensitive=False), - help="Audio Codec") - @click.option("-rg", "--region", default="in", type=click.Choice(["in", "id", "th"], case_sensitive=False), - help="Account region") - @click.pass_context - def cli(ctx, **kwargs): - return Hotstar(ctx, **kwargs) - - def __init__(self, ctx, title, quality, channels, region): - super().__init__(ctx) - self.parse_title(ctx, title) - self.quality = quality - self.channels = channels - self.region = region.lower() - - assert ctx.parent is not None - - self.vcodec = ctx.parent.params["vcodec"] - self.acodec = ctx.parent.params["acodec"] or "EC3" - self.range = ctx.parent.params["range_"] - - - self.profile = ctx.obj.profile - - self.device_id = None - self.hotstar_auth = None - self.token = None - self.license_api = None - - self.configure() - - def get_titles(self): - headers = { - "Accept": "*/*", - "Accept-Language": "en-GB,en;q=0.5", - "hotstarauth": self.hotstar_auth, - "X-HS-UserToken": self.token, - "X-HS-Platform": self.config["device"]["platform"]["name"], - "X-HS-AppVersion": self.config["device"]["platform"]["version"], - "X-Country-Code": self.region, - "x-platform-code": "PCTV" - } - try: - r = self.session.get( - url=self.config["endpoints"]["movie_title"], - headers=headers, - params={"contentId": 
self.title} - ) - try: - res = r.json()["body"]["results"]["item"] - except json.JSONDecodeError: - raise ValueError(f"Failed to load title manifest: {res.text}") - except: - r = self.session.get( - url=self.config["endpoints"]["tv_title"], - headers=headers, - params={"contentId": self.title} - ) - try: - res = r.json()["body"]["results"]["item"] - except json.JSONDecodeError: - raise ValueError(f"Failed to load title manifest: {res.text}") - - if res["assetType"] == "MOVIE": - return Title( - id_=self.title, - type_=Title.Types.MOVIE, - name=res["title"], - year=res["year"], - original_lang=res["langObjs"][0]["iso3code"], - source=self.ALIASES[0], - service_data=res, - ) - else: - r = self.session.get( - url=self.config["endpoints"]["tv_episodes"], - headers=headers, - params={ - "eid": res["id"], - "etid": "2", - "tao": "0", - "tas": "1000" - } - ) - try: - res = r.json()["body"]["results"]["assets"]["items"] - except json.JSONDecodeError: - raise ValueError(f"Failed to load episodes list: {r.text}") - return [Title( - id_=self.title, - type_=Title.Types.TV, - name=x.get("showShortTitle"), - year=x.get("year"), - season=x.get("seasonNo"), - episode=x.get("episodeNo"), - episode_name=x.get("title"), - original_lang=x["langObjs"][0]["iso3code"], - source=self.ALIASES[0], - service_data=x - ) for x in res] - - def get_tracks(self, title): - if title.service_data.get("parentalRating", 0) > 2: - body = json.dumps({ - "devices": [{ - "id": self.device_id, - "name": "Chrome Browser on Windows", - "consentProvided": True - }] - }) - - self.session.post( - url="https://api.hotstar.com/play/v1/consent/content/{id}?".format(id=title.service_data["contentId"]), - headers={ - "Accept": "*/*", - "Content-Type": "application/json", - "hotstarauth": self.hotstar_auth, - "X-HS-UserToken": self.token, - "X-HS-Platform": self.config["device"]["platform"]["name"], - "X-HS-AppVersion": self.config["device"]["platform"]["version"], - "X-HS-Request-Id": str(uuid.uuid4()), - 
"X-Country-Code": self.region - }, - data=body - ).json() - akamai_cdn=True - count = 1 - while akamai_cdn: - r = self.session.post( - url=self.config["endpoints"]["manifest"].format(id=title.service_data["contentId"]), - params={ - # TODO: Perhaps set up desired-config to actual desired playback set values? - "desired-config": "|".join([ - "audio_channel:stereo", - "container:fmp4", - "dynamic_range:sdr", - "encryption:widevine", - "ladder:tv", - "package:dash", - "resolution:fhd", - "video_codec:h264" - ]), - "device-id": self.device_id, - "type": "paid", - }, - headers={ - "Accept": "*/*", - "hotstarauth": self.hotstar_auth, - "x-hs-usertoken": self.token, - "x-hs-request-id": self.device_id, - "x-country-code": self.region - }, - json={ - "os_name": "Windows", - "os_version": "10", - "app_name": "web", - "app_version": "7.34.1", - "platform": "Chrome", - "platform_version": "99.0.4844.82", - "client_capabilities": { - "ads": ["non_ssai"], - "audio_channel": ["stereo"], - "dvr": ["short"], - "package": ["dash", "hls"], - "dynamic_range": ["sdr"], - "video_codec": ["h264"], - "encryption": ["widevine"], - "ladder": ["tv"], - "container": ["fmp4", "ts"], - "resolution": ["hd"] - }, - "drm_parameters": { - "widevine_security_level": ["SW_SECURE_DECODE", "SW_SECURE_CRYPTO"], - "hdcp_version": ["HDCP_V2_2", "HDCP_V2_1", "HDCP_V2", "HDCP_V1"] - }, - "resolution": "auto", - "type": "paid", - } - ) - try: - playback_sets = r.json()["data"]["playback_sets"] - except json.JSONDecodeError: - raise ValueError(f"Manifest fetch failed: {r.text}") - - # transform tagsCombination into `tags` key-value dictionary for easier usage - playback_sets = [dict( - **x, - tags=dict(y.split(":") for y in x["tags_combination"].lower().split(";")) - ) for x in playback_sets] - #self.log.debug(playback_sets) - playback_set = next(( - x for x in playback_sets - if x["tags"].get("encryption") == "widevine" or x["tags"].get("encryption") == "plain" # widevine, fairplay, playready - if 
x["tags"].get("package") == "dash" # dash, hls - if x["tags"].get("container") == "fmp4br" # fmp4, fmp4br, ts - if x["tags"].get("ladder") == "tv" # tv, phone - if x["tags"].get("video_codec").endswith(self.vcodec.lower()) # dvh265, h265, h264 - vp9? - # user defined, may not be available in the tags list: - if x["tags"].get("resolution") in [self.quality, None] # max is fine, -q can choose lower if wanted - if x["tags"].get("dynamic_range") in [self.range.lower(), None] # dv, hdr10, sdr - hdr10+? - if x["tags"].get("audio_codec") in [self.acodec.lower(), None] # ec3, aac - atmos? - if x["tags"].get("audio_channel") in [{"5.1": "dolby51", "2.0": "stereo", "atmos": "atmos"}[self.channels], None] - ), None) - if not playback_set: - playback_set = next(( - x for x in playback_sets - if x["tags"].get("encryption") == "widevine" or x["tags"].get("encryption") == "plain" # widevine, fairplay, playready - if x["tags"].get("package") == "dash" # dash, hls - if x["tags"].get("ladder") == "tv" # tv, phone - if x["tags"].get("resolution") in [self.quality, None] - ), None) - if not playback_set: - raise ValueError("Wanted playback set is unavailable for this title...") - if "licence_url" in playback_set: self.license_api = playback_set["licence_url"] - if playback_set['token_algorithm'] == 'airtel-qwilt-vod' or playback_set['token_algorithm'] == 'AKAMAI-HMAC': - self.log.info(f'Gotcha!') - akamai_cdn = False - else: - self.log.info(f'Finding MPD... 
{count}') - count += 1 - - r = Request(playback_set["playback_url"]) - r.add_header("user-agent", "Hotstar;in.startv.hotstar/3.3.0 (Android/8.1.0)") - data = urlopen(r).read() - - - mpd_url = playback_set["playback_url"] # .replace(".hotstar.com", ".akamaized.net") - self.log.debug(mpd_url) - try: - self.session.headers.update({ - "Cookie": self.hdntl, - }) - except: - pass - - tracks = Tracks.from_mpd( - url=mpd_url, - data=data, - session=self.session, - source=self.ALIASES[0] - ) - for track in tracks: - track.needs_proxy = True - return tracks - - def get_chapters(self, title): - return [] - - def certificate(self, **_): - return None # will use common privacy cert - - def license(self, challenge, **_): - return self.session.post( - url=self.license_api, - data=challenge # expects bytes - ).content - - # Service specific functions - - def configure(self): - self.session.headers.update({ - "Origin": "https://www.hotstar.com", - "Referer": f'"https://www.hotstar.com/{self.region}"' - }) - self.log.info("Logging into Hotstar") - self.log.info(f'Setting region to "{self.region}"') - self.hotstar_auth = self.get_akamai() - self.log.info(f" + Calculated HotstarAuth: {self.hotstar_auth}") - try: - if self.cookies: - hdntl_cookies = [cookie for cookie in self.session.cookies if cookie.name == 'hdntl'] - self.hdntl = f"hdntl={hdntl_cookies[-1].value}" - self.device_id = self.session.cookies.get("deviceId") - self.log.info(f" + Using Device ID: {self.device_id}") - except: - self.device_id = str(uuid.uuid4()) - self.log.info(f" + Created Device ID: {self.device_id}") - self.session.headers.update({ - "dnt": "1" - }) - - self.token = self.get_token() - self.log.info(" + Obtained tokens") - - @staticmethod - def get_akamai(): - enc_key = b"\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee" - st = int(time.time()) - exp = st + 6000 - res = f"st={st}~exp={exp}~acl=/*" - res += "~hmac=" + hmac.new(enc_key, res.encode(), hashlib.sha256).hexdigest() - return res 
- - def get_token(self): - token_cache_path = self.get_cache("token_{profile}.json".format(profile=self.profile)) - if os.path.isfile(token_cache_path): - with open(token_cache_path, encoding="utf-8") as fd: - token = json.load(fd) - if token.get("exp", 0) > int(time.time()): - # not expired, lets use - self.log.info(" + Using cached auth tokens...") - return token["uid"] - else: - # expired, refresh - self.log.info(" + Refreshing and using cached auth tokens...") - return self.save_token(self.refresh(token["uid"], token["sub"]["deviceId"]), token_cache_path) - # get new token - if self.cookies: - token = self.session.cookies.get("sessionUserUP", None, 'www.hotstar.com', '/' + self.region) - else: - raise self.log.exit(f" - Please add cookies") - # token = self.login() - return self.save_token(token, token_cache_path) - - @staticmethod - def save_token(token, to): - # Decode the JWT data component - data = json.loads(base64.b64decode(token.split(".")[1] + "===").decode("utf-8")) - data["uid"] = token - data["sub"] = json.loads(data["sub"]) - - os.makedirs(os.path.dirname(to), exist_ok=True) - with open(to, "w", encoding="utf-8") as fd: - json.dump(data, fd) - - return token - - def refresh(self, user_id_token, device_id): - json_data = { - 'deeplink_url': f'/{self.region}?client_capabilities=%7B%22ads%22%3A%5B%22non_ssai%22%5D%2C%22audio_channel%22%3A%5B%22stereo%22%5D%2C%22container%22%3A%5B%22fmp4%22%2C%22ts%22%5D%2C%22dvr%22%3A%5B%22short%22%5D%2C%22dynamic_range%22%3A%5B%22sdr%22%5D%2C%22encryption%22%3A%5B%22widevine%22%2C%22plain%22%5D%2C%22ladder%22%3A%5B%22web%22%2C%22tv%22%2C%22phone%22%5D%2C%22package%22%3A%5B%22dash%22%2C%22hls%22%5D%2C%22resolution%22%3A%5B%22sd%22%2C%22hd%22%5D%2C%22video_codec%22%3A%5B%22h264%22%5D%2C%22true_resolution%22%3A%5B%22sd%22%2C%22hd%22%2C%22fhd%22%5D%7D&drm_parameters=%7B%22hdcp_version%22%3A%5B%22HDCP_V2_2%22%5D%2C%22widevine_security_level%22%3A%5B%22SW_SECURE_DECODE%22%5D%2C%22playready_security_level%22%3A%5B%5D%7D', - 
'app_launch_count': 1, - } - r = self.session.post( - url=self.config["endpoints"]["refresh"], - headers={ - 'x-hs-usertoken': user_id_token, - 'X-HS-Platform': self.config["device"]["platform"]["name"], - 'X-Country-Code': self.region, - 'X-HS-Accept-language': 'eng', - 'X-Request-Id': str(uuid.uuid4()), - 'x-hs-device-id': device_id, - 'X-HS-Client-Targeting': f'ad_id:{device_id};user_lat:false', - 'x-hs-request-id': str(uuid.uuid4()), - 'X-HS-Client': 'platform:web;app_version:23.06.23.3;browser:Firefox;schema_version:0.0.911', - 'Origin': 'https://www.hotstar.com', - 'Referer': f'https://www.hotstar.com/{self.region}', - }, - json=json_data - ) - for cookie in self.cookies: - if cookie.name == 'sessionUserUP' and cookie.path == f"/{self.region}" and cookie.domain == 'www.hotstar.com': - cookie.value = r.headers["x-hs-usertoken"] - for x in self.ALIASES: - cookie_file = os.path.join(directories.cookies, x.lower(), f"{self.profile}.txt") - if not os.path.isfile(cookie_file): - cookie_file = os.path.join(directories.cookies, x, f"{self.profile}.txt") - if os.path.isfile(cookie_file): - self.cookies.save(cookie_file, ignore_discard=True, ignore_expires=True) - break - return r.headers["x-hs-usertoken"] - - def login(self): - """ - Log in to HOTSTAR and return a JWT User Identity token. - :returns: JWT User Identity token. - """ - if self.credentials.username == "username" and self.credentials.password == "password": - logincode_url = f"https://api.hotstar.com/{self.region}/aadhar/v2/firetv/{self.region}/users/logincode/" - logincode_headers = { - "Content-Length": "0", - "User-Agent": "Hotstar;in.startv.hotstar/3.3.0 (Android/8.1.0)" - } - logincode = self.session.post( - url = logincode_url, - headers = logincode_headers - ).json()["description"]["code"] - print(f"Go to tv.hotstar.com and put {logincode}") - logincode_choice = input('Did you put as informed above? 
(y/n): ') - if logincode_choice.lower() == 'y': - res = self.session.get( - url = logincode_url+logincode, - headers = logincode_headers - ) - else: - self.log.exit(" - Exited.") - raise - else: - res = self.session.post( - url=self.config["endpoints"]["login"], - json={ - "isProfileRequired": "false", - "userData": { - "deviceId": self.device_id, - "password": self.credentials.password, - "username": self.credentials.username, - "usertype": "email" - }, - "verification": {} - }, - headers={ - "hotstarauth": self.hotstar_auth, - "content-type": "application/json" - } - ) - try: - data = res.json() - except json.JSONDecodeError: - self.log.exit(f" - Failed to get auth token, response was not JSON: {res.text}") - raise - if "errorCode" in data: - self.log.exit(f" - Login failed: {data['description']} [{data['errorCode']}]") - raise - return data["description"]["userIdentity"] diff --git a/vinetrimmer/services/hulu.py b/vinetrimmer/services/hulu.py new file mode 100644 index 0000000..8e6515f --- /dev/null +++ b/vinetrimmer/services/hulu.py @@ -0,0 +1,308 @@ +import hashlib +import re +import click +import base64 +import requests +import xml.etree.ElementTree as ET +from langcodes import Language + +from vinetrimmer.objects import TextTrack, Title, Tracks +from vinetrimmer.services.BaseService import BaseService +from vinetrimmer.utils.pyhulu import Device, HuluClient +from vinetrimmer.utils.widevine.device import LocalDevice + +class Hulu(BaseService): + """ + Service code for the Hulu streaming service (https://hulu.com). + + \b + Authorization: Cookies + Security: UHD@L3 + """ + + ALIASES = ["HULU"] + #GEOFENCE = ["us"] + TITLE_RE = (r"^(?:https?://(?:www\.)?hulu\.com/(?P<type>movie|series)/)?(?:[a-z0-9-]+-)?" 
+ r"(?P<id>[a-f0-9]{8}(?:-[a-f0-9]{4}){3}-[a-f0-9]{12})") + + AUDIO_CODEC_MAP = { + "AAC": "mp4a", + "EC3": "ec-3" + } + + @staticmethod + @click.command(name="Hulu", short_help="https://hulu.com") + @click.argument("title", type=str, required=False) + @click.option("-m", "--movie", is_flag=True, default=False, help="Title is a movie.") + @click.pass_context + def cli(ctx, **kwargs): + return Hulu(ctx, **kwargs) + + def __init__(self, ctx, title, movie): + super().__init__(ctx) + m = self.parse_title(ctx, title) + self.movie = movie or m.get("type") == "movie" + + self.vcodec = ctx.parent.params["vcodec"] + self.acodec = ctx.parent.params["acodec"] + + quality = ctx.parent.params.get("quality") or 0 + if quality != "SD" and quality > 1080 and self.vcodec != "H265": + self.log.info("Switched video codec to H265 to be able to get 2160p video track") + self.vcodec = "H265" + + if ctx.parent.params["range_"] == "HDR10": + self.log.info("Switched dynamic range to DV as Hulu only has HDR10+ compatible DV tracks") + ctx.parent.params["range_"] = "DV" + + if ctx.parent.params["range_"] != "SDR" and self.vcodec != "H265": + self.log.info(f"Switched video codec to H265 to be able to get {ctx.parent.params['range_']} dynamic range") + self.vcodec = "H265" + + self.device = None + self.playready = True if "group_certificate" in dir(ctx.obj.cdm.device) else False # ctx.obj.cdm.device.type == LocalDevice.Types.PLAYREADY + self.playback_params = {} + self.hulu_client = None + self.license_url = None + + self.configure() + + def get_titles(self): + titles = [] + + if self.movie: + res = self.session.get(self.config["endpoints"]["movie"].format(id=self.title)).json() + title_data = res["details"]["vod_items"]["focus"]["entity"] + titles.append(Title( + id_=self.title, + type_=Title.Types.MOVIE, + name=title_data["name"], + year=int(title_data["premiere_date"][:4]), + source=self.ALIASES[0], + service_data=title_data + )) + else: + try: + res = 
self.session.get(self.config["endpoints"]["series"].format(id=self.title)).json() + except requests.HTTPError as e: + res = e.response.json() + raise self.log.exit(f" - Failed to get titles for {self.title}: {res['message']} [{res['code']}]") + + season_data = next((x for x in res["components"] if x["name"] == "Episodes"), None) + if not season_data: + raise self.log.exit(" - Unable to get episodes. Maybe you need a proxy?") + + for season in season_data["items"]: + episodes = self.session.get( + self.config["endpoints"]["season"].format( + id=self.title, + season=season["id"].rsplit("::", 1)[1] + ) + ).json() + for episode in episodes["items"]: + titles.append(Title( + id_=f"{season['id']}::{episode['season']}::{episode['number']}", + type_=Title.Types.TV, + name=episode["series_name"], + season=int(episode["season"]), + episode=int(episode["number"]), + episode_name=episode["name"], + source=self.ALIASES[0], + service_data=episode + )) + + playlist = self.hulu_client.load_playlist(titles[0].service_data["bundle"]["eab_id"]) + for title in titles: + title.original_lang = Language.get(playlist["video_metadata"]["language"]) + + return titles + + def remove_parts_mpd(self, mpd): + pattern = r'<Representation[^>]*id="(?![^"]*ALT_1)[^"]*CENC_CTR_[^"]*"[^>]*width="1920"[^>]*height="1080"[^>]*>.*?</Representation>\s*' + m = re.sub(pattern, "", mpd, flags=re.DOTALL) + return m + + def get_pssh(self, kid) -> str: + array_of_bytes = bytearray(b'\x00\x00\x002pssh\x00\x00\x00\x00') + array_of_bytes.extend(bytes.fromhex("edef8ba979d64acea3c827dcd51d21ed")) + array_of_bytes.extend(b'\x00\x00\x00\x12\x12\x10') + array_of_bytes.extend(bytes.fromhex(str(kid).replace("-", ""))) + pssh: str = base64.b64encode(bytes.fromhex(array_of_bytes.hex())).decode("utf-8") + return pssh + + def get_pssh_mpd(self, xml_mpd): + root = ET.fromstring(xml_mpd) + pssh = None + namespaces = { + '': 'urn:mpeg:dash:schema:mpd:2011', + 'cenc': 'urn:mpeg:cenc:2013' + } + content_protection = 
root.find(".//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet//{urn:mpeg:dash:schema:mpd:2011}ContentProtection[@schemeIdUri='urn:mpeg:dash:mp4protection:2011'][@value='cenc']", namespaces) + if content_protection is not None: + default_kid = content_protection.get('{urn:mpeg:cenc:2013}default_KID') + kid = default_kid.replace('-', '') + pssh = self.get_pssh(kid) + + return pssh + + def get_tracks(self, title): + try: + playlist = self.hulu_client.load_playlist(title.service_data["bundle"]["eab_id"]) + except requests.HTTPError as e: + res = e.response.json() + raise self.log.exit(f" - {res['message']} ({res['code']})") + + self.license_url = playlist["dash_pr_server"] if self.playready else playlist["wv_server"] + + manifest = playlist["stream_url"] + + if 'disney' in manifest: + mpd = self.session.get(manifest).text + mpd_data = self.remove_parts_mpd(mpd) + pssh = self.get_pssh_mpd(mpd_data) + + tracks = Tracks.from_mpd( + url=manifest, + session=self.session, + source=self.ALIASES[0] + ) + #for track in tracks: + # print("pssh:",track.pssh) + + if not self.playready: + tracks0 = [] + for track in tracks.videos: + track.psshWV = [pssh] + if int(track.width) >= int(1920): + rep = track.extra[0] + id = rep.get("id") + if 'ALT_1' in id: + tracks0.append(track) + else: + tracks0.append(track) + tracks.videos = tracks0 + else: + tracks = Tracks.from_mpd( + url=manifest, + session=self.session, + source=self.ALIASES[0] + ) + + for track in tracks.videos: + if track.hdr10: + # MPD only says HDR10+, but Hulu HDR streams are always Dolby Vision Profile 8 with HDR10+ compatibility + track.hdr10 = False + track.dv = True + + for track in tracks.audios: + if not track.psshPR: + track.psshPR = next(x.psshPR for x in tracks.videos if x.psshPR) + if not track.psshWV: + track.psshWV = next(x.psshWV for x in tracks.videos if x.psshWV) + + if self.acodec: + tracks.audios = [x for x in tracks.audios if (x.codec or "")[:4] == self.AUDIO_CODEC_MAP[self.acodec]] + + try: + for sub_lang, 
sub_url in playlist["transcripts_urls"]["webvtt"].items(): + tracks.add(TextTrack( + id_=hashlib.md5(sub_url.encode()).hexdigest()[0:6], + source=self.ALIASES[0], + url=sub_url, + # metadata + codec="vtt", + language=sub_lang, + forced=False, # TODO: find out if sub is forced + sdh=False # TODO: find out if sub is SDH/CC, it's actually quite likely to be true + )) + except KeyError: + pass + + return tracks + + def get_chapters(self, title): + return [] + + def certificate(self, **_): + return None # will use common privacy cert + + def license(self, challenge, track, **_): + res = self.session.post( + url=self.license_url, + data=challenge # expects bytes + ) + self.log.debug(res.text) if self.playready else self.log.debug(res.content) + return base64.b64encode(res.text.encode()).decode() if self.playready else res.content + + # Service specific functions + + def configure(self): + self.device = Device( + device_code=self.config["device"]["FireTV4K"]["code"], + device_key=self.config["device"]["FireTV4K"]["key"] + ) + self.session.headers.update({ + "User-Agent": self.config["user_agent"], + }) + for schemas in self.config["drm"]["schemas"]: + if schemas["type"] == "WIDEVINE": + schemas_widevine = [schemas] + elif schemas["type"] == "PLAYREADY": + schemas_playready = [schemas] + self.playback_params = { + "all_cdn": False, + "region": "US", + "language": "en", + "interface_version": "1.9.0", + "network_mode": "wifi", + "play_intent": "resume", + "playback": { + "version": 2, + "video": { + "dynamic_range": "DOLBY_VISION", + "codecs": { + "values": [x for x in self.config["codecs"]["video"] if x["type"] == self.vcodec], + "selection_mode": self.config["codecs"]["video_selection"] + } + }, + "audio": { + "codecs": { + "values": self.config["codecs"]["audio"], + "selection_mode": self.config["codecs"]["audio_selection"] + } + }, + "drm": { + "multi_key": True, + "values": schemas_playready if self.playready else schemas_widevine, + "selection_mode": 
self.config["drm"]["selection_mode"], + "hdcp": self.config["drm"]["hdcp"] + }, + "manifest": { + "type": "DASH", + "https": True, + "multiple_cdns": False, + "patch_updates": True, + "hulu_types": True, + "live_dai": True, + "secondary_audio": True, + "live_fragment_delay": 3 + }, + "segments": { + "values": [{ + "type": "FMP4", + "encryption": { + "mode": "CENC", + "type": "CENC" + }, + "https": True + }], + "selection_mode": "ONE" + } + } + } + self.hulu_client = HuluClient( + device=self.device, + session=self.session, + version=self.config["device"].get("device_version"), + **self.playback_params + ) diff --git a/vinetrimmer/services/paramountplus.py b/vinetrimmer/services/paramountplus.py new file mode 100644 index 0000000..ece80ce --- /dev/null +++ b/vinetrimmer/services/paramountplus.py @@ -0,0 +1,436 @@ +import json +import re +from urllib.parse import urljoin +from datetime import datetime, timedelta +from pathlib import Path + +import click +import m3u8 +import requests +import base64 +import jsonpickle + +from vinetrimmer.objects import Title, Tracks +from vinetrimmer.objects.tracks import AudioTrack, MenuTrack, TextTrack, VideoTrack +from vinetrimmer.services.BaseService import BaseService +from vinetrimmer.utils.widevine.device import LocalDevice + +class ParamountPlus(BaseService): + """ + Service code for Paramount's Paramount+ streaming service (https://paramountplus.com). + + \b + Authorization: Credentials + Security: UHD@L3, doesn't care about releases. 
+ """ + + ALIASES = ["PMTP", "paramountplus", "paramount+"] + TITLE_RE = [ + r"^https?://(?:www\.)?paramountplus\.com/(?P<type>movies)/[a-z0-9_-]+/(?P<id>\w+)", + r"^https?://(?:www\.)?paramountplus\.com/(?P<type>shows)/(?P<id>[a-zA-Z0-9_-]+)(/)?", + r"^https?://(?:www\.)?paramountplus\.com(?:/[a-z]{2})?/(?P<type>movies)/[a-z0-9_-]+/(?P<id>\w+)", + r"^https?://(?:www\.)?paramountplus\.com(?:/[a-z]{2})?/(?P<type>shows)/(?P<id>[a-zA-Z0-9_-]+)(/)?", + r"^(?P<id>\d+)$", + ] + VIDEO_CODEC_MAP = {"H264": ["avc", "avc1"], "H265": ["hvc", "dvh", "hvc1", "hev1", "dvh1", "dvhe"]} + AUDIO_CODEC_MAP = {"AAC": "mp4a", "AC3": "ac-3", "EC3": "ec-3"} + + @staticmethod + @click.command(name="ParamountPlus", short_help="https://paramountplus.com") + @click.argument("title", type=str, required=False) + @click.option("-m", "--movie", is_flag=True, default=False, help="Title is a Movie.") + @click.option( + "-c", "--clips", is_flag=True, default=False, help="Download clips instead of episodes (for TV shows)" + ) + @click.pass_context + def cli(ctx: click.Context, **kwargs): + return ParamountPlus(ctx, **kwargs) + + def __init__(self, ctx: click.Context, title: str, movie: bool, clips: bool): + super().__init__(ctx) + m = self.parse_title(ctx, title) + self.movie = movie or m.get("type") == "movies" + self.clips = clips + + self.vcodec = ctx.parent.params["vcodec"] + self.acodec = ctx.parent.params["acodec"] + self.range = ctx.parent.params["range_"] + self.wanted = ctx.parent.params["wanted"] + self.shorts = False + + self.profile = ctx.obj.profile + self.playready = ctx.obj.cdm.device.type == LocalDevice.Types.PLAYREADY + + ctx.parent.params["acodec"] = "EC3" + + if self.range != "SDR": + # vcodec must be H265 for High Dynamic Range + self.vcodec = "H265" + + self.configure() + + def get_titles(self): + if self.movie: + res = self.session.get( + url=self.config[self.region]["movie"].format(title_id=self.title), + params={ + "includeTrailerInfo": "true", + "includeContentInfo": "true", 
+ "locale": "en-us", + "at": self.config[self.region]["at_token"], + }, + ).json() + if not res["success"]: + if res["message"] == "No movie found for contentId.": + raise self.log.exit(" - Unable to find movie. For TV shows, use the numeric ID.") + else: + raise self.log.exit(f" - Failed to get title information: {res['message']}") + + title = res["movie"]["movieContent"] + + return Title( + id_=title["contentId"], + type_=Title.Types.MOVIE, + name=title["title"], + year=title["_airDateISO"][:4], # todo: find a way to get year, this api doesnt return it + original_lang="en", # TODO: Don't assume + source=self.ALIASES[0], + service_data=title, + ) + else: + res = self.session.get( + url=self.config[self.region]["shows"].format(title=self.title) + ).json() + links = next((x.get("links") for x in res["showMenu"] if x.get("device_app_id") == "all_platforms"), None) + config = next((x.get("videoConfigUniqueName") for x in links if x.get("title").strip() == "Episodes"), None) + show = next((x for x in res["show"]["results"] if x.get("type") == "show"), None) + seasons = [x["seasonNum"] for x in res["available_video_seasons"]["itemList"] if x.get("seasonNum")] + showId = show.get("show_id") + + show_data = self.session.get( + url=self.config[self.region]["section"].format(showId=showId, config=config), + params={"platformType": "apps", "rows": "1", "begin": "0"}, + ).json() + + section = next( + (x["sectionId"] for x in show_data["videoSectionMetadata"] if x["title"] == "Full Episodes"), None + ) + + episodes = [] + for season in seasons: + res = self.session.get( + url=self.config[self.region]["seasons"].format(section=section), + params={"begin": "0", "rows": "999", "params": f"seasonNum={season}", "seasonNum": season}, + ).json() + episodes.extend(res["sectionItems"].get("itemList")) + + titles = [] + for episode in episodes: + titles.append( + Title( + id_=episode.get("contentId") or episode.get("content_id"), + type_=Title.Types.TV, + name=episode.get("seriesTitle") 
or episode.get("series_title"), + season=episode.get("seasonNum") or episode.get("season_number") or 0, + episode=episode["episodeNum"] if episode["fullEpisode"] else episode["positionNum"], + episode_name=episode["label"], + original_lang="en", # TODO: Don't assume + source=self.ALIASES[0], + service_data=episode, + ) + ) + + return titles + + def get_tracks(self, title: Title): + assets = ( + ["DASH_CENC_HDR10"], + [ + "HLS_AES", + "DASH_LIVE", + "DASH_CENC_HDR10", + "DASH_TA", + "DASH_CENC", + "DASH_CENC_PRECON", + "DASH_CENC_PS4", + ], + ) + for asset in assets: + r = requests.Request( + "GET", + url=self.config["LINK_PLATFORM_URL"].format(video_id=title.id), + params={ + "format": "redirect", + "formats": "MPEG-DASH", + "assetTypes": "|".join(asset), + "manifest": "M3U", + "Tracking": "true", + "mbr": "true", + }, + ) + req = self.session.send(self.session.prepare_request(r), allow_redirects=False) + if req.ok: + break + else: + raise ValueError(f"Manifest Error: {req.text}") + + mpd_url = req.headers.get('location') + + try: + tracks: Tracks = Tracks.from_mpd( + url=mpd_url.replace("cenc_precon_dash", "cenc_dash"), + source=self.ALIASES[0], + session=self.session, + ) + except: + tracks: Tracks = Tracks.from_mpd( + url=mpd_url, + source=self.ALIASES[0], + session=self.session, + ) + tracks.subtitles.clear() + + req = self.session.get( + url=self.config["LINK_PLATFORM_URL"].format(video_id=title.id), + params={ + "format": "redirect", + "formats": "M3U", + "assetTypes": "|".join(["HLS_FPS_PRECON"]), + "manifest": "M3U", + "Tracking": "true", + "mbr": "true", + }, + ) + hls_url = req.url + + tracks_m3u8 = Tracks.from_m3u8( + m3u8.load(hls_url), + source=self.ALIASES[0], + ) + tracks.subtitles = tracks_m3u8.subtitles + + for track in tracks: + # track.id = track.id + if isinstance(track, VideoTrack): + track.hdr10 = ( + track.codec[:4] in ("hvc1", "hev1") and track.extra[0].attrib.get("codecs")[5] == "2" + ) or (track.codec[:4] in ("hvc1", "hev1") and 
"HDR10plus" in track.url) + + track.dv = track.codec[:4] in ("dvh1", "dvhe") + + if isinstance(track, VideoTrack) or isinstance(track, AudioTrack): + if self.shorts: + track.encrypted = False + + if isinstance(track, TextTrack): + track.codec = "vtt" + #if track.language.language == "en": + # track.sdh = True # TODO: don't assume SDH + + if self.vcodec: + tracks.videos = [x for x in tracks.videos if (x.codec or "")[:4] in self.VIDEO_CODEC_MAP[self.vcodec]] + + if self.acodec: + tracks.audios = [x for x in tracks.audios if (x.codec or "")[:4] == self.AUDIO_CODEC_MAP[self.acodec]] + + return tracks + + def get_chapters(self, title: Title): + chapters = [] + events = title.service_data.get("playbackEvents") + events = {k: v for k, v in events.items() if v is not None} + events = dict(sorted(events.items(), key=lambda item: item[1])) + if not events: + return chapters + + chapters_titles = { + "endCreditChapterTimeMs": "Credits", + "previewStartTimeMs": "Preview Start", + "previewEndTimeMs": "Preview End", + "openCreditEndTimeMs": "openCreditEnd", + "openCreditStartTime": "openCreditStart", + } + + for name, time_ in events.items(): + if isinstance(time_, (int, float)): + chapters.append( + MenuTrack( + number=len(chapters) + 1, + title=chapters_titles.get(name), + timecode=MenuTrack.format_duration(time_ / 1000), + ) + ) + + # chapters = sorted(chapters, key=self.converter_timecode) + + return chapters + + def certificate(self, **_): + return None # will use common privacy cert + + def license(self, challenge, title, **_): + contentId = title.service_data.get("contentId") or title.service_data.get("content_id") + if not contentId: + raise self.log.exit("Error") + + r = self.session.post( + url=self.config["license_pr"] if self.playready else self.config["license"], + params={ + "CrmId": "cbsi", + "AccountId": "cbsi", + "SubContentType": "Default", + "ContentId": title.service_data.get("contentId") or title.service_data.get("content_id"), + }, + 
headers={"Authorization": f"Bearer {self.get_barrear(content_id=contentId)}"}, + data=challenge, # expects bytes + ) + + if r.headers["Content-Type"].startswith("application/json"): + res = r.json() + raise self.log.exit(res["message"]) + + return base64.b64encode(r.content).decode() if self.playready else r.content + + def configure(self): + self.region = self.session.get("https://ipinfo.io/json").json()["country"] + if self.region != "US": + if self.region != "FR": + self.region = "INTL" + + #self.device_cache_path = Path(self.get_cache("device_tokens_{profile}.json".format( + #profile=self.profile, + #))) + + #if self.device_cache_path.exists(): + #with open(self.device_cache_path, encoding="utf-8") as fd: + #date = jsonpickle.decode(fd.read()) + #if "expiry" in date and datetime.fromisoformat(date["expiry"]) > datetime.now(): + #self.log.warning(" + Using cached device tokens") + #cache = date + #else: + #self.log.warning(" + Refreshing cookies") + #self.device_cache_path.unlink() + #if not self.credentials: + #raise self.log.exit(" - No credentials provided, unable to log in.") + #self.session.headers.update({"user-agent": self.config["Android"]["UserAgent"]}) + #self.session.params.update({"at": self.config[self.region]["at_token"]}) + #username = self.credentials.username + #password = self.credentials.password + #expiry = (datetime.now() + timedelta(minutes=3)).isoformat() + #cookie = self.login(username=username, password=password) + #cache = {"cookie": cookie, "expiry": expiry} + #self.device_cache_path.parent.mkdir(exist_ok=True, parents=True) + #with open(self.device_cache_path, "w", encoding="utf-8") as fd: + #fd.write(jsonpickle.encode(cache)) + #else: + if not self.credentials: + raise self.log.exit(" - No credentials provided, unable to log in.") + self.log.warning(" + Logging in") + self.session.headers.update({"user-agent": self.config["Android"]["UserAgent"]}) + self.session.params.update({"at": self.config[self.region]["at_token"]}) + username = 
self.credentials.username + password = self.credentials.password + #expiry = (datetime.now() + timedelta(minutes=3)).isoformat() + cookie = self.login(username=username, password=password) + #cache = {"cookie": cookie, "expiry": expiry} + #self.device_cache_path.parent.mkdir(exist_ok=True, parents=True) + #with open(self.device_cache_path, "w", encoding="utf-8") as fd: + #fd.write(jsonpickle.encode(cache)) + #cookie = cache["cookie"] + self.session.headers.update({"cookie": cookie}) + else: + self.session.headers.update( + { + "Origin": "https://www.paramountplus.com", + } + ) + self.session.params.update({"at": self.config[self.region]["at_token"]}) + + #if not self.is_logged_in(): + #raise ValueError("InvalidCookies") + + #if not self.is_subscribed(): + #raise ValueError("NotEntitled") + + # Service specific functions + + def get_prop(self, prop): + res = self.session.get("https://www.paramountplus.com") + prop_re = prop.replace(".", r"\.") + search = re.search(rf"{prop_re} ?= ?[\"']?([^\"';]+)", res.text) + if not search: + raise ValueError("InvalidCookies") + + return search.group(1) + + def is_logged_in(self): + return self.get_prop("CBS.UserAuthStatus") == "true" + + def is_subscribed(self): + return self.get_prop("CBS.Registry.user.sub_status") == "SUBSCRIBER" + + def login(self, username, password): + login_params = { + "j_username": username, + "j_password": password + } + + response = self.session.post(url=self.config[self.region]["login"], params=login_params) + + status_response = self.session.get(url=self.config[self.region]["status"]).json() + self.log.debug(status_response) + if status_response["success"] == False: + raise self.log.exit("InvalidCredentials") + #if not status_response["userStatus"]["description"] == "SUBSCRIBER": + #raise ValueError("NotEntitled") + + cookies = ";".join([f"{key}={value}" for key, value in response.cookies.get_dict().items()]) + + return cookies + + def get_barrear(self, content_id): + #license_data = 
self.session.get(url="https://www.intl.paramountplus.com/apps-api/v3.0/androidphone/irdeto-control/session-token.json?contentId=%s&locale=en-us&at=ABATOpD5wXyjhjIMO0BaNh/gW0iCu0ISRy2U7/tyGiKZTQTlYDFL1NPD58CcuJLOQYY=" % (content_id)).json() + try: + res = self.session.get( + url=self.config[self.region]["barrearUrl"].replace("iphone", "androidtv") if self.playready else self.config[self.region]["barrearUrl"], + params={"contentId": content_id} + ) + res.raise_for_status() + except requests.HTTPError as e: + if e.response.status_code == 401: + self.log.warning("Received a 401 error, deleting cached cookies") + self.device_cache_path.unlink() + self.session.headers.clear() + self.session.params = {} + self.configure() + self.retrying = True + + res = res.json() + + if not res["success"]: + raise self.log.exit("Unable to get license token: %s" % (res["errors"])) + + self.license_url = res["url"] + ls_session = res["ls_session"] + + return ls_session + + def parse_movie_year(self, url): + html_raw = self.session.get(url) + + if html_raw.status_code != 200: + return None + + self.year = int( + re.findall('"movie__air-year">[0-9]+<', html_raw.text)[0].replace('"movie__air-year">', "").replace("<", "") + ) + + def parse_show_id(self, url): + html_raw = self.session.get(url) + + if html_raw.status_code != 200: + self.log.exit("Could not parse Show Id.") + + show = json.loads('{"' + re.search('CBS.Registry.Show = {"(.*)"}', html_raw.text).group(1) + '"}') + + return str(show["id"])