Unshackle-Services/CR-FIX/__init__.py

import re
import time
import uuid
from threading import Lock
from typing import Generator, Optional, Union

import click
import jwt
from langcodes import Language

from unshackle.core.manifests import DASH
from unshackle.core.search_result import SearchResult
from unshackle.core.service import Service
from unshackle.core.session import session
from unshackle.core.titles import Episode, Series
from unshackle.core.tracks import Attachment, Chapters, Tracks
from unshackle.core.tracks.chapter import Chapter
from unshackle.core.tracks.subtitle import Subtitle


class CR(Service):
    """
    Service code for Crunchyroll streaming service (https://www.crunchyroll.com).

    \b
    Version: 2.0.0
    Author: sp4rk.y
    Date: 2025-11-01
    Authorization: Credentials
    Robustness:
        Widevine:
            L3: 1080p, AAC2.0

    \b
    Tips:
        - Input should be complete URL or series ID
            https://www.crunchyroll.com/series/GRMG8ZQZR/series-name OR GRMG8ZQZR
        - Supports multiple audio and subtitle languages
        - Device ID is cached for consistent authentication across runs

    \b
    Notes:
        - Uses password-based authentication with token caching
        - Manages concurrent stream limits automatically
    """

    TITLE_RE = r"^(?:https?://(?:www\.)?crunchyroll\.com/(?:series|watch)/)?(?P<id>[A-Z0-9]+)"
    LICENSE_LOCK = Lock()
    MAX_CONCURRENT_STREAMS = 3
    ACTIVE_STREAMS: list[tuple[str, str]] = []

    @staticmethod
    def get_session():
        return session("okhttp4")

    @staticmethod
    @click.command(name="CR", short_help="https://crunchyroll.com")
    @click.argument("title", type=str, required=True)
    @click.pass_context
    def cli(ctx, **kwargs) -> "CR":
        return CR(ctx, **kwargs)

    def __init__(self, ctx, title: str):
        self.title = title
        self.account_id: Optional[str] = None
        self.access_token: Optional[str] = None
        self.token_expiration: Optional[int] = None
        self.anonymous_id = str(uuid.uuid4())

        super().__init__(ctx)

        device_cache_key = "cr_device_id"
        cached_device = self.cache.get(device_cache_key)

        if cached_device and not cached_device.expired:
            self.device_id = cached_device.data["device_id"]
        else:
            self.device_id = str(uuid.uuid4())
            cached_device.set(
                data={"device_id": self.device_id},
                expiration=60 * 60 * 24 * 365 * 10,
            )

        self.device_name = self.config.get("device", {}).get("name", "SHIELD Android TV")
        self.device_type = self.config.get("device", {}).get("type", "ANDROIDTV")

        self.session.headers.update(self.config.get("headers", {}))
        self.session.headers["etp-anonymous-id"] = self.anonymous_id

    @property
    def auth_header(self) -> dict:
        """Return authorization header dict."""
        return {"authorization": f"Bearer {self.access_token}"}

    def ensure_authenticated(self) -> None:
        """Check if token is expired and re-authenticate if needed."""
        if not self.token_expiration:
            cache_key = f"cr_auth_token_{self.credential.sha1 if self.credential else 'default'}"
            cached = self.cache.get(cache_key)

            if cached and not cached.expired:
                self.access_token = cached.data["access_token"]
                self.account_id = cached.data.get("account_id")
                self.token_expiration = cached.data.get("token_expiration")
                self.session.headers.update(self.auth_header)
                self.log.debug("Loaded authentication from cache")
            else:
                self.log.debug("No valid cached token, authenticating")
                self.authenticate(credential=self.credential)
                return

        current_time = int(time.time())
        if current_time >= (self.token_expiration - 60):
            self.log.debug("Authentication token expired or expiring soon, re-authenticating")
            self.authenticate(credential=self.credential)

    def authenticate(self, cookies=None, credential=None) -> None:
        """Authenticate using username and password credentials."""
        super().authenticate(cookies, credential)

        cache_key = f"cr_auth_token_{credential.sha1 if credential else 'default'}"
        cached = self.cache.get(cache_key)

        if cached and not cached.expired:
            self.access_token = cached.data["access_token"]
            self.account_id = cached.data.get("account_id")
            self.token_expiration = cached.data.get("token_expiration")
        else:
            if not credential:
                class HardcodedCreds:
                    username = "akjrtx@gmail.com"
                    password = "Ariyan@45"
                    sha1 = "dummy_hash"
                credential = HardcodedCreds()

            response = self.session.post(
                url=self.config["endpoints"]["token"],
                headers={
                    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
                    "request-type": "SignIn",
                },
                data={
                    "grant_type": "password",
                    "username": credential.username,
                    "password": credential.password,
                    "scope": "offline_access",
                    "client_id": self.config["client"]["id"],
                    "client_secret": self.config["client"]["secret"],
                    "device_type": self.device_type,
                    "device_id": self.device_id,
                    "device_name": self.device_name,
                },
            )

            if response.status_code != 200:
                self.log.error(f"Login failed: {response.status_code}")
                try:
                    error_data = response.json()
                    error_msg = error_data.get("error", "Unknown error")
                    error_code = error_data.get("code", "")
                    self.log.error(f"Error: {error_msg} ({error_code})")
                except Exception:
                    self.log.error(f"Response: {response.text}")
                response.raise_for_status()

            token_data = response.json()
            self.access_token = token_data["access_token"]
            self.account_id = self.get_account_id()

            try:
                decoded_token = jwt.decode(self.access_token, options={"verify_signature": False})
                self.token_expiration = decoded_token.get("exp")
            except Exception:
                self.token_expiration = int(time.time()) + token_data.get("expires_in", 3600)

            cached.set(
                data={
                    "access_token": self.access_token,
                    "account_id": self.account_id,
                    "token_expiration": self.token_expiration,
                },
                expiration=self.token_expiration
                if isinstance(self.token_expiration, int) and self.token_expiration > int(time.time())
                else 3600,
            )

        self.session.headers.update(self.auth_header)

        if self.ACTIVE_STREAMS:
            self.ACTIVE_STREAMS.clear()

        try:
            self.clear_all_sessions()
        except Exception as e:
            self.log.warning(f"Failed to clear previous sessions: {e}")

    def get_titles(self) -> Union[Series]:
        """Fetch series and episode information."""
        series_id = self.parse_series_id(self.title)

        series_response = self.session.get(
            url=self.config["endpoints"]["series"].format(series_id=series_id),
            params={"locale": self.config["params"]["locale"]},
        ).json()

        if "error" in series_response:
            raise ValueError(f"Series not found: {series_id}")

        series_data = (
            series_response.get("data", [{}])[0] if isinstance(series_response.get("data"), list) else series_response
        )
        series_title = series_data.get("title", "Unknown Series")

        seasons_response = self.session.get(
            url=self.config["endpoints"]["seasons"].format(series_id=series_id),
            params={"locale": self.config["params"]["locale"]},
        ).json()

        seasons_data = seasons_response.get("data", [])

        if not seasons_data:
            raise ValueError(f"No seasons found for series: {series_id}")

        all_episode_data = []
        special_episodes = []

        for season in seasons_data:
            season_id = season["id"]
            season_number = season.get("season_number", 0)

            episodes_response = self.session.get(
                url=self.config["endpoints"]["season_episodes"].format(season_id=season_id),
                params={"locale": self.config["params"]["locale"]},
            ).json()

            episodes_data = episodes_response.get("data", [])

            for episode_data in episodes_data:
                episode_number = episode_data.get("episode_number")

                if episode_number is None or isinstance(episode_number, float):
                    special_episodes.append(episode_data)

                all_episode_data.append((episode_data, season_number))

        if not all_episode_data:
            raise ValueError(f"No episodes found for series: {series_id}")

        series_year = None
        if all_episode_data:
            first_episode_data = all_episode_data[0][0]
            first_air_date = first_episode_data.get("episode_air_date")
            if first_air_date:
                series_year = int(first_air_date[:4])

        special_episodes.sort(key=lambda x: x.get("episode_air_date", ""))
        special_episode_numbers = {ep["id"]: idx + 1 for idx, ep in enumerate(special_episodes)}
        episodes = []
        season_episode_counts = {}

        for episode_data, season_number in all_episode_data:
            episode_number = episode_data.get("episode_number")

            if episode_number is None or isinstance(episode_number, float):
                final_season = 0
                final_number = special_episode_numbers[episode_data["id"]]
            else:
                final_season = season_number
                if final_season not in season_episode_counts:
                    season_episode_counts[final_season] = 0

                season_episode_counts[final_season] += 1
                final_number = season_episode_counts[final_season]

            original_language = None
            versions = episode_data.get("versions", [])
            for version in versions:
                if "main" in version.get("roles", []):
                    original_language = version.get("audio_locale")
                    break

            episode = Episode(
                id_=episode_data["id"],
                service=self.__class__,
                title=series_title,
                season=final_season,
                number=final_number,
                name=episode_data.get("title"),
                year=series_year,
                language=original_language,
                description=episode_data.get("description"),
                data=episode_data,
            )
            episodes.append(episode)

        return Series(episodes)

    def set_track_metadata(self, tracks: Tracks, episode_id: str, is_original: bool) -> None:
        """Set metadata for video and audio tracks."""
        for video in tracks.videos:
            video.needs_repack = True
            video.data["episode_id"] = episode_id
            video.is_original_lang = is_original
        for audio in tracks.audio:
            audio.data["episode_id"] = episode_id
            audio.is_original_lang = is_original

    def get_tracks(self, title: Episode) -> Tracks:
        """Fetch video, audio, and subtitle tracks for an episode."""
        self.ensure_authenticated()

        episode_id = title.id

        if self.ACTIVE_STREAMS:
            self.ACTIVE_STREAMS.clear()

        self.clear_all_sessions()

        initial_response = self.get_playback_data(episode_id, track_stream=False)
        versions = initial_response.get("versions", [])

        if not versions:
            self.log.warning("No versions found in playback response, using single version")
            versions = [{"audio_locale": initial_response.get("audioLocale", "ja-JP")}]

        tracks = None

        for idx, version in enumerate(versions):
            audio_locale = version.get("audio_locale")
            version_guid = version.get("guid")
            is_original = version.get("original", False)

            if not audio_locale:
                continue

            request_episode_id = version_guid if version_guid else episode_id

            if idx == 0 and not version_guid:
                version_response = initial_response
                version_token = version_response.get("token")
            else:
                if idx == 1 and not versions[0].get("guid"):
                    initial_token = initial_response.get("token")
                    if initial_token:
                        self.close_stream(episode_id, initial_token)

                try:
                    version_response = self.get_playback_data(request_episode_id, track_stream=False)
                except ValueError as e:
                    self.log.warning(f"Could not get playback info for audio {audio_locale}: {e}")
                    continue

                version_token = version_response.get("token")

            hard_subs = version_response.get("hardSubs", {})
            dash_url = None

            if "none" in hard_subs:
                dash_url = hard_subs["none"].get("url")
            elif hard_subs:
                first_key = list(hard_subs.keys())[0]
                dash_url = hard_subs[first_key].get("url")

            if not dash_url:
                self.log.warning(f"No DASH manifest found for audio {audio_locale}, skipping")
                if version_token:
                    self.close_stream(request_episode_id, version_token)
                continue

            try:
                version_tracks = DASH.from_url(
                    url=dash_url,
                    session=self.session,
                ).to_tracks(language=audio_locale)

                if tracks is None:
                    tracks = version_tracks
                    self.set_track_metadata(tracks, request_episode_id, is_original)
                else:
                    self.set_track_metadata(version_tracks, request_episode_id, is_original)
                    for video in version_tracks.videos:
                        tracks.add(video)
                    for audio in version_tracks.audio:
                        tracks.add(audio)

            except Exception as e:
                self.log.warning(f"Failed to parse DASH manifest for audio {audio_locale}: {e}")
                if version_token:
                    self.close_stream(request_episode_id, version_token)
                continue

            if is_original:
                captions = version_response.get("captions", {})
                subtitles_data = version_response.get("subtitles", {})
                all_subs = {**captions, **subtitles_data}

                for lang_code, sub_data in all_subs.items():
                    if lang_code == "none":
                        continue

                    if isinstance(sub_data, dict) and "url" in sub_data:
                        try:
                            lang = Language.get(lang_code)
                        except (ValueError, LookupError):
                            lang = Language.get("en")

                        subtitle_format = sub_data.get("format", "vtt").lower()
                        if subtitle_format == "ass" or subtitle_format == "ssa":
                            codec = Subtitle.Codec.SubStationAlphav4
                        else:
                            codec = Subtitle.Codec.WebVTT

                        tracks.add(
                            Subtitle(
                                id_=f"subtitle-{audio_locale}-{lang_code}",
                                url=sub_data["url"],
                                codec=codec,
                                language=lang,
                                forced=False,
                                sdh=False,
                            ),
                            warn_only=True,
                        )

            if version_token:
                self.close_stream(request_episode_id, version_token)

        if versions and versions[0].get("guid"):
            initial_token = initial_response.get("token")
            if initial_token:
                self.close_stream(episode_id, initial_token)

        if tracks is None:
            raise ValueError(f"Failed to fetch any tracks for episode: {episode_id}")

        for track in tracks.audio + tracks.subtitles:
            if track.language:
                try:
                    lang_obj = Language.get(str(track.language))
                    base_lang = Language.get(lang_obj.language)
                    lang_display = base_lang.language_name()
                    track.name = lang_display
                except (ValueError, LookupError):
                    pass

        images = title.data.get("images", {})
        thumbnails = images.get("thumbnail", [])
        if thumbnails:
            thumb_variants = thumbnails[0] if isinstance(thumbnails[0], list) else [thumbnails[0]]
            if thumb_variants:
                thumb_index = min(7, len(thumb_variants) - 1)
                thumb = thumb_variants[thumb_index]
                if isinstance(thumb, dict) and "source" in thumb:
                    thumbnail_name = f"{title.name or title.title} - S{title.season:02d}E{title.number:02d}"

        return tracks

    def get_widevine_license(self, challenge: bytes, title: Episode, track) -> bytes:
        """
        Get Widevine license for decryption.

        Creates a fresh playback session for each track, gets the license, then immediately
        closes the stream. This prevents hitting the 3 concurrent stream limit.
        CDN authorization is embedded in the manifest URLs, not tied to active sessions.
        """
        self.ensure_authenticated()

        track_episode_id = track.data.get("episode_id", title.id)

        with self.LICENSE_LOCK:
            playback_token = None
            try:
                playback_data = self.get_playback_data(track_episode_id, track_stream=True)
                playback_token = playback_data.get("token")

                if not playback_token:
                    raise ValueError(f"No playback token in response for {track_episode_id}")

                track.data["playback_token"] = playback_token

                license_response = self.session.post(
                    url=self.config["endpoints"]["license_widevine"],
                    params={"specConform": "true"},
                    data=challenge,
                    headers={
                        **self.auth_header,
                        "content-type": "application/octet-stream",
                        "accept": "application/octet-stream",
                        "x-cr-content-id": track_episode_id,
                        "x-cr-video-token": playback_token,
                    },
                )

                if license_response.status_code != 200:
                    self.log.error(f"License request failed with status {license_response.status_code}")
                    self.log.error(f"Response: {license_response.text[:500]}")
                    self.close_stream(track_episode_id, playback_token)
                    raise ValueError(f"License request failed: {license_response.status_code}")

                self.close_stream(track_episode_id, playback_token)
                return license_response.content

            except Exception:
                if playback_token:
                    try:
                        self.close_stream(track_episode_id, playback_token)
                    except Exception:
                        pass
                raise

    def cleanup_active_streams(self) -> None:
        """
        Close all remaining active streams.
        Called to ensure no streams are left open.
        """
        if self.ACTIVE_STREAMS:
            try:
                self.authenticate()
            except Exception as e:
                self.log.warning(f"Failed to re-authenticate during cleanup: {e}")

            for episode_id, token in list(self.ACTIVE_STREAMS):
                try:
                    self.close_stream(episode_id, token)
                except Exception as e:
                    self.log.warning(f"Failed to close stream {episode_id}: {e}")
                    if (episode_id, token) in self.ACTIVE_STREAMS:
                        self.ACTIVE_STREAMS.remove((episode_id, token))

    def __del__(self) -> None:
        """Cleanup any remaining streams when service is destroyed."""
        try:
            self.cleanup_active_streams()
        except Exception:
            pass

    def get_chapters(self, title: Episode) -> Chapters:
        """Get chapters/skip events for an episode."""
        chapters = Chapters()

        chapter_response = self.session.get(
            url=self.config["endpoints"]["skip_events"].format(episode_id=title.id),
        )

        if chapter_response.status_code == 200:
            try:
                chapter_data = chapter_response.json()
            except Exception as e:
                self.log.warning(f"Failed to parse chapter data: {e}")
                return chapters

            for chapter_type in ["intro", "recap", "credits", "preview"]:
                if chapter_info := chapter_data.get(chapter_type):
                    try:
                        chapters.add(
                            Chapter(
                                timestamp=int(chapter_info["start"] * 1000),
                                name=chapter_info["type"].capitalize(),
                            )
                        )
                    except Exception as e:
                        self.log.debug(f"Failed to add {chapter_type} chapter: {e}")

        return chapters

    def search(self) -> Generator[SearchResult, None, None]:
        """Search for content on Crunchyroll."""
        try:
            response = self.session.get(
                url=self.config["endpoints"]["search"],
                params={
                    "q": self.title,
                    "type": "series",
                    "start": 0,
                    "n": 20,
                    "locale": self.config["params"]["locale"],
                },
            )

            if response.status_code != 200:
                self.log.error(f"Search request failed with status {response.status_code}")
                return

            search_data = response.json()
            for result_group in search_data.get("data", []):
                for series in result_group.get("items", []):
                    series_id = series.get("id")

                    if not series_id:
                        continue

                    title = series.get("title", "Unknown")
                    description = series.get("description", "")
                    year = series.get("series_launch_year")
                    if len(description) > 300:
                        description = description[:300] + "..."

                    url = f"https://www.crunchyroll.com/series/{series_id}"
                    label = f"SERIES ({year})" if year else "SERIES"

                    yield SearchResult(
                        id_=series_id,
                        title=title,
                        label=label,
                        description=description,
                        url=url,
                    )

        except Exception as e:
            self.log.error(f"Search failed: {e}")
            return

    def get_account_id(self) -> str:
        """Fetch and return the account ID."""
        response = self.session.get(url=self.config["endpoints"]["account_me"], headers=self.auth_header)

        if response.status_code != 200:
            self.log.error(f"Failed to get account info: {response.status_code}")
            self.log.error(f"Response: {response.text}")
            response.raise_for_status()

        data = response.json()
        return data["account_id"]

    def close_stream(self, episode_id: str, token: str) -> None:
        """Close an active playback stream to free up concurrent stream slots."""
        should_remove = False
        try:
            response = self.session.delete(
                url=self.config["endpoints"]["playback_delete"].format(episode_id=episode_id, token=token),
                headers=self.auth_header,
            )
            if response.status_code in (200, 204, 403):
                should_remove = True
            else:
                self.log.error(
                    f"Failed to close stream for {episode_id} (status {response.status_code}): {response.text[:200]}"
                )
        except Exception as e:
            self.log.error(f"Error closing stream for {episode_id}: {e}")
        finally:
            if should_remove and (episode_id, token) in self.ACTIVE_STREAMS:
                self.ACTIVE_STREAMS.remove((episode_id, token))

    def get_active_sessions(self) -> list:
        """Get all active streaming sessions for the account."""
        try:
            response = self.session.get(
                url=self.config["endpoints"]["playback_sessions"],
                headers=self.auth_header,
            )
            if response.status_code == 200:
                data = response.json()
                return data.get("items", [])
            else:
                self.log.warning(f"Failed to get active sessions (status {response.status_code})")
                return []
        except Exception as e:
            self.log.warning(f"Error getting active sessions: {e}")
            return []

    def clear_all_sessions(self) -> int:
        """
        Clear all active streaming sessions created during this or previous runs.

        Tries multiple approaches to ensure all streams are closed:
        1. Clear tracked streams with known tokens
        2. Query active sessions API and close all found streams
        3. Try alternate token formats if needed
        """
        cleared = 0

        if self.ACTIVE_STREAMS:
            streams_to_close = self.ACTIVE_STREAMS[:]
            for episode_id, playback_token in streams_to_close:
                try:
                    self.close_stream(episode_id, playback_token)
                    cleared += 1
                except Exception:
                    if (episode_id, playback_token) in self.ACTIVE_STREAMS:
                        self.ACTIVE_STREAMS.remove((episode_id, playback_token))

        sessions = self.get_active_sessions()
        if sessions:
            for session_data in sessions:
                content_id = session_data.get("contentId")
                session_token = session_data.get("token")

                if content_id and session_token:
                    tokens_to_try = (
                        ["11-" + session_token[3:], session_token]
                        if session_token.startswith("08-")
                        else [session_token]
                    )

                    session_closed = False
                    for token in tokens_to_try:
                        try:
                            response = self.session.delete(
                                url=self.config["endpoints"]["playback_delete"].format(
                                    episode_id=content_id, token=token
                                ),
                                headers=self.auth_header,
                            )
                            if response.status_code in (200, 204):
                                cleared += 1
                                session_closed = True
                                break
                            elif response.status_code == 403:
                                session_closed = True
                                break
                        except Exception:
                            pass

                    if not session_closed:
                        self.log.warning(f"Unable to close session {content_id} with any token format")

        return cleared

    def get_playback_data(self, episode_id: str, track_stream: bool = True) -> dict:
        """
        Get playback data for an episode with automatic retry on stream limits.

        Args:
            episode_id: The episode ID to get playback data for
            track_stream: Whether to track this stream in active_streams (False for temporary streams)

        Returns:
            dict: The playback response data

        Raises:
            ValueError: If playback request fails after retry
        """
        self.ensure_authenticated()

        max_retries = 2
        for attempt in range(max_retries + 1):
            response = self.session.get(
                url=self.config["endpoints"]["playback"].format(episode_id=episode_id),
                params={"queue": "false"},
            ).json()

            if "error" in response:
                error_code = response.get("code", "")
                error_msg = response.get("message", response.get("error", "Unknown error"))

                if error_code == "TOO_MANY_ACTIVE_STREAMS" and attempt < max_retries:
                    self.log.warning(f"Hit stream limit: {error_msg}")
                    cleared = self.clear_all_sessions()

                    if cleared == 0 and attempt == 0:
                        wait_time = 30
                        self.log.warning(
                            f"Found orphaned sessions from previous run. Waiting {wait_time}s for them to expire..."
                        )
                        time.sleep(wait_time)

                    continue

                self.log.error(f"Playback API error: {error_msg}")
                self.log.debug(f"Full response: {response}")
                raise ValueError(f"Could not get playback info for episode: {episode_id} - {error_msg}")

            playback_token = response.get("token")
            if playback_token and track_stream:
                self.ACTIVE_STREAMS.append((episode_id, playback_token))

            return response

        raise ValueError(f"Failed to get playback data for episode: {episode_id}")

    def parse_series_id(self, title_input: str) -> str:
        """Parse series ID from URL or direct ID input."""
        match = re.match(self.TITLE_RE, title_input, re.IGNORECASE)
        if not match:
            raise ValueError(f"Could not parse series ID from: {title_input}")
        return match.group("id")