import os import time import random import string from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager SEEN_LINKS_FILE = "seen_links.txt" GENRE_URLS = [ "https://play.max.com/genre/action", "https://play.max.com/genre/comedy", "https://play.max.com/genre/drama", "https://play.max.com/genre/horror", "https://play.max.com/genre/sci-fi", "https://play.max.com/genre/documentary" ] def setup_browser(): options = Options() options.add_argument("--disable-gpu") options.add_argument("--window-size=1920,1080") options.add_argument("--disable-blink-features=AutomationControlled") return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) def scroll_to_bottom(driver, max_wait=60): print("\U0001f4dc Scrolling to load all movies...") last_height = driver.execute_script("return document.body.scrollHeight") start_time = time.time() while True: driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(2.5) new_height = driver.execute_script("return document.body.scrollHeight") if new_height == last_height: print("\u2705 Reached bottom of page.") break last_height = new_height if time.time() - start_time > max_wait: print("\u23f1\ufe0f Scroll timed out.") break def extract_movie_links(driver): anchors = driver.find_elements(By.XPATH, '//a[contains(@href, "/movie/")]') links = list(set([a.get_attribute("href") for a in anchors if "/movie/" in a.get_attribute("href")])) return links def generate_random_query(): letters = string.ascii_lowercase return ''.join(random.choices(letters, k=random.choice([1, 2]))) def load_seen_links(): if not os.path.exists(SEEN_LINKS_FILE): return set() with open(SEEN_LINKS_FILE, "r") as f: return set([line.strip() for line in f if line.strip()]) def save_seen_links(new_links): with open(SEEN_LINKS_FILE, "a") as f: for link in new_links: f.write(link + "\n") def save_links(links, batch_id, prefix): output_dir = "output_movies" os.makedirs(output_dir, exist_ok=True) for i in range(0, len(links), 10): chunk = links[i:i+10] file_path = os.path.join(output_dir, f"{prefix}_{batch_id}_{(i // 10) + 1}.txt") with open(file_path, "w") as f: for link in chunk: f.write(link + "\n") print(f"\U0001f4be Saved {len(chunk)} links to {file_path}") def run_movies_mode(driver, seen_links, batch_counter): driver.get("https://play.max.com/movies") time.sleep(4) scroll_to_bottom(driver) all_links = extract_movie_links(driver) print(f"\u2705 Loaded {len(all_links)} movie links.") while True: try: count = int(input("\U0001f522 How many random movies do you want to extract? ")) break except ValueError: print("\u274c Invalid number.") sample = random.sample(all_links, min(count, len(all_links))) unique_links = [link for link in sample if link not in seen_links] duplicates = len(sample) - len(unique_links) if unique_links: save_links(unique_links, batch_counter, "movies") save_seen_links(unique_links) print(f"\u2705 Saved {len(unique_links)} new links. \u267b Skipped {duplicates} duplicates.") else: print("\u26a0\ufe0f All selected links were duplicates.") def run_search_mode(driver, seen_links, search_counter): search_terms = [ "the", "man", "love", "dark", "moon", "fire", "red", "blue", "night", "girl", "life", "death", "dream", "war", "blood", "star", "light", "king", "queen" ] max_attempts = 10 found_total = 0 for attempt in range(max_attempts): query = random.choice(search_terms) url = f"https://play.max.com/search?q={query}" print(f"\n\U0001f50d Searching for query: '{query}'") driver.get(url) time.sleep(4) links = extract_movie_links(driver) unique_links = [link for link in links if link not in seen_links] duplicates = len(links) - len(unique_links) if unique_links: save_links(unique_links, search_counter, "search") save_seen_links(unique_links) print(f"\u2705 Found {len(unique_links)} new links. \u267b Skipped {duplicates} duplicates.") search_counter += 1 found_total += len(unique_links) else: print(f"\u26a0\ufe0f No new links found for query '{query}'.") print("\u23f3 Waiting 10 seconds before next search...") time.sleep(10) print(f"\n\ud83d\udcca Search complete. Found total {found_total} new movie links in {max_attempts} searches.") def run_genre_mode(driver, seen_links, genre_batch): found_total = 0 for genre_url in GENRE_URLS: print(f"\n\U0001f3ac Loading genre: {genre_url}") driver.get(genre_url) time.sleep(4) scroll_to_bottom(driver) links = extract_movie_links(driver) unique_links = [link for link in links if link not in seen_links] duplicates = len(links) - len(unique_links) if unique_links: save_links(unique_links, genre_batch, "genre") save_seen_links(unique_links) print(f"\u2705 Found {len(unique_links)} new links. \u267b Skipped {duplicates} duplicates.") genre_batch += 1 found_total += len(unique_links) else: print("\u26a0\ufe0f No new unique links in this genre.") time.sleep(5) print("📊 Genre scan complete. Found total", found_total, "new movie links.") def main(): print("🛠️ Max.com Movie Scraper — Created by Mike | DRMLab.io Project") driver = setup_browser() try: print("\U0001f310 Opening Max.com homepage...") driver.get("https://play.max.com") time.sleep(3) print("\n\U0001f511 Please log in manually in the browser.") input("\u23f3 After you're logged in, press ENTER here to continue...") seen_links = load_seen_links() movie_batch = 1 search_batch = 1 genre_batch = 1 while True: print("📋 === MAIN MENU ===") print("1️⃣ Extract movies from /movies page") print("2️⃣ Extract random movies via search") print("3️⃣ Extract movies by genre") print("4️⃣ Exit") choice = input("Enter your choice: ").strip() if choice == "1": run_movies_mode(driver, seen_links, movie_batch) movie_batch += 1 elif choice == "2": run_search_mode(driver, seen_links, search_batch) search_batch += 1 elif choice == "3": run_genre_mode(driver, seen_links, genre_batch) genre_batch += 1 elif choice == "4": break else: print("\u274c Invalid choice.") finally: again = input("⏹ Do you want to close the browser? (yes/no): ").strip().lower() if again != "yes": main() else: input("✅ Press ENTER to exit.") driver.quit() if __name__ == "__main__": main()