import os
import time
import random
import datetime

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

SEEN_FILE = "seen_links_prime.txt"
LOG_FILE = "log.txt"
OUTPUT_DIR = "output_prime"

CATEGORIES = {
    "Action": "https://www.primevideo.com/region/{region}/storefront/action",
    "Comedy": "https://www.primevideo.com/region/{region}/storefront/comedy",
    "Drama": "https://www.primevideo.com/region/{region}/storefront/drama",
    "Sci-Fi": "https://www.primevideo.com/region/{region}/storefront/scifi",
    "Documentary": "https://www.primevideo.com/region/{region}/storefront/documentary",
}


def setup_browser():
    """Launch Chrome via webdriver_manager with basic anti-automation hints disabled."""
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--disable-blink-features=AutomationControlled")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


def scroll_to_bottom(driver, max_wait=60):
    """Scroll until the page height stops growing or max_wait seconds have passed."""
    print("šŸ“œ Scrolling to load more...")
    last_height = driver.execute_script("return document.body.scrollHeight")
    start_time = time.time()
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height or (time.time() - start_time > max_wait):
            break
        last_height = new_height
    print("āœ… Reached bottom.")


def extract_links(driver):
    """Collect deduplicated (title, url) pairs for every title-detail link on the page."""
    anchors = driver.find_elements(By.TAG_NAME, "a")
    results = []
    for a in anchors:
        href = a.get_attribute("href")
        if href and "/detail/" in href:
            title = a.get_attribute("title") or a.get_attribute("alt") or a.text.strip()
            results.append((title or "Unknown Title", href))
    return list(set(results))


def load_seen():
    """Return the set of links already saved by previous runs."""
    if not os.path.exists(SEEN_FILE):
        return set()
    with open(SEEN_FILE, "r", encoding="utf-8") as f:
        return set(line.strip() for line in f)


def save_seen(new_links):
    """Append newly saved links to the seen-links file."""
    with open(SEEN_FILE, "a", encoding="utf-8") as f:
        for title, link in new_links:
            f.write(link + "\n")


def save_links(new_links, mode_label):
    """Write the new links to a timestamped file in the output directory."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{OUTPUT_DIR}/prime_{mode_label}_{timestamp}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        for title, link in new_links:
            f.write(f"{title} — {link}\n")
    print(f"šŸ’¾ Saved {len(new_links)} links to {filename}")


def write_log(message):
    with open(LOG_FILE, "a", encoding="utf-8") as log:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log.write(f"[{timestamp}] {message}\n")


def process_extraction(driver, label, url):
    """Open a page, scroll it fully, extract links, and save only the unseen ones."""
    driver.get(url)
    time.sleep(3)
    scroll_to_bottom(driver)
    extracted = extract_links(driver)
    seen = load_seen()
    unique = [(title, link) for title, link in extracted if link not in seen]
    duplicates = len(extracted) - len(unique)
    if unique:
        save_links(unique, label)
        save_seen(unique)
        write_log(f"Added {len(unique)} new links from: {label}")
        print(f"šŸ“Š Found {len(unique)} new links. šŸ” Skipped {duplicates} duplicates.")
    else:
        print("āš ļø No new unique links found.")
        write_log(f"No new links found for: {label}")


def main():
    print("šŸ› ļø Amazon Prime Scraper — Created by Mike | DRMLab.io Project")
    region = input("šŸŒ Enter your Prime Video region (e.g. eu, us, uk): ").strip().lower()
    driver = setup_browser()
    print("🌐 Opening Prime Video storefront...")
    driver.get(f"https://www.primevideo.com/region/{region}/storefront/")
    time.sleep(5)
    input("šŸ” Log in manually, then press ENTER to continue...")

    while True:
        print("\nšŸ“‹ === MAIN MENU ===")
        print("1ļøāƒ£ Extract from storefront")
        print("2ļøāƒ£ Search by keyword")
        print("3ļøāƒ£ Browse by genre")
        print("4ļøāƒ£ Exit")
        choice = input("āž”ļø Enter your choice: ").strip()

        if choice == "1":
            process_extraction(driver, "storefront", f"https://www.primevideo.com/region/{region}/storefront/")
        elif choice == "2":
            keyword = input("šŸ” Enter search keyword (or leave blank for random): ").strip()
            if not keyword:
                keyword = random.choice(["war", "love", "moon", "dark", "fire", "life", "death", "dream"])
                print(f"šŸŽ² Using random keyword: {keyword}")
            url = f"https://www.primevideo.com/region/{region}/search/ref=atv_nb_sr?phrase={keyword}"
            process_extraction(driver, f"search_{keyword}", url)
        elif choice == "3":
            print("šŸŽ­ Available genres:")
            for i, (name, _) in enumerate(CATEGORIES.items(), start=1):
                print(f"{i}. {name}")
            genre_choice = input("šŸŽÆ Choose genre number: ").strip()
            try:
                index = int(genre_choice) - 1
                if index < 0:
                    # Reject 0 or negative input instead of wrapping around to the end of the list.
                    raise IndexError
                genre_name = list(CATEGORIES.keys())[index]
                genre_url = CATEGORIES[genre_name].format(region=region)
                process_extraction(driver, f"genre_{genre_name.lower()}", genre_url)
            except (ValueError, IndexError):
                print("āŒ Invalid genre choice.")
        elif choice == "4":
            print("šŸ‘‹ Exiting scraper. Goodbye!")
            break
        else:
            print("āŒ Invalid input. Try again.")

    driver.quit()


if __name__ == "__main__":
    main()