diff --git a/prime/prime.py b/prime/prime.py new file mode 100644 index 0000000..c4027ec --- /dev/null +++ b/prime/prime.py @@ -0,0 +1,143 @@ +import os +import time +import random +import datetime +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service +from webdriver_manager.chrome import ChromeDriverManager + +SEEN_FILE = "seen_links_prime.txt" +LOG_FILE = "log.txt" +OUTPUT_DIR = "output_prime" + +CATEGORIES = { + "Action": "https://www.primevideo.com/region/{region}/storefront/action", + "Comedy": "https://www.primevideo.com/region/{region}/storefront/comedy", + "Drama": "https://www.primevideo.com/region/{region}/storefront/drama", + "Sci-Fi": "https://www.primevideo.com/region/{region}/storefront/scifi", + "Documentary": "https://www.primevideo.com/region/{region}/storefront/documentary" +} + +def setup_browser(): + options = Options() + options.add_argument("--window-size=1920,1080") + options.add_argument("--disable-blink-features=AutomationControlled") + return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) + +def scroll_to_bottom(driver, max_wait=60): + print("šŸ“œ Scrolling to load more...") + last_height = driver.execute_script("return document.body.scrollHeight") + start_time = time.time() + while True: + driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + time.sleep(2) + new_height = driver.execute_script("return document.body.scrollHeight") + if new_height == last_height or (time.time() - start_time > max_wait): + break + last_height = new_height + print("āœ… Reached bottom.") + +def extract_links(driver): + anchors = driver.find_elements(By.TAG_NAME, "a") + results = [] + for a in anchors: + href = a.get_attribute("href") + if href and "/detail/" in href: + title = a.get_attribute("title") or a.get_attribute("alt") or a.text.strip() + results.append((title or "Unknown Title", href)) + return list(set(results)) + +def load_seen(): + if not os.path.exists(SEEN_FILE): + return set() + with open(SEEN_FILE, "r") as f: + return set(line.strip() for line in f) + +def save_seen(new_links): + with open(SEEN_FILE, "a") as f: + for title, link in new_links: + f.write(link + "\n") + +def save_links(new_links, mode_label): + os.makedirs(OUTPUT_DIR, exist_ok=True) + timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"{OUTPUT_DIR}/prime_{mode_label}_{timestamp}.txt" + with open(filename, "w") as f: + for title, link in new_links: + f.write(f"{title} — {link}\n") + print(f"šŸ’¾ Saved {len(new_links)} links to {filename}") + +def write_log(message): + with open(LOG_FILE, "a") as log: + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + log.write(f"[{timestamp}] {message}\n") + +def process_extraction(driver, label, url): + driver.get(url) + time.sleep(3) + scroll_to_bottom(driver) + extracted = extract_links(driver) + seen = load_seen() + unique = [(title, link) for title, link in extracted if link not in seen] + duplicates = len(extracted) - len(unique) + + if unique: + save_links(unique, label) + save_seen(unique) + write_log(f"Added {len(unique)} new links from: {label}") + print(f"šŸ“Š Found {len(unique)} new links. šŸ” Skipped {duplicates} duplicates.") + else: + print("āš ļø No new unique links found.") + write_log(f"No new links found for: {label}") + +def main(): + print("šŸ› ļø Amazon Prime Scraper — Created by Mike | DRMLab.io Project") + region = input("šŸŒ Enter your Prime Video region (e.g. eu, us, uk): ").strip().lower() + driver = setup_browser() + + print("🌐 Opening Prime Video storefront...") + driver.get(f"https://www.primevideo.com/region/{region}/storefront/") + time.sleep(5) + input("šŸ” Log in manually, then press ENTER to continue...") + + while True: + print("\nšŸ“‹ === MAIN MENU ===") + print("1ļøāƒ£ Extract from storefront") + print("2ļøāƒ£ Search by keyword") + print("3ļøāƒ£ Browse by genre") + print("4ļøāƒ£ Exit") + choice = input("āž”ļø Enter your choice: ").strip() + + if choice == "1": + process_extraction(driver, "storefront", f"https://www.primevideo.com/region/{region}/storefront/") + elif choice == "2": + keyword = input("šŸ” Enter search keyword (or leave blank for random): ").strip() + if not keyword: + keyword = random.choice(["war", "love", "moon", "dark", "fire", "life", "death", "dream"]) + print(f"šŸŽ² Using random keyword: {keyword}") + url = f"https://www.primevideo.com/region/{region}/search/ref=atv_nb_sr?phrase={keyword}" + process_extraction(driver, f"search_{keyword}", url) + elif choice == "3": + print("šŸŽ­ Available genres:") + for i, (name, _) in enumerate(CATEGORIES.items(), start=1): + print(f"{i}. {name}") + genre_choice = input("šŸŽÆ Choose genre number: ").strip() + try: + index = int(genre_choice) - 1 + genre_name = list(CATEGORIES.keys())[index] + genre_url = CATEGORIES[genre_name].format(region=region) + process_extraction(driver, f"genre_{genre_name.lower()}", genre_url) + except (ValueError, IndexError): + print("āŒ Invalid genre choice.") + elif choice == "4": + print("šŸ‘‹ Exiting scraper. Goodbye!") + break + else: + print("āŒ Invalid input. Try again.") + + driver.quit() + +if __name__ == "__main__": + main()