From 4d6ab1391487ff48b2faa2c65b1994928b904d8d Mon Sep 17 00:00:00 2001 From: Clarth Date: Sat, 18 Oct 2025 12:56:03 -0400 Subject: [PATCH] Resolve spotify.link short URL Added ability to resolve spotify.link short URL --- start.py | 214 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 185 insertions(+), 29 deletions(-) diff --git a/start.py b/start.py index b7a1d0b..66a27d1 100644 --- a/start.py +++ b/start.py @@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN") dldir = "/path/to/Music/" workingdir = "/path/to/script" +script_dir = os.path.dirname(os.path.abspath(__file__)) +spotdl_log_path = os.path.join(script_dir, "spotdl.log") + +with open(spotdl_log_path, "w") as _log_init: + _log_init.write(f"{datetime.datetime.now().isoformat()} Starting new run\n") print("tg_spotdl bot is running.") -@bot.message_handler(commands=['start']) + +@bot.message_handler(commands=["start"]) def start(message): - bot.send_message(message.chat.id, "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks).") + bot.send_message( + message.chat.id, + "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks).", + ) bot.register_next_step_handler(message, process_url) print("User pushed start.") + +def resolve_spotify_link(url): + """ + Resolves Spotify short URLs (spotify.link and spotify.app.link) to their final destination. + Returns the final resolved URL or the original URL if resolution fails. 
+ """ + try: + if re.match( + r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", + url, + flags=re.IGNORECASE, + ): + headers = {"User-Agent": "Mozilla/5.0"} + response = requests.get( + url, headers=headers, timeout=10, allow_redirects=True + ) + final_url = response.url + print(f"Resolved short link: {url} -> {final_url}") + if re.match( + r"^https?://open\.spotify\.com/", final_url, flags=re.IGNORECASE + ): + return final_url + html = "" + try: + html = response.text or "" + except Exception: + html = "" + # Try meta refresh URL + m = re.search( + r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + candidate = m.group(1) + if re.match( + r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE + ): + return candidate + # Try OpenGraph URL + m = re.search( + r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + candidate = m.group(1) + if re.match( + r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE + ): + return candidate + # Try canonical link + m = re.search( + r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']', + html, + flags=re.IGNORECASE, + ) + if m: + candidate = m.group(1) + if re.match( + r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE + ): + return candidate + # Fallback: scan for artist URLs in the HTML + m = re.search( + r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+", + html, + flags=re.IGNORECASE, + ) + return m.group(0) if m else final_url + return url + except requests.RequestException as e: + print(f"Error resolving Spotify short URL '{url}': {e}") + return url + + +def canonicalize_artist_url(url): + """ + Canonicalizes Spotify artist URLs by removing locale prefixes and query params. + Example: https://open.spotify.com/intl-en/artist/<id>?si=... 
-> https://open.spotify.com/artist/<id> + """ + m = re.match( + r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?(artist/[a-zA-Z0-9]+)(?:\?.*)?$", + url, + ) + if m: + canonical = "https://open.spotify.com/" + m.group(1) + if canonical != url: + print(f"Canonicalized artist URL: {url} -> {canonical}") + return canonical + return url + + def is_valid_artist_url(url): """Checks if the URL is a valid Spotify artist URL.""" - match = re.match(r"https://open\.spotify\.com/artist/([a-zA-Z0-9]+)", url) + # Allow locale-prefixed paths like /intl-en/artist/<id> and optional query params + match = re.match( + r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?$", + url, + ) return bool(match) + def get_artist_name(url): """Fetches the Spotify artist page and extracts the cleaned artist's name.""" headers = {"User-Agent": "Mozilla/5.0"} try: response = requests.get(url, headers=headers, timeout=10) response.raise_for_status() - soup = BeautifulSoup(response.text, 'html.parser') + soup = BeautifulSoup(response.text, "html.parser") title_tag = soup.find("title") if title_tag: artist_name = title_tag.text.replace(" | Spotify", "").strip() - - # Remove unwanted suffixes + + # Remove unwanted suffixes that occasionally appear artist_name = re.sub( - r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$", - "", - artist_name, - flags=re.IGNORECASE + r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$", + "", + artist_name, + flags=re.IGNORECASE, ) - - return artist_name.strip() + + return artist_name.strip() or "Unknown Artist" else: return "Unknown Artist" except requests.RequestException as e: print(f"Error fetching artist name: {e}") return "Unknown Artist" -def process_url(message): - url = message.text.strip() - # Validate URL - if not is_valid_artist_url(url): - bot.send_message(message.chat.id, "Please send a valid Spotify artist URL, not a playlist, album, or track.") - print(f"Rejected URL: {url}") # Debugging +def process_url(message): + 
raw_url = (message.text or "").strip() + if not raw_url: + bot.send_message(message.chat.id, "Please send a valid Spotify artist URL.") + print("Empty message text received.") return - artist = get_artist_name(url) + # 1) Resolve spotify.link short URLs + resolved_url = resolve_spotify_link(raw_url) - bot.send_message(message.chat.id, f"Downloading music for {artist}. This may take a while.") + # 2) Canonicalize open.spotify.com artist URL format + resolved_url = canonicalize_artist_url(resolved_url) + + # 3) Validate URL + if not is_valid_artist_url(resolved_url): + bot.send_message( + message.chat.id, + "Please send a valid Spotify artist URL, not a playlist, album, or track.", + ) + print(f"Rejected URL: {raw_url} -> {resolved_url}") # Debugging + return + + artist = get_artist_name(resolved_url) + + bot.send_message( + message.chat.id, f"Downloading music for {artist}. This may take a while." + ) artist_directory = os.path.join(dldir, artist) os.makedirs(artist_directory, exist_ok=True) os.chdir(artist_directory) - subprocess.run([ - f"{workingdir}bin/spotdl", - "--format", "opus", - "--bitrate", "80k", - url - ]) + with open(spotdl_log_path, "a") as log_file: + log_file.write( + f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' url={resolved_url} dir={artist_directory}\n" + ) + subprocess.run( + [ + f"{workingdir}bin/spotdl", + "--format", + "opus", + "--bitrate", + "80k", + resolved_url, + ], + stdout=log_file, + stderr=log_file, + ) + log_file.write( + f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}'\n" + ) bot.send_message(message.chat.id, f"Finished downloading {artist}.") print(f"Download completed for {artist}.") - bot.send_message(message.chat.id, "Send another artist URL or use /start to begin again.") + bot.send_message( + message.chat.id, "Send another artist URL or use /start to begin again." 
+ ) -@bot.message_handler(func=lambda message: message.text.startswith("https://open.spotify.com/artist/")) -def process_direct_url(message): + +@bot.message_handler( + func=lambda m: isinstance(m.text, str) + and re.match( + r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", m.text.strip() + ) +) +def process_direct_artist_url(message): process_url(message) + +@bot.message_handler( + func=lambda m: isinstance(m.text, str) + and re.match( + r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", m.text.strip() + ) +) +def process_spotify_link(message): + process_url(message) + + @bot.message_handler(func=lambda message: True) def echo_all(message): bot.reply_to(message, "I don't understand. Please send a Spotify artist URL.") + if __name__ == "__main__": bot.polling()