Resolve spotify.link short URL

Added the ability to resolve spotify.link short URLs.
This commit is contained in:
Clarth 2025-10-18 12:56:03 -04:00
parent e34257e97c
commit 4d6ab13914

214
start.py
View file

@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN")
# Root folder that receives one sub-folder per artist, and the base path used
# to locate the spotdl executable.
dldir = "/path/to/Music/"
workingdir = "/path/to/script"

# The spotdl log sits next to this script; opening it with "w" truncates it,
# so every run starts with a fresh log.
script_dir = os.path.dirname(os.path.abspath(__file__))
spotdl_log_path = os.path.join(script_dir, "spotdl.log")
with open(spotdl_log_path, "w") as _log_init:
    _log_init.write(datetime.datetime.now().isoformat() + " Starting new run\n")

print("tg_spotdl bot is running.")
@bot.message_handler(commands=["start"])
def start(message):
    """Greet the user on /start and hand their next message to process_url."""
    greeting = "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks)."
    bot.send_message(message.chat.id, greeting)
    # The very next message from this chat is treated as the artist URL.
    bot.register_next_step_handler(message, process_url)
    print("User pushed start.")
def resolve_spotify_link(url):
    """Resolve a Spotify short link (spotify.link / spotify.app.link).

    The short link is fetched with redirects enabled. If the request does not
    end on open.spotify.com, the returned HTML is searched, in order, for a
    meta-refresh target, an ``og:url`` tag, a canonical ``<link>`` and finally
    any artist URL that appears in the page.

    Args:
        url: The URL text to resolve.

    Returns:
        The open.spotify.com URL that was found, or the URL the request ended
        on when none was found. ``url`` is returned unchanged when it is not a
        short link or when the HTTP request fails.
    """
    # Local import keeps the function self-contained; the page text is held in
    # ``page`` (not ``html``) so the module name is never shadowed.
    from html import unescape

    short_link_pattern = r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/"
    if not re.match(short_link_pattern, url, flags=re.IGNORECASE):
        return url

    open_spotify_pattern = r"^https?://open\.spotify\.com/"
    # Tried in this order; each pattern captures the URL in group 1.
    attribute_patterns = (
        # <meta http-equiv="refresh" content="0; url=...">
        r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']',
        # <meta property="og:url" content="...">
        r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']',
        # <link rel="canonical" href="...">
        r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']',
    )

    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
            allow_redirects=True,
        )
        final_url = response.url
        print(f"Resolved short link: {url} -> {final_url}")
        if re.match(open_spotify_pattern, final_url, flags=re.IGNORECASE):
            return final_url

        try:
            page = response.text or ""
        except Exception as e:
            # Best effort: an unreadable body only disables the HTML
            # fallbacks, but say so instead of failing silently.
            print(f"Could not read response body for '{url}': {e}")
            page = ""

        for pattern in attribute_patterns:
            m = re.search(pattern, page, flags=re.IGNORECASE)
            if not m:
                continue
            # Attribute values are HTML-escaped (e.g. "&amp;") and may carry
            # surrounding whitespace; decode them before using them as a URL.
            candidate = unescape(m.group(1)).strip()
            if re.match(open_spotify_pattern, candidate, flags=re.IGNORECASE):
                return candidate

        # Fallback: scan for artist URLs in the HTML
        m = re.search(
            r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+",
            page,
            flags=re.IGNORECASE,
        )
        return m.group(0) if m else final_url
    except requests.RequestException as e:
        print(f"Error resolving Spotify short URL '{url}': {e}")
        return url
def canonicalize_artist_url(url):
    """Reduce a Spotify artist URL to ``https://open.spotify.com/artist/<id>``.

    A locale prefix (``/intl-xx/``), one trailing slash, a query string and a
    fragment are all dropped. Scheme and host are matched case-insensitively
    and always emitted as lowercase https.

    Example:
        https://open.spotify.com/intl-en/artist/<id>?si=...
        -> https://open.spotify.com/artist/<id>

    Args:
        url: The URL to canonicalize.

    Returns:
        The canonical artist URL, or ``url`` unchanged when it is not an
        open.spotify.com artist URL.
    """
    m = re.match(
        # (?i:...) limits case-insensitivity to scheme and host; the path and
        # the artist id are still matched exactly as written.
        r"^(?i:https?://open\.spotify\.com)/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)/?(?:[?#].*)?$",
        url,
    )
    if not m:
        return url
    canonical = "https://open.spotify.com/artist/" + m.group(1)
    if canonical != url:
        print(f"Canonicalized artist URL: {url} -> {canonical}")
    return canonical
def is_valid_artist_url(url):
    """Return True when ``url`` is an open.spotify.com artist URL.

    A locale prefix such as ``/intl-en/`` before ``artist/<id>`` and a
    trailing query string are both accepted.
    """
    artist_url_pattern = r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?$"
    return re.match(artist_url_pattern, url) is not None
def get_artist_name(url):
    """Download the artist page at ``url`` and derive the artist name from its <title>.

    The " | Spotify" marker and a trailing "Songs and Music" / "Songs" /
    "Songs, Albums, Bio & More" suffix are stripped from the title. Returns
    "Unknown Artist" when the request fails, the page has no title, or the
    cleaned title is empty.
    """
    fallback = "Unknown Artist"
    try:
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        page.raise_for_status()

        title_tag = BeautifulSoup(page.text, "html.parser").find("title")
        if not title_tag:
            return fallback

        name = title_tag.text.replace(" | Spotify", "").strip()
        # Drop the descriptive suffixes that sometimes follow the name.
        name = re.sub(
            r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$",
            "",
            name,
            flags=re.IGNORECASE,
        )
        return name.strip() or fallback
    except requests.RequestException as e:
        print(f"Error fetching artist name: {e}")
        return fallback
def process_url(message):
    """Download an artist's music with spotdl for the URL in ``message.text``.

    Steps: resolve a spotify.link short URL, canonicalize the artist URL,
    validate it, look up the artist name, then run spotdl inside a per-artist
    folder under ``dldir``. spotdl's stdout and stderr are appended to the
    spotdl log. The user is messaged on rejection, on start, and with the
    outcome.

    Args:
        message: The incoming Telegram message; only ``text`` and ``chat.id``
            are read here.

    Returns:
        None.
    """
    chat_id = message.chat.id
    raw_url = (message.text or "").strip()
    if not raw_url:
        bot.send_message(chat_id, "Please send a valid Spotify artist URL.")
        print("Empty message text received.")
        return

    # 1) Resolve spotify.link short URLs
    # 2) Canonicalize open.spotify.com artist URL format
    resolved_url = canonicalize_artist_url(resolve_spotify_link(raw_url))

    # 3) Validate URL
    if not is_valid_artist_url(resolved_url):
        bot.send_message(
            chat_id,
            "Please send a valid Spotify artist URL, not a playlist, album, or track.",
        )
        print(f"Rejected URL: {raw_url} -> {resolved_url}")  # Debugging
        return

    artist = get_artist_name(resolved_url)
    bot.send_message(chat_id, f"Downloading music for {artist}. This may take a while.")

    # The name is scraped from a page title, so it must not be able to add
    # path components (e.g. "AC/DC") or point outside dldir ("..").
    folder_name = artist.replace("/", "_").replace("\\", "_")
    if folder_name in ("", ".", ".."):
        folder_name = "Unknown Artist"
    artist_directory = os.path.join(dldir, folder_name)
    os.makedirs(artist_directory, exist_ok=True)

    spotdl_command = [
        # os.path.join works whether or not workingdir ends with a separator;
        # plain concatenation produced ".../scriptbin/spotdl" without one.
        os.path.join(workingdir, "bin", "spotdl"),
        "--format",
        "opus",
        "--bitrate",
        "80k",
        resolved_url,
    ]

    with open(spotdl_log_path, "a") as log_file:
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' url={resolved_url} dir={artist_directory}\n"
        )
        # The child writes straight to the file descriptor; flush our buffered
        # line first so it lands in the log before spotdl's own output.
        log_file.flush()
        try:
            # cwd= sets the directory for the child only, instead of changing
            # the working directory of the whole bot process with os.chdir.
            exit_code = subprocess.run(
                spotdl_command,
                cwd=artist_directory,
                stdout=log_file,
                stderr=log_file,
            ).returncode
        except OSError as e:
            # e.g. the spotdl executable is missing or not executable.
            log_file.write(
                f"{datetime.datetime.now().isoformat()} Could not launch spotdl: {e}\n"
            )
            exit_code = None
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}' (exit code {exit_code})\n"
        )

    if exit_code == 0:
        bot.send_message(chat_id, f"Finished downloading {artist}.")
        print(f"Download completed for {artist}.")
    else:
        # Do not claim success when spotdl failed or never started.
        bot.send_message(chat_id, f"spotdl reported an error while downloading {artist}.")
        print(f"Download failed for {artist} (exit code {exit_code}).")

    bot.send_message(chat_id, "Send another artist URL or use /start to begin again.")
def _is_direct_artist_message(m):
    """Filter: truthy when the message text starts with an open.spotify.com artist URL."""
    return isinstance(m.text, str) and re.match(
        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", m.text.strip()
    )


@bot.message_handler(func=_is_direct_artist_message)
def process_direct_artist_url(message):
    """Handle an artist URL sent directly, without a preceding /start."""
    process_url(message)
def _is_short_link_message(m):
    """Filter: truthy when the message text starts with a spotify.link / spotify.app.link URL."""
    return isinstance(m.text, str) and re.match(
        r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", m.text.strip()
    )


@bot.message_handler(func=_is_short_link_message)
def process_spotify_link(message):
    """Handle a Spotify short link sent directly; process_url resolves it."""
    process_url(message)
@bot.message_handler(func=lambda message: True)
def echo_all(message):
    """Catch-all handler: reply with a hint to any message that reaches it."""
    hint = "I don't understand. Please send a Spotify artist URL."
    bot.reply_to(message, hint)
# Start polling only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    bot.polling()