Resolve spotify.link short URL
Added ability to resolve spotify.link short URL
This commit is contained in:
parent
e34257e97c
commit
4d6ab13914
1 changed files with 185 additions and 29 deletions
204
start.py
204
start.py
|
|
@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN")
|
|||
# Root directory under which one sub-directory per artist is created.
dldir = "/path/to/Music/"
# Directory holding the spotdl installation (its bin/ folder). Written with a
# trailing slash, like dldir, so that paths formed by appending to it
# (workingdir + "bin/spotdl") stay valid.
workingdir = "/path/to/script/"

# The spotdl log is kept next to this script, independent of the current
# working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
spotdl_log_path = os.path.join(script_dir, "spotdl.log")

# Opening with "w" truncates the log, so each run starts with a fresh file.
with open(spotdl_log_path, "w") as _log_init:
    _log_init.write(f"{datetime.datetime.now().isoformat()} Starting new run\n")
print("tg_spotdl bot is running.")
|
||||
|
||||
@bot.message_handler(commands=["start"])
def start(message):
    """Handle the /start command.

    Sends the usage prompt to the chat the command came from, then passes
    process_url to bot.register_next_step_handler for this message.

    Args:
        message: The incoming Telegram message object for the /start command.
    """
    # Registered and sent exactly once: stacking a second, identical decorator
    # or repeating the send would make the bot greet the user twice.
    bot.send_message(
        message.chat.id,
        "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks).",
    )
    bot.register_next_step_handler(message, process_url)
    print("User pushed start.")
|
||||
|
||||
|
||||
def resolve_spotify_link(url):
    """Resolve a Spotify short link to the open.spotify.com URL it points at.

    Only URLs on spotify.link / spotify.app.link are fetched; any other input
    is returned untouched without network access.

    Resolution order for a short link:
      1. the final URL after following redirects, if it is on open.spotify.com;
      2. the target of a <meta http-equiv="refresh"> tag;
      3. the og:url meta tag;
      4. the <link rel="canonical"> tag;
      5. the first open.spotify.com artist URL found anywhere in the page;
      6. otherwise the final (post-redirect) URL as-is.

    Args:
        url: The URL text sent by the user.

    Returns:
        The resolved URL, or the original ``url`` when it is not a short link
        or the HTTP request fails.
    """
    # Local import: the page text used to live in a variable called ``html``,
    # which hid the stdlib module of the same name.
    from html import unescape

    short_link_pattern = r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/"
    open_spotify_pattern = r"^https?://open\.spotify\.com/"

    if not re.match(short_link_pattern, url, flags=re.IGNORECASE):
        return url

    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
            allow_redirects=True,
        )
        final_url = response.url
        print(f"Resolved short link: {url} -> {final_url}")
        if re.match(open_spotify_pattern, final_url, flags=re.IGNORECASE):
            return final_url
        # Read inside the same try so a requests error while obtaining the
        # body is reported below rather than silently discarded.
        page = response.text or ""
    except requests.RequestException as e:
        print(f"Error resolving Spotify short URL '{url}': {e}")
        return url

    # Places where an interstitial page may carry the real target, in order of
    # preference: meta refresh, OpenGraph URL, canonical link.
    embedded_url_patterns = (
        r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']',
        r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']',
        r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']',
    )
    for pattern in embedded_url_patterns:
        found = re.search(pattern, page, flags=re.IGNORECASE)
        if not found:
            continue
        # Attribute values are HTML-escaped (e.g. "&amp;" inside a query
        # string); decode them before treating the value as a URL.
        candidate = unescape(found.group(1))
        if re.match(open_spotify_pattern, candidate, flags=re.IGNORECASE):
            return candidate

    # Fallback: scan for artist URLs in the HTML.
    found = re.search(
        r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+",
        page,
        flags=re.IGNORECASE,
    )
    return found.group(0) if found else final_url
|
||||
|
||||
|
||||
def canonicalize_artist_url(url):
    """Reduce a Spotify artist URL to https://open.spotify.com/artist/<id>.

    Drops an optional locale prefix (``/intl-xx/``), a single trailing slash,
    and any query string or fragment. The scheme and host are matched
    case-insensitively; the artist id is kept exactly as given.

    Example:
        https://open.spotify.com/intl-en/artist/<id>?si=...
        -> https://open.spotify.com/artist/<id>

    Args:
        url: The URL to canonicalize.

    Returns:
        The canonical artist URL, or ``url`` unchanged when it is not an
        open.spotify.com artist URL (albums, tracks, deeper artist sub-pages,
        other hosts, ...).
    """
    matched = re.match(
        # (?i:...) limits case-insensitivity to scheme + host; the path and the
        # base-62 id stay case-sensitive.
        r"^(?i:https?://open\.spotify\.com)/(?:intl-[a-z-]+/)?"
        r"(artist/[a-zA-Z0-9]+)/?(?:[?#].*)?$",
        url,
    )
    if matched is None:
        return url

    canonical = "https://open.spotify.com/" + matched.group(1)
    if canonical != url:
        print(f"Canonicalized artist URL: {url} -> {canonical}")
    return canonical
|
||||
|
||||
|
||||
def is_valid_artist_url(url):
    """Check whether ``url`` is a Spotify artist URL.

    Accepts http or https on open.spotify.com, an optional locale prefix such
    as ``/intl-en/``, and an optional query string after the artist id.
    Anything after the id other than a query string is rejected.

    Args:
        url: The URL to check.

    Returns:
        True when the whole string matches the artist URL shape, else False.
    """
    # A single anchored match; an earlier, looser prefix-only match whose
    # result was immediately overwritten has been removed.
    return (
        re.match(
            r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?$",
            url,
        )
        is not None
    )
|
||||
|
||||
|
||||
def get_artist_name(url):
    """Fetch a Spotify artist page and return the artist name from its title.

    The page <title> has the " | Spotify" marker removed, then one trailing
    descriptive suffix ("Songs and Music", "Songs", or
    "Songs, Albums, Bio & More") stripped if present.

    Args:
        url: The artist page URL to fetch.

    Returns:
        The cleaned artist name, or "Unknown Artist" when the request fails,
        the page has no <title>, or nothing is left after cleaning.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        page = response.text
    except requests.RequestException as e:
        print(f"Error fetching artist name: {e}")
        return "Unknown Artist"

    # Parse once; only the <title> element is needed.
    title_tag = BeautifulSoup(page, "html.parser").find("title")
    if title_tag is None:
        return "Unknown Artist"

    artist_name = title_tag.text.replace(" | Spotify", "").strip()
    # Remove unwanted suffixes that occasionally appear.
    artist_name = re.sub(
        r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$",
        "",
        artist_name,
        flags=re.IGNORECASE,
    )
    # An empty result would later become an empty directory name, so fall back.
    return artist_name.strip() or "Unknown Artist"
|
||||
|
||||
def process_url(message):
    """Download an artist's music for the URL contained in ``message``.

    Steps:
      1. Resolve spotify.link short URLs (resolve_spotify_link).
      2. Canonicalize the open.spotify.com artist URL (canonicalize_artist_url).
      3. Validate it (is_valid_artist_url); reject anything else with a hint.
      4. Look up the artist name, create ``dldir/<artist>`` and run spotdl
         there, sending its stdout/stderr to the spotdl log file.
      5. Tell the user the download finished.

    Args:
        message: The incoming Telegram message; ``message.text`` may be None
            for non-text messages, which is treated like an empty message.
    """
    raw_url = (message.text or "").strip()
    if not raw_url:
        bot.send_message(message.chat.id, "Please send a valid Spotify artist URL.")
        print("Empty message text received.")
        return

    # 1) Resolve short links, 2) canonicalize the artist URL format.
    resolved_url = canonicalize_artist_url(resolve_spotify_link(raw_url))

    # 3) Validate URL
    if not is_valid_artist_url(resolved_url):
        bot.send_message(
            message.chat.id,
            "Please send a valid Spotify artist URL, not a playlist, album, or track.",
        )
        print(f"Rejected URL: {raw_url} -> {resolved_url}")  # Debugging
        return

    artist = get_artist_name(resolved_url)
    bot.send_message(
        message.chat.id, f"Downloading music for {artist}. This may take a while."
    )

    # The name comes from a remote page title: keep it a single path component
    # so a name like "AC/DC" or ".." cannot nest under or escape dldir.
    folder_name = re.sub(r"[\\/]", "_", artist)
    if folder_name in ("", ".", ".."):
        folder_name = "Unknown Artist"
    artist_directory = os.path.join(dldir, folder_name)
    os.makedirs(artist_directory, exist_ok=True)

    spotdl_command = [
        # os.path.join works whether or not workingdir ends with a separator;
        # plain concatenation produced ".../scriptbin/spotdl" without one.
        os.path.join(workingdir, "bin", "spotdl"),
        "--format",
        "opus",
        "--bitrate",
        "80k",
        resolved_url,
    ]

    with open(spotdl_log_path, "a") as log_file:
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' url={resolved_url} dir={artist_directory}\n"
        )
        # The child writes straight to the file descriptor; flush our buffered
        # line first so it appears before spotdl's output, not after it.
        log_file.flush()
        completed = subprocess.run(
            spotdl_command,
            # Run in the artist directory without os.chdir, which would change
            # the working directory of the whole bot process and make a
            # relative dldir nest deeper on every download.
            cwd=artist_directory,
            stdout=log_file,
            stderr=log_file,
        )
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}' (exit code {completed.returncode})\n"
        )

    bot.send_message(message.chat.id, f"Finished downloading {artist}.")
    print(f"Download completed for {artist}.")
    bot.send_message(
        message.chat.id, "Send another artist URL or use /start to begin again."
    )
|
||||
|
||||
@bot.message_handler(
    # isinstance guard first: m.text is None for non-text messages, and calling
    # a str method on it would raise inside the filter.
    func=lambda m: isinstance(m.text, str)
    and re.match(
        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", m.text.strip()
    )
)
def process_direct_artist_url(message):
    """Handle a message that starts with an open.spotify.com artist URL.

    Matches http/https and an optional locale prefix such as ``/intl-en/``,
    then hands the message to process_url.

    Args:
        message: The incoming Telegram message.
    """
    process_url(message)


# Earlier name of this handler, kept as an alias so references to it still resolve.
process_direct_url = process_direct_artist_url
|
||||
|
||||
|
||||
@bot.message_handler(
    # Only text messages can match; m.text is checked before it is stripped.
    func=lambda m: isinstance(m.text, str)
    and re.match(
        r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", m.text.strip()
    )
)
def process_spotify_link(message):
    """Handle a message that starts with a spotify.link / spotify.app.link URL.

    The short link itself is not resolved here; the message is handed to
    process_url, which performs resolution and validation.

    Args:
        message: The incoming Telegram message.
    """
    process_url(message)
|
||||
|
||||
|
||||
@bot.message_handler(func=lambda message: True)
def echo_all(message):
    """Reply with a usage hint to any message this handler receives.

    The filter accepts every message passed to it, so this acts as the
    fallback when the message was not a recognised Spotify URL.

    Args:
        message: The incoming Telegram message.
    """
    bot.reply_to(message, "I don't understand. Please send a Spotify artist URL.")
|
||||
|
||||
|
||||
# Start receiving updates only when run as a script, not when imported.
if __name__ == "__main__":
    bot.polling()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue