Resolve spotify.link short URL
Added the ability to resolve spotify.link short URLs.
This commit is contained in:
parent
e34257e97c
commit
4d6ab13914
1 changed files with 185 additions and 29 deletions
214
start.py
214
start.py
|
|
@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN")
|
||||||
# Root folder that receives one sub-folder per downloaded artist, and the
# prefix under which the spotdl executable (bin/spotdl) is looked up.
# Both values are placeholders to be edited per deployment.
dldir = "/path/to/Music/"
workingdir = "/path/to/script"

# The spotdl log is kept next to this script, independent of the current
# working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
spotdl_log_path = os.path.join(script_dir, "spotdl.log")

# Opening with "w" truncates the previous log, so each run starts with a
# fresh file containing a single timestamped header line.
_run_started = datetime.datetime.now().isoformat()
with open(spotdl_log_path, "w") as _log_init:
    _log_init.write(f"{_run_started} Starting new run\n")

print("tg_spotdl bot is running.")
|
||||||
|
|
||||||
@bot.message_handler(commands=["start"])
def start(message):
    """Greet the user on /start and hand their next message to process_url."""
    chat_id = message.chat.id
    greeting = "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks)."
    bot.send_message(chat_id, greeting)
    bot.register_next_step_handler(message, process_url)
    print("User pushed start.")
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_spotify_link(url):
    """Resolve a Spotify short link to an open.spotify.com URL.

    URLs on spotify.link / spotify.app.link are fetched with redirects
    followed. If the post-redirect URL is on open.spotify.com it is returned
    directly. Otherwise the response HTML is searched, in this order, for a
    meta-refresh target, an ``og:url`` meta tag, a canonical ``<link>`` and
    finally any artist URL appearing anywhere in the page.

    Args:
        url: URL text as received from the user.

    Returns:
        The resolved URL; the post-redirect URL when the page yields nothing
        better; or ``url`` unchanged when it is not a short link or the
        request fails.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    from html import unescape

    short_link = r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/"
    if not re.match(short_link, url, flags=re.IGNORECASE):
        return url

    open_spotify = r"^https?://open\.spotify\.com/"

    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
            allow_redirects=True,
        )
    except requests.RequestException as e:
        print(f"Error resolving Spotify short URL '{url}': {e}")
        return url

    final_url = response.url
    print(f"Resolved short link: {url} -> {final_url}")
    if re.match(open_spotify, final_url, flags=re.IGNORECASE):
        return final_url

    # Best effort: if the body cannot be read as text there are simply no
    # HTML hints to inspect, and the fallback below returns final_url.
    try:
        page = response.text or ""
    except Exception:
        page = ""

    # Each pattern captures one URL from an HTML attribute, tried in priority
    # order: meta refresh, OpenGraph URL, canonical link.
    attribute_patterns = (
        r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']',
        r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']',
        r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']',
    )
    for pattern in attribute_patterns:
        found = re.search(pattern, page, flags=re.IGNORECASE)
        if not found:
            continue
        # Attribute values are HTML-escaped (e.g. "&amp;" between query
        # parameters); decode them so the returned URL is usable as-is.
        candidate = unescape(found.group(1)).strip()
        if re.match(open_spotify, candidate, flags=re.IGNORECASE):
            return candidate

    # Fallback: scan for an artist URL anywhere in the HTML.
    found = re.search(
        r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+",
        page,
        flags=re.IGNORECASE,
    )
    return found.group(0) if found else final_url
|
||||||
|
|
||||||
|
|
||||||
|
def canonicalize_artist_url(url):
    """Reduce a Spotify artist URL to ``https://open.spotify.com/artist/<id>``.

    Removes an ``intl-<locale>/`` path prefix, a single trailing slash, the
    query string and the fragment. Scheme and host are matched
    case-insensitively; the path, including the artist ID, is matched
    case-sensitively and the ID is preserved exactly.

    Example:
        https://open.spotify.com/intl-en/artist/<id>?si=...
        -> https://open.spotify.com/artist/<id>

    Args:
        url: Candidate URL.

    Returns:
        The canonical artist URL, or ``url`` unchanged when it is not an
        open.spotify.com artist URL.
    """
    m = re.match(
        # (?i:...) limits case-insensitivity to scheme + host.
        r"^(?i:https?://open\.spotify\.com)/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)/?(?:[?#].*)?$",
        url,
    )
    if not m:
        return url

    canonical = "https://open.spotify.com/artist/" + m.group(1)
    if canonical != url:
        print(f"Canonicalized artist URL: {url} -> {canonical}")
    return canonical
|
||||||
|
|
||||||
|
|
||||||
def is_valid_artist_url(url):
    """Return True when url is an open.spotify.com artist URL.

    An optional ``intl-<locale>/`` path prefix and an optional query string
    are accepted; any other text after the artist ID makes the URL invalid.
    """
    artist_url_pattern = (
        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?$"
    )
    return re.match(artist_url_pattern, url) is not None
|
||||||
|
|
||||||
|
|
||||||
def get_artist_name(url):
    """Download the artist page at url and derive the artist name from its <title>.

    Returns "Unknown Artist" when the request fails, when the page has no
    <title> element, or when nothing is left of the title after cleanup.
    """
    fallback = "Unknown Artist"
    try:
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        page.raise_for_status()
        title_tag = BeautifulSoup(page.text, "html.parser").find("title")
        if not title_tag:
            return fallback

        # Drop the site suffix first, then any trailing boilerplate phrase
        # that occasionally follows the artist name in the title.
        name = title_tag.text.replace(" | Spotify", "").strip()
        name = re.sub(
            r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$",
            "",
            name,
            flags=re.IGNORECASE,
        )
        return name.strip() or fallback
    except requests.RequestException as e:
        print(f"Error fetching artist name: {e}")
        return fallback
|
||||||
|
|
||||||
def _artist_folder_name(artist):
    """Turn a scraped artist name into a single, safe path component."""
    folder = artist.replace(os.sep, "_")
    if os.altsep:
        folder = folder.replace(os.altsep, "_")
    # "." and ".." would resolve to dldir itself or its parent.
    if folder in (".", ".."):
        folder = "_"
    return folder


def process_url(message):
    """Resolve, validate and download the Spotify artist URL in message.

    The message text is resolved (short links), canonicalized and validated.
    On success the artist name is looked up, a per-artist folder is created
    under ``dldir`` and spotdl is run inside it, with its output appended to
    the spotdl log. The user is told whether the download finished or failed.

    Args:
        message: Incoming Telegram message; only ``message.text`` and
            ``message.chat.id`` are read here.
    """
    chat_id = message.chat.id
    raw_url = (message.text or "").strip()
    if not raw_url:
        bot.send_message(chat_id, "Please send a valid Spotify artist URL.")
        print("Empty message text received.")
        return

    # 1) Resolve spotify.link short URLs
    resolved_url = resolve_spotify_link(raw_url)

    # 2) Canonicalize open.spotify.com artist URL format
    resolved_url = canonicalize_artist_url(resolved_url)

    # 3) Validate URL
    if not is_valid_artist_url(resolved_url):
        bot.send_message(
            chat_id,
            "Please send a valid Spotify artist URL, not a playlist, album, or track.",
        )
        print(f"Rejected URL: {raw_url} -> {resolved_url}")  # Debugging
        return

    artist = get_artist_name(resolved_url)
    bot.send_message(chat_id, f"Downloading music for {artist}. This may take a while.")

    # The name comes from a web page title, so it must not be able to add
    # path separators or climb out of dldir (e.g. "AC/DC", "..").
    artist_directory = os.path.join(dldir, _artist_folder_name(artist))
    os.makedirs(artist_directory, exist_ok=True)

    # os.path.join works whether or not workingdir ends with a separator.
    spotdl_bin = os.path.join(workingdir, "bin", "spotdl")

    with open(spotdl_log_path, "a") as log_file:
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' "
            f"url={resolved_url} dir={artist_directory}\n"
        )
        # Flush before the child writes to the same file; otherwise this
        # buffered header can land after the child's output.
        log_file.flush()
        try:
            # cwd= confines the directory change to the child process, so a
            # relative dldir keeps resolving from the same place every time.
            completed = subprocess.run(
                [
                    spotdl_bin,
                    "--format",
                    "opus",
                    "--bitrate",
                    "80k",
                    resolved_url,
                ],
                stdout=log_file,
                stderr=log_file,
                cwd=artist_directory,
            )
            exit_code = completed.returncode
        except OSError as e:
            # Raised when the executable is missing or cannot be started.
            log_file.write(
                f"{datetime.datetime.now().isoformat()} Could not start spotdl: {e}\n"
            )
            exit_code = None
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}' "
            f"exit_code={exit_code}\n"
        )

    if exit_code == 0:
        bot.send_message(chat_id, f"Finished downloading {artist}.")
        print(f"Download completed for {artist}.")
    else:
        bot.send_message(chat_id, f"Download failed for {artist}.")
        print(f"Download failed for {artist} (exit code {exit_code}).")

    bot.send_message(chat_id, "Send another artist URL or use /start to begin again.")
|
||||||
|
|
||||||
def _looks_like_artist_url(m):
    """Filter: truthy when the message text starts with an open.spotify.com artist URL."""
    return isinstance(m.text, str) and re.match(
        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", m.text.strip()
    )


@bot.message_handler(func=_looks_like_artist_url)
def process_direct_artist_url(message):
    """Handle an artist URL sent directly, without a preceding /start."""
    process_url(message)
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_short_link(m):
    """Filter: truthy when the message text starts with a spotify.link / spotify.app.link URL."""
    return isinstance(m.text, str) and re.match(
        r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", m.text.strip()
    )


@bot.message_handler(func=_looks_like_short_link)
def process_spotify_link(message):
    """Handle a Spotify short link; process_url resolves it before validating."""
    process_url(message)
|
||||||
|
|
||||||
|
|
||||||
@bot.message_handler(func=lambda _message: True)
def echo_all(message):
    """Catch-all handler (its filter accepts every message): reply with a usage hint."""
    hint = "I don't understand. Please send a Spotify artist URL."
    bot.reply_to(message, hint)
|
||||||
|
|
||||||
|
|
||||||
# Start polling for Telegram updates only when run as a script, not on import.
if __name__ == "__main__":
    bot.polling()
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue