From 4d6ab1391487ff48b2faa2c65b1994928b904d8d Mon Sep 17 00:00:00 2001
From: Clarth <clarth@admin@claytonia.net>
Date: Sat, 18 Oct 2025 12:56:03 -0400
Subject: [PATCH] Resolve spotify.link short URL

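Add the ability to resolve spotify.link and spotify.app.link short URLs to
open.spotify.com artist URLs. Also accept locale-prefixed (/intl-xx/) artist
URLs with query parameters, and write spotdl output to spotdl.log next to
the script.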
---
 start.py | 214 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 185 insertions(+), 29 deletions(-)

diff --git a/start.py b/start.py
index b7a1d0b..66a27d1 100644
--- a/start.py
+++ b/start.py
@@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN")
 dldir = "/path/to/Music/"
 workingdir = "/path/to/script"
 
+import datetime  # this hunk adds no import for it; harmless if the file header already has one
+script_dir = os.path.dirname(os.path.abspath(__file__))
+spotdl_log_path = os.path.join(script_dir, "spotdl.log")
+with open(spotdl_log_path, "w") as _log_init:  # "w" truncates: the log is restarted on every run
+    _log_init.write(f"{datetime.datetime.now().isoformat()} Starting new run\n")
 print("tg_spotdl bot is running.")
 
-@bot.message_handler(commands=['start'])
+
+@bot.message_handler(commands=["start"])
 def start(message):
-    bot.send_message(message.chat.id, "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks).")
+    bot.send_message(
+        message.chat.id,
+        "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks).",
+    )
     bot.register_next_step_handler(message, process_url)
     print("User pushed start.")
 
+
+def resolve_spotify_link(url):
+    """
+    Resolves Spotify short URLs (spotify.link and spotify.app.link) to their final destination.
+    Returns the final resolved URL or the original URL if resolution fails.
+    """
+    try:
+        if re.match(
+            r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/",
+            url,
+            flags=re.IGNORECASE,
+        ):
+            headers = {"User-Agent": "Mozilla/5.0"}
+            response = requests.get(
+                url, headers=headers, timeout=10, allow_redirects=True
+            )
+            final_url = response.url
+            print(f"Resolved short link: {url} -> {final_url}")
+            if re.match(
+                r"^https?://open\.spotify\.com/", final_url, flags=re.IGNORECASE
+            ):
+                return final_url
+            html = ""
+            try:
+                html = response.text or ""
+            except Exception:
+                html = ""
+            # Try meta refresh URL
+            m = re.search(
+                r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']',
+                html,
+                flags=re.IGNORECASE,
+            )
+            if m:
+                candidate = m.group(1)
+                if re.match(
+                    r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE
+                ):
+                    return candidate
+            # Try OpenGraph URL
+            m = re.search(
+                r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']',
+                html,
+                flags=re.IGNORECASE,
+            )
+            if m:
+                candidate = m.group(1)
+                if re.match(
+                    r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE
+                ):
+                    return candidate
+            # Try canonical link
+            m = re.search(
+                r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']',
+                html,
+                flags=re.IGNORECASE,
+            )
+            if m:
+                candidate = m.group(1)
+                if re.match(
+                    r"^https?://open\.spotify\.com/", candidate, flags=re.IGNORECASE
+                ):
+                    return candidate
+            # Fallback: scan for artist URLs in the HTML
+            m = re.search(
+                r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+",
+                html,
+                flags=re.IGNORECASE,
+            )
+            return m.group(0) if m else final_url
+        return url
+    except requests.RequestException as e:
+        print(f"Error resolving Spotify short URL '{url}': {e}")
+        return url
+
+
+def canonicalize_artist_url(url):
+    """
+    Canonicalizes Spotify artist URLs by removing locale prefixes and query params.
+    Example: https://open.spotify.com/intl-en/artist/<id>?si=... -> https://open.spotify.com/artist/<id>
+    """
+    m = re.match(
+        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?(artist/[a-zA-Z0-9]+)(?:\?.*)?$",
+        url,
+    )
+    if m:
+        canonical = "https://open.spotify.com/" + m.group(1)
+        if canonical != url:
+            print(f"Canonicalized artist URL: {url} -> {canonical}")
+        return canonical
+    return url
+
+
 def is_valid_artist_url(url):
     """Checks if the URL is a valid Spotify artist URL."""
-    match = re.match(r"https://open\.spotify\.com/artist/([a-zA-Z0-9]+)", url)
+    # Allow locale-prefixed paths like /intl-en/artist/<id> and optional query params
+    match = re.match(
+        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?$",
+        url,
+    )
     return bool(match)
 
+
 def get_artist_name(url):
     """Fetches the Spotify artist page and extracts the cleaned artist's name."""
     headers = {"User-Agent": "Mozilla/5.0"}
     try:
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
+        soup = BeautifulSoup(response.text, "html.parser")
         title_tag = soup.find("title")
 
         if title_tag:
             artist_name = title_tag.text.replace(" | Spotify", "").strip()
-            
-            # Remove unwanted suffixes
+
+            # Remove unwanted suffixes that occasionally appear
             artist_name = re.sub(
-                r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$", 
-                "", 
-                artist_name, 
-                flags=re.IGNORECASE
+                r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$",
+                "",
+                artist_name,
+                flags=re.IGNORECASE,
             )
-            
-            return artist_name.strip()
+
+            return artist_name.strip() or "Unknown Artist"
         else:
             return "Unknown Artist"
     except requests.RequestException as e:
         print(f"Error fetching artist name: {e}")
         return "Unknown Artist"
 
-def process_url(message):
-    url = message.text.strip()
 
-    # Validate URL
-    if not is_valid_artist_url(url):
-        bot.send_message(message.chat.id, "Please send a valid Spotify artist URL, not a playlist, album, or track.")
-        print(f"Rejected URL: {url}")  # Debugging
+def process_url(message):
+    raw_url = (message.text or "").strip()
+    if not raw_url:
+        bot.send_message(message.chat.id, "Please send a valid Spotify artist URL.")
+        print("Empty message text received.")
         return
 
-    artist = get_artist_name(url)
+    # 1) Resolve spotify.link short URLs
+    resolved_url = resolve_spotify_link(raw_url)
 
-    bot.send_message(message.chat.id, f"Downloading music for {artist}. This may take a while.")
+    # 2) Canonicalize open.spotify.com artist URL format
+    resolved_url = canonicalize_artist_url(resolved_url)
+
+    # 3) Validate URL
+    if not is_valid_artist_url(resolved_url):
+        bot.send_message(
+            message.chat.id,
+            "Please send a valid Spotify artist URL, not a playlist, album, or track.",
+        )
+        print(f"Rejected URL: {raw_url} -> {resolved_url}")  # Debugging
+        return
+
+    artist = get_artist_name(resolved_url)
+
+    bot.send_message(
+        message.chat.id, f"Downloading music for {artist}. This may take a while."
+    )
 
     artist_directory = os.path.join(dldir, artist)
     os.makedirs(artist_directory, exist_ok=True)
     os.chdir(artist_directory)
 
-    subprocess.run([
-        f"{workingdir}bin/spotdl",
-        "--format", "opus",
-        "--bitrate", "80k",
-        url
-    ])
+    # Line-buffer the log: spotdl writes straight to the shared file descriptor, so
+    # Python's own lines must be flushed first or they land after spotdl's output.
+    with open(spotdl_log_path, "a", buffering=1) as log_file:
+        log_file.write(
+            f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' url={resolved_url} dir={artist_directory}\n"
+        )
+        result = subprocess.run(
+            [
+                # join() is correct whether or not workingdir ends with a slash
+                os.path.join(workingdir, "bin", "spotdl"),
+                "--format", "opus",
+                "--bitrate", "80k",
+                resolved_url,
+            ],
+            stdout=log_file,
+            stderr=log_file,
+        )
+        # Record the exit status so a failed download is visible in the log.
+        log_file.write(f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}' exit={result.returncode}\n")
 
     bot.send_message(message.chat.id, f"Finished downloading {artist}.")
     print(f"Download completed for {artist}.")
-    bot.send_message(message.chat.id, "Send another artist URL or use /start to begin again.")
+    bot.send_message(
+        message.chat.id, "Send another artist URL or use /start to begin again."
+    )
 
-@bot.message_handler(func=lambda message: message.text.startswith("https://open.spotify.com/artist/"))
-def process_direct_url(message):
+
+@bot.message_handler(
+    func=lambda m: isinstance(m.text, str)
+    and re.match(
+        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", m.text.strip()
+    )
+)
+def process_direct_artist_url(message):
     process_url(message)
 
+
+@bot.message_handler(
+    func=lambda m: isinstance(m.text, str)
+    and re.match(
+        r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", m.text.strip()
+    )
+)
+def process_spotify_link(message):
+    process_url(message)
+
+
 @bot.message_handler(func=lambda message: True)
 def echo_all(message):
     bot.reply_to(message, "I don't understand. Please send a Spotify artist URL.")
 
+
 if __name__ == "__main__":
     bot.polling()