Resolve spotify.link short URL
Add the ability to resolve spotify.link and spotify.app.link short URLs to full open.spotify.com artist URLs.
This commit is contained in:
		
							parent
							
								
									e34257e97c
								
							
						
					
					
						commit
						4d6ab13914
					
				
					 1 changed files with 185 additions and 29 deletions
				
			
		
							
								
								
									
										202
									
								
								start.py
									
										
									
									
									
								
							
							
						
						
									
										202
									
								
								start.py
									
										
									
									
									
								
							| 
						 | 
					@ -11,81 +11,237 @@ bot = telebot.TeleBot("TOKEN")
 | 
				
			||||||
dldir = "/path/to/Music/"
 | 
					dldir = "/path/to/Music/"
 | 
				
			||||||
workingdir = "/path/to/script"
 | 
					workingdir = "/path/to/script"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					script_dir = os.path.dirname(os.path.abspath(__file__))
 | 
				
			||||||
 | 
					spotdl_log_path = os.path.join(script_dir, "spotdl.log")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					with open(spotdl_log_path, "w") as _log_init:
 | 
				
			||||||
 | 
					    _log_init.write(f"{datetime.datetime.now().isoformat()} Starting new run\n")
 | 
				
			||||||
print("tg_spotdl bot is running.")
 | 
					print("tg_spotdl bot is running.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@bot.message_handler(commands=["start"])
def start(message):
    """Handle the /start command.

    Sends the greeting to the originating chat, registers ``process_url`` as
    the next-step handler for this message, and logs the event to stdout.
    """
    chat_id = message.chat.id
    greeting = "Hi! Send me a Spotify artist URL (not playlists, albums, or tracks)."
    bot.send_message(chat_id, greeting)
    bot.register_next_step_handler(message, process_url)
    print("User pushed start.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def resolve_spotify_link(url):
					    """Resolve a Spotify short link to the open.spotify.com URL it points at.

					    Only ``spotify.link`` and ``spotify.app.link`` URLs trigger a network
					    request; every other input is returned unchanged.  After following
					    redirects, the landing page is inspected (meta refresh, ``og:url``,
					    canonical link, then any artist URL in the markup) when the final URL
					    is not already on ``open.spotify.com``.

					    Args:
					        url: URL text as sent by the user.

					    Returns:
					        The resolved ``open.spotify.com`` URL when one can be determined,
					        otherwise the final URL after redirects.  ``url`` itself is
					        returned when it is not a short link or the request fails.
					    """
					    # Function-scope import keeps this block self-contained.
					    from html import unescape

					    is_short_link = re.match(
					        r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/",
					        url,
					        flags=re.IGNORECASE,
					    )
					    if not is_short_link:
					        return url

					    try:
					        response = requests.get(
					            url,
					            headers={"User-Agent": "Mozilla/5.0"},
					            timeout=10,
					            allow_redirects=True,
					        )
					    except requests.RequestException as e:
					        print(f"Error resolving Spotify short URL '{url}': {e}")
					        return url

					    final_url = response.url
					    print(f"Resolved short link: {url} -> {final_url}")

					    open_spotify = re.compile(r"^https?://open\.spotify\.com/", re.IGNORECASE)
					    if open_spotify.match(final_url):
					        return final_url

					    # Best effort: an undecodable body just means there is nothing to scan.
					    try:
					        page = response.text or ""
					    except Exception:
					        page = ""

					    # Places a landing page may embed the real target, in priority order.
					    embedded_url_patterns = (
					        # <meta http-equiv="refresh" content="0; url=...">
					        r'<meta[^>]+http-equiv=["\']refresh["\'][^>]+content=["\'][^"\']*url=([^"\']+)["\']',
					        # <meta property="og:url" content="...">
					        r'<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\']',
					        # <link rel="canonical" href="...">
					        r'<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\']',
					    )
					    for pattern in embedded_url_patterns:
					        found = re.search(pattern, page, flags=re.IGNORECASE)
					        if not found:
					            continue
					        # Attribute values are HTML-escaped (e.g. "&amp;"); decode before use.
					        candidate = unescape(found.group(1)).strip()
					        if open_spotify.match(candidate):
					            return candidate

					    # Fallback: any artist URL that appears anywhere in the markup.
					    found = re.search(
					        r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/[a-zA-Z0-9]+",
					        page,
					        flags=re.IGNORECASE,
					    )
					    return found.group(0) if found else final_url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def canonicalize_artist_url(url):
					    """Reduce a Spotify artist URL to ``https://open.spotify.com/artist/<id>``.

					    Drops an ``intl-xx`` locale prefix, a trailing slash, the query string
					    and any fragment.  Scheme, host and path keywords are matched
					    case-insensitively; the artist id is kept exactly as given.

					    Example:
					        https://open.spotify.com/intl-en/artist/<id>?si=...
					        -> https://open.spotify.com/artist/<id>

					    Args:
					        url: Candidate URL string.

					    Returns:
					        The canonical artist URL, or ``url`` unchanged when it is not an
					        open.spotify.com artist URL.
					    """
					    m = re.match(
					        r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)/?(?:[?#].*)?$",
					        url,
					        flags=re.IGNORECASE,
					    )
					    if not m:
					        return url

					    # Rebuild from the captured id so host and path casing are normalized too.
					    canonical = "https://open.spotify.com/artist/" + m.group(1)
					    if canonical != url:
					        print(f"Canonicalized artist URL: {url} -> {canonical}")
					    return canonical
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def is_valid_artist_url(url):
    """Check whether ``url`` is a Spotify artist URL.

    Accepts ``http``/``https`` URLs on ``open.spotify.com`` whose path is
    ``artist/<id>``, optionally preceded by a locale prefix such as
    ``intl-en/`` and optionally followed by a query string.

    Args:
        url: Value to check; anything that is not a string is invalid.

    Returns:
        True if the whole string matches the artist URL shape, else False.
    """
    if not isinstance(url, str):
        return False
    # fullmatch (not match + "$") so a trailing newline cannot slip through.
    match = re.fullmatch(
        r"https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/([a-zA-Z0-9]+)(?:\?.*)?",
        url,
    )
    return bool(match)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_artist_name(url):
    """Return the artist name taken from a Spotify artist page's <title>.

    The page is fetched with a browser-like User-Agent and a 10 second
    timeout.  The " | Spotify" marker and a trailing "Songs ..." suffix are
    removed from the title text.

    Returns "Unknown Artist" when the request fails, the page has no
    <title>, or nothing is left after cleaning.
    """
    try:
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        page.raise_for_status()

        title = BeautifulSoup(page.text, "html.parser").find("title")
        if not title:
            return "Unknown Artist"

        name = title.text.replace(" | Spotify", "").strip()
        # Strip the descriptive suffix that sometimes follows the name.
        suffix = r"\s*(Songs and Music|Songs|Songs, Albums, Bio & More)\s*$"
        name = re.sub(suffix, "", name, flags=re.IGNORECASE)
        return name.strip() or "Unknown Artist"
    except requests.RequestException as e:
        print(f"Error fetching artist name: {e}")
        return "Unknown Artist"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _safe_directory_name(name):
    """Turn a scraped artist name into a single, safe path component."""
    cleaned = name.replace("\x00", "")
    # Path separators would nest ("AC/DC") or escape ("../x") the download root.
    for separator in {"/", os.sep, os.altsep or os.sep}:
        cleaned = cleaned.replace(separator, "_")
    cleaned = cleaned.strip()
    if cleaned in ("", ".", ".."):
        return "Unknown Artist"
    return cleaned


def process_url(message):
    """Download an artist's music with spotdl for the URL in ``message``.

    Steps: resolve spotify.link short URLs, canonicalize the artist URL,
    validate it, look up the artist name, then run spotdl inside
    ``<dldir>/<artist>`` with its output appended to the spotdl log file.
    Progress and the outcome are reported back to the chat.

    Args:
        message: Incoming Telegram message; ``message.text`` may be None
            (e.g. non-text messages) and is then treated as empty.
    """
    chat_id = message.chat.id

    raw_url = (message.text or "").strip()
    if not raw_url:
        bot.send_message(chat_id, "Please send a valid Spotify artist URL.")
        print("Empty message text received.")
        return

    # 1) Resolve short links, 2) canonicalize, 3) validate.
    resolved_url = canonicalize_artist_url(resolve_spotify_link(raw_url))
    if not is_valid_artist_url(resolved_url):
        bot.send_message(
            chat_id,
            "Please send a valid Spotify artist URL, not a playlist, album, or track.",
        )
        print(f"Rejected URL: {raw_url} -> {resolved_url}")  # Debugging
        return

    artist = get_artist_name(resolved_url)
    bot.send_message(chat_id, f"Downloading music for {artist}. This may take a while.")

    # The name comes from a web page; never use it as a path component unchecked.
    artist_directory = os.path.join(dldir, _safe_directory_name(artist))
    os.makedirs(artist_directory, exist_ok=True)

    # os.path.join works whether or not workingdir ends with a separator.
    spotdl_bin = os.path.join(workingdir, "bin", "spotdl")

    returncode = None
    with open(spotdl_log_path, "a") as log_file:
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Starting spotdl for '{artist}' url={resolved_url} dir={artist_directory}\n"
        )
        # Flush before the child writes to the same file so lines stay in order.
        log_file.flush()
        try:
            # cwd= confines the directory change to the child process instead
            # of changing the bot's own working directory with os.chdir.
            completed = subprocess.run(
                [spotdl_bin, "--format", "opus", "--bitrate", "80k", resolved_url],
                cwd=artist_directory,
                stdout=log_file,
                stderr=log_file,
            )
            returncode = completed.returncode
        except OSError as e:
            # e.g. spotdl binary missing or not executable.
            log_file.write(
                f"{datetime.datetime.now().isoformat()} Could not start spotdl: {e}\n"
            )
        log_file.write(
            f"{datetime.datetime.now().isoformat()} Finished spotdl for '{artist}' exit={returncode}\n"
        )

    if returncode == 0:
        bot.send_message(chat_id, f"Finished downloading {artist}.")
        print(f"Download completed for {artist}.")
    else:
        bot.send_message(
            chat_id, f"Download failed for {artist}. See spotdl.log for details."
        )
        print(f"Download failed for {artist} (exit={returncode}).")

    bot.send_message(chat_id, "Send another artist URL or use /start to begin again.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _is_artist_url_message(m):
    """Message filter: truthy when the text starts with an artist URL.

    Mirrors the inline filter expression: False for non-text messages,
    otherwise the regex match result (a match object or None).
    """
    if not isinstance(m.text, str):
        return False
    text = m.text.strip()
    return re.match(r"^https?://open\.spotify\.com/(?:intl-[a-z-]+/)?artist/", text)


@bot.message_handler(func=_is_artist_url_message)
def process_direct_artist_url(message):
    """Handle an open.spotify.com artist URL sent without /start first."""
    process_url(message)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _is_short_link_message(m):
					    """Message filter: truthy when the text starts with a Spotify short link.

					    False for non-text messages, otherwise the regex match result
					    (a match object or None).
					    """
					    if not isinstance(m.text, str):
					        return False
					    text = m.text.strip()
					    return re.match(r"^https?://(www\.)?(spotify\.link|spotify\.app\.link)/", text)


					@bot.message_handler(func=_is_short_link_message)
					def process_spotify_link(message):
					    """Handle a spotify.link / spotify.app.link URL sent without /start first."""
					    process_url(message)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@bot.message_handler(func=lambda _message: True)
def echo_all(message):
    """Reply with a usage hint; registered with an always-true filter."""
    hint = "I don't understand. Please send a Spotify artist URL."
    bot.reply_to(message, hint)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    bot.polling()
 | 
					    bot.polling()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue