"""Download every image referenced by the <img> tags of a web page.

The script asks for a page URL, fetches it, and saves each image it finds
(regular HTTP(S) images as well as inline base64 ``data:`` images) into
``~/Downloads/downloaded_images``.
"""

import base64
import os
import re
from typing import Union
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a server before giving up. Without a timeout,
# requests.get() can block forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Used when the URL path has no usable final component (e.g. it ends in "/").
FALLBACK_FILENAME = "downloaded_image"

# Matches base64 image data URLs such as "data:image/png;base64,iVBOR...".
# The subtype is limited to a conservative character set because it is taken
# from untrusted page content and ends up as the saved file's extension.
DATA_URL_PATTERN = re.compile(
    r'data:image/(?P<ext>[A-Za-z0-9.+-]+);base64,(?P<data>.+)'
)


def download_image(url: str, folder_path: str) -> None:
    """Stream the image at *url* into *folder_path*.

    The file is named after the last component of the URL path. Network and
    HTTP errors are reported on stdout and the function returns without
    raising; file-system errors (``OSError``) propagate to the caller.

    Args:
        url: Absolute HTTP(S) URL of the image.
        folder_path: Directory to save into; created if it does not exist.
    """
    os.makedirs(folder_path, exist_ok=True)

    name = os.path.basename(urlparse(url).path)
    if name in ("", ".", ".."):
        # A path ending in "/" (or a dot segment) has no file name; opening
        # the joined path would target a directory and fail.
        name = FALLBACK_FILENAME
    filename = os.path.join(folder_path, name)

    try:
        # The context manager releases the streamed connection even when the
        # status check or the transfer fails.
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            response.raise_for_status()  # Check whether the request succeeded
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except requests.RequestException as e:
        print(f"Failed to retrieve image {url}: {e}")
        return

    print(f"Downloaded: {filename}")


def save_base64_image(data_url: str, folder_path: str, count: int) -> None:
    """Decode a base64 ``data:image/...`` URL and write it to *folder_path*.

    The file is named ``image_<count>.<subtype>``. Input that does not match
    the expected data-URL shape, or whose payload is not valid base64, is
    reported on stdout and skipped.

    Args:
        data_url: The full ``data:image/<subtype>;base64,<payload>`` string.
        folder_path: Directory to save into; created if it does not exist.
        count: Sequence number used to build a unique file name.
    """
    os.makedirs(folder_path, exist_ok=True)

    match = DATA_URL_PATTERN.match(data_url)
    if not match:
        print(f"Invalid base64 image data: {data_url}")
        return

    try:
        img_data = base64.b64decode(match.group('data'))
    except ValueError:
        # binascii.Error (raised for malformed base64) subclasses ValueError.
        print(f"Invalid base64 image data: {data_url}")
        return

    ext = match.group('ext')
    filename = os.path.join(folder_path, f'image_{count}.{ext}')
    with open(filename, 'wb') as file:
        file.write(img_data)
    print(f"Downloaded: {filename}")


def download_all_images(
    html_content: Union[str, bytes], base_url: str, folder_path: str
) -> None:
    """Save every image referenced by an ``<img src=...>`` in *html_content*.

    Inline base64 images are decoded directly. Every other ``src`` is
    resolved against *base_url* (so relative and protocol-relative URLs work)
    and downloaded when the result is an HTTP(S) URL; anything else is
    reported and ignored.

    Args:
        html_content: The page markup, as text or raw bytes.
        base_url: URL of the page, used to resolve relative image URLs.
        folder_path: Directory the images are saved into.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    count = 0  # Number of inline base64 images seen so far (used in names).

    for img in soup.find_all('img'):
        img_url = (img.get('src') or '').strip()
        if not img_url:
            continue

        if img_url.startswith('data:image/'):
            # Print only the start of the data URL
            print(f"Attempting to save base64 image: {img_url[:30]}...")
            count += 1
            save_base64_image(img_url, folder_path, count)
            continue

        # Resolve first, then decide: absolute URLs pass through urljoin
        # unchanged, relative ones become absolute.
        resolved_url = urljoin(base_url, img_url)
        if urlparse(resolved_url).scheme in ('http', 'https'):
            print(f"Attempting to download image: {resolved_url}")
            download_image(resolved_url, folder_path)
        else:
            print(f"Ignoring non-http URL: {img_url}")


def main() -> None:
    """Prompt for a page URL and download all of its images."""
    url = input("Enter the URL of the webpage: ").strip()
    folder_path = os.path.expanduser("~/Downloads/downloaded_images")

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # Check whether the request succeeded
    except requests.RequestException as e:
        print(f"Failed to retrieve webpage {url}: {e}")
        return

    # response.url is the final URL after any redirects, which is the correct
    # base for resolving relative image paths.
    download_all_images(response.content, response.url, folder_path)


if __name__ == "__main__":
    main()