Nedladdning av bilder från webbsida. Kodbasen från GPT-4 Omni
This commit is contained in:
@@ -0,0 +1,76 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
import base64
|
||||||
|
import re
|
||||||
|
|
||||||
|
def download_image(url, folder_path, timeout=30):
    """Download the image at ``url`` and store it inside ``folder_path``.

    The file is named after the last segment of the URL path; when that
    segment is empty (for example the path ends with "/"), the name
    "image" is used instead. An existing file with the same name is
    overwritten.

    Args:
        url: Absolute URL of the image to fetch.
        folder_path: Directory to write into; created if it is missing.
        timeout: Seconds passed to ``requests.get`` so that an
            unresponsive server cannot block the script indefinitely.

    Returns:
        None. Request failures are reported on stdout instead of raised.
    """
    os.makedirs(folder_path, exist_ok=True)

    # An empty basename would make open() target the folder itself.
    name = os.path.basename(urlparse(url).path) or "image"
    filename = os.path.join(folder_path, name)

    try:
        # The context manager releases the streamed connection even when
        # the transfer fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Check whether the request succeeded
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(8192):
                    file.write(chunk)
    except requests.RequestException as e:
        # Also covers errors raised while the body is being streamed.
        print(f"Failed to retrieve image {url}: {e}")
        return

    print(f"Downloaded: {filename}")
|
||||||
|
|
||||||
|
def save_base64_image(data_url, folder_path, count):
    """Decode a base64 ``data:image/...`` URL and write it to disk.

    The file is written to ``folder_path`` as ``image_<count>.<ext>``,
    where ``<ext>`` is the image subtype from the data URL with any
    ``+suffix`` removed (``svg+xml`` becomes ``svg``).

    Args:
        data_url: String of the form ``data:image/<ext>;base64,<payload>``.
        folder_path: Directory to write into; created if it is missing.
        count: Number used in the generated file name.

    Returns:
        None. Malformed input is reported on stdout and no file is
        written.
    """
    os.makedirs(folder_path, exist_ok=True)

    # Data URLs can be very large, so only echo the beginning in messages.
    preview = data_url if len(data_url) <= 60 else data_url[:60] + "..."

    # The subtype is limited to characters that are safe in a file name,
    # so it can never contain a path separator. DOTALL lets the payload
    # span several lines instead of being cut off at the first newline.
    match = re.match(
        r'data:image/(?P<ext>[A-Za-z0-9.+-]+);base64,(?P<data>.+)',
        data_url,
        re.DOTALL,
    )
    if not match:
        print(f"Invalid base64 image data: {preview}")
        return

    ext = match.group('ext').split('+')[0] or 'bin'

    try:
        img_data = base64.b64decode(match.group('data'))
    except ValueError:
        # binascii.Error (bad padding or characters) subclasses ValueError.
        print(f"Invalid base64 image data: {preview}")
        return

    filename = os.path.join(folder_path, f'image_{count}.{ext}')
    with open(filename, 'wb') as file:
        file.write(img_data)
    print(f"Downloaded: {filename}")
|
||||||
|
|
||||||
|
def download_all_images(html_content, base_url, folder_path):
    """Save every ``<img>`` found in ``html_content`` to ``folder_path``.

    Each ``src`` value is handled as follows:

    * ``data:image/...`` URLs are passed to ``save_base64_image`` with a
      running counter that starts at 1;
    * everything else is resolved against ``base_url`` (so relative and
      protocol-relative paths work) and passed to ``download_image`` when
      the result is an http or https URL;
    * any other scheme is reported and skipped.

    Args:
        html_content: HTML markup of the page, parsed with BeautifulSoup.
        base_url: URL of the page, used to resolve relative image paths.
        folder_path: Directory the images are written to.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    count = 0
    for img in soup.find_all('img'):
        img_url = img.get('src')
        if not img_url:
            continue

        # Attribute values sometimes carry stray surrounding whitespace.
        img_url = img_url.strip()
        if not img_url:
            continue

        if img_url.startswith('data:image/'):
            print(f"Attempting to save base64 image: {img_url[:30]}...")  # Print only the start of the data URL
            count += 1
            save_base64_image(img_url, folder_path, count)
            continue

        # urljoin leaves absolute URLs untouched and resolves relative
        # ones, so the scheme has to be checked after joining.
        absolute_url = urljoin(base_url, img_url)
        if urlparse(absolute_url).scheme in ('http', 'https'):
            print(f"Attempting to download image: {absolute_url}")
            download_image(absolute_url, folder_path)
        else:
            print(f"Ignoring non-http URL: {img_url}")
|
||||||
|
|
||||||
|
def main():
    """Prompt for a web page URL and download all images found on it.

    Images are stored in ``~/Downloads/downloaded_images``. If the page
    cannot be retrieved, the error is printed and the function returns
    without downloading anything.
    """
    url = input("Enter the URL of the webpage: ").strip()
    folder_path = os.path.expanduser("~/Downloads/downloaded_images")

    try:
        # A timeout keeps an unresponsive server from blocking forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Check whether the request succeeded
    except requests.RequestException as e:
        print(f"Failed to retrieve webpage {url}: {e}")
        return

    # response.url is the final address after any redirects, which is
    # what relative image paths on the page have to be resolved against.
    download_all_images(response.content, response.url, folder_path)
|
||||||
|
|
||||||
|
# Start the interactive downloader only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user