"""Download every image referenced by a web page into a local directory.

Provenance: this file was added as ``web/ladda_hem_bilder.py`` by commit
ae67b9c7b3a12bd9c7a095be2b8e9080ccc01a3d (Joakim Persson,
Tue, 13 Aug 2024 14:30:34 +0200) with the subject
"Test med att hämta alla bilder på hemsida"
(English: "Test of fetching all images on a web page").
"""

import base64
import os
import re
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup
import requests

# Inline base64 images anywhere in the page source (HTML attributes, CSS, ...).
# Group 1 is the image subtype, group 2 the base64 payload. The payload class
# is limited to the base64 alphabet so the match stops at the closing quote or
# parenthesis instead of running to the end of the line.
INLINE_BILD_MÖNSTER = re.compile(
    r'data:image/([\w.+-]+)(?:;[\w.=+-]+)*;base64,([A-Za-z0-9+/=]+)'
)

# Directory (relative to the current working directory) that receives images.
BILD_KATALOG = "bilder"

# requests waits forever by default; bound every request (seconds).
TIDSGRÄNS = 10


def _filnamn_från_url(bild_url, index):
    """Return a local file name for *bild_url*, ignoring query and fragment.

    Falls back to ``bild_<index>`` when the URL path has no usable last
    segment (for example when it ends with a slash).
    """
    filnamn = os.path.basename(urlparse(bild_url).path)
    if filnamn in ("", ".", ".."):
        filnamn = f"bild_{index}"
    return filnamn


def _spara_bild(filnamn, data):
    """Write *data* (bytes) to BILD_KATALOG/*filnamn* and report it."""
    fil_sökväg = os.path.join(BILD_KATALOG, filnamn)
    with open(fil_sökväg, 'wb') as f:
        f.write(data)
    print(f"Bilden {filnamn} har laddats ner till {BILD_KATALOG}.")


def ladda_ner_bilder(url):
    """Fetch the page at *url* and save all of its images to ``bilder/``.

    Images are collected from the ``src`` attribute of every ``<img>`` tag
    and from base64 ``data:image/...`` URIs found anywhere in the page text.
    Linked images are downloaded over HTTP; inline images are decoded
    locally. A failure for a single image is reported and skipped so the
    remaining images are still processed.

    Args:
        url: Absolute URL of the page to scan.

    Raises:
        requests.RequestException: If the page itself cannot be fetched or
            answers with an HTTP error status.
        OSError: If the output directory or a file cannot be written.
    """
    svar = requests.get(url, timeout=TIDSGRÄNS)
    svar.raise_for_status()
    soup = BeautifulSoup(svar.text, 'html.parser')

    # Collect linked images. urljoin leaves absolute URLs untouched and
    # resolves relative ones against the page URL. Inline data URIs are
    # skipped here because the regex scan below picks them up.
    länkade = []
    for img in soup.find_all('img'):
        src = (img.get('src') or '').strip()
        if src and not src.lower().startswith('data:'):
            länkade.append(urljoin(url, src))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    länkade = list(dict.fromkeys(länkade))

    inline_bilder = list(dict.fromkeys(
        match.groups() for match in INLINE_BILD_MÖNSTER.finditer(svar.text)
    ))

    os.makedirs(BILD_KATALOG, exist_ok=True)

    for index, bild_url in enumerate(länkade, start=1):
        try:
            bild_svar = requests.get(bild_url, timeout=TIDSGRÄNS)
        except requests.RequestException as fel:
            print(f"Kunde inte ladda ner {bild_url}: {fel}")
            continue
        if bild_svar.status_code != 200:
            print(f"Kunde inte ladda ner {bild_url}: HTTP {bild_svar.status_code}")
            continue
        _spara_bild(_filnamn_från_url(bild_url, index), bild_svar.content)

    for index, (bild_typ, bild_data) in enumerate(inline_bilder, start=1):
        try:
            data = base64.b64decode(bild_data, validate=True)
        except ValueError:
            # binascii.Error is a ValueError: truncated or malformed payload.
            continue
        # "svg+xml" -> "svg"; plain subtypes such as "png" are used as-is.
        ändelse = bild_typ.split('+')[0]
        _spara_bild(f"inline_{index}.{ändelse}", data)


def main():
    """Prompt for a page URL and download its images, reporting any error."""
    url = input("Ange URL till sidan från vilken du vill hämta bilder: ").strip()
    # Check the full scheme: a bare host such as "httpbin.org" also starts
    # with "http" but still needs a scheme prepended.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    try:
        ladda_ner_bilder(url)
    except Exception as e:
        # Top-level boundary of an interactive script: show the problem
        # to the user instead of a traceback.
        print(f"Fel inträffade: {e}")


if __name__ == "__main__":
    main()