Lade till requests

Beroenden som behövs till appar i /web
Nedladdning av bilder från webbsida. Kodbasen från GPT 4 Omni
2024-08-13 15:27:36 +02:00 · 2024-08-13 15:20:10 +02:00 · 2024-08-13 15:15:04 +02:00 · 2024-08-13 15:13:59 +02:00 · 2024-08-13 14:31:21 +02:00 · 2024-08-13 14:30:34 +02:00
6 changed files with 168 additions and 9 deletions
@@ -161,4 +161,5 @@ cython_debug/
 #.idea/

 # Exclude venv from smartassist
-smartassist/smartassist_dev_venv
+smartassist/smartassist_dev_venv
+.DS_Store
@@ -177,7 +177,7 @@ def chat() -> dict[str, any]:
    url = url_server
    headers = get_auth_headers(url)

-    logger.debug(f"Sending request to:\n\turl:\t{url}\nmodel:\n\t{model}")
+    logger.debug(f"Sending request to:\n\turl:\t{url}\n\tmodel:\t{model}")
    try:
        url = url + "/api/generate"
        logger.debug(f"url: {url} headers: {headers}")
@@ -239,11 +239,16 @@ def select_endpoint_llm() -> Response:
    if len(endpoints) != 1:
        raise ValueError(f"Expected exactly one endpoint with title '{title}', found {len(endpoints)}")

-    global_state.set_host_url(endpoints[0]['url'])
-    global_state.set_llm(llm)
-    logger.debug(f"Updated to host url {endpoints[0]['url']} and LLM {llm}")
-
-    return jsonify({'message': 'Endpoint and LLM selected successfully'})
+    # Reset the session
+    if (title != global_state.get_host_title()) or (llm != global_state.get_llm()): # A change in setting 
+        session.clear()
+        logger.debug('Session cleared due to changed endpoint or changed LLM')
+        global_state.set_host_url(endpoints[0]['url'])
+        global_state.set_llm(llm)
+        logger.debug(f"Updated to host url {endpoints[0]['url']} and LLM {llm}")
+        return jsonify({'message': 'New endpoint and/or LLM detected, settings were changed successfully'})
+    else:
+        return jsonify({'message': 'Endpoint and LLM are untouched'})


@app.route('/smartassist', methods=["POST"])
@@ -123,13 +123,26 @@ class GlobalState:

    def get_host_url(self) -> str:
        """
-        Get the current URL of the host used for LLMs.
+        Get the URL of the host currently used for LLMs.

        Returns:
-            str: The current URL of the host.
+            str: The URL of the current host.
        """
        return self.host_url
    
+    def get_host_title(self) -> str:
+        """
+        Get the title of the host currently used for LLMs.
+        There must be a 1-to-1 mapping from host_url to host_title.
+
+        Returns:
+            str: The title of the current host.
+        """
+        endpoints = self.get_endpoints_with_key_value('url', self.get_host_url())
+        if len(endpoints) != 1:
+            raise ValueError(f"Expected exactly one endpoint with url '{self.get_host_url()}', found {len(endpoints)}")
+        return endpoints[0]["title"]
+    
    def set_llm(self, model_name: str = "phi3:mini") -> None:
        """
        Set the LLM to use for queries.
@@ -0,0 +1,76 @@
+import os
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+import base64
+import re
+
+def download_image(url, folder_path):
+    if not os.path.isdir(folder_path):
+        os.makedirs(folder_path)
+    
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()  # Kontrollera om förfrågan lyckades
+    except requests.RequestException as e:
+        print(f"Failed to retrieve image {url}: {e}")
+        return
+
+    filename = os.path.join(folder_path, os.path.basename(urlparse(url).path))
+    with open(filename, 'wb') as file:
+        for chunk in response.iter_content(1024):
+            file.write(chunk)
+    print(f"Downloaded: {filename}")
+
+def save_base64_image(data_url, folder_path, count):
+    if not os.path.isdir(folder_path):
+        os.makedirs(folder_path)
+    
+    match = re.match(r'data:image/(?P<ext>[^;]+);base64,(?P<data>.+)', data_url)
+    if match:
+        ext = match.group('ext')
+        data = match.group('data')
+        img_data = base64.b64decode(data)
+        filename = os.path.join(folder_path, f'image_{count}.{ext}')
+        with open(filename, 'wb') as file:
+            file.write(img_data)
+        print(f"Downloaded: {filename}")
+    else:
+        print(f"Invalid base64 image data: {data_url}")
+
+def download_all_images(html_content, base_url, folder_path):
+    soup = BeautifulSoup(html_content, 'html.parser')
+    img_tags = soup.find_all('img')
+    
+    count = 0
+    for img in img_tags:
+        img_url = img.get('src')
+        if not img_url:
+            continue
+
+        if img_url.startswith(('http://', 'https://')):
+            img_url = urljoin(base_url, img_url)
+            print(f"Attempting to download image: {img_url}")
+            download_image(img_url, folder_path)
+        elif img_url.startswith('data:image/'):
+            print(f"Attempting to save base64 image: {img_url[:30]}...")  # Print only the start of the data URL
+            count += 1
+            save_base64_image(img_url, folder_path, count)
+        else:
+            print(f"Ignoring non-http URL: {img_url}")
+
+def main():
+    url = input("Enter the URL of the webpage: ")
+    folder_path = os.path.expanduser("~/Downloads/downloaded_images")
+    
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Kontrollera om förfrågan lyckades
+    except requests.RequestException as e:
+        print(f"Failed to retrieve webpage {url}: {e}")
+        return
+
+    download_all_images(response.content, url, folder_path)
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,62 @@
+import os
+import re
+from bs4 import BeautifulSoup
+import requests
+from urllib.parse import urljoin
+import base64
+
+def ladda_ner_bilder(url):
+    # Hämta HTML-sidan
+    svar = requests.get(url)
+    soup = BeautifulSoup(svar.text, 'html.parser')
+
+    # Hitta alla bilder
+    bilder = []
+    for img in soup.find_all('img'):
+        src = img.get('src')
+        if src:
+            bilder.append(src)
+
+    # Hantera inline-bilder i base64
+    INLINE_BILD_MÖNSTER = r'data:image/(.*?);base64,(.*)'
+    matcher = re.compile(INLINE_BILD_MÖNSTER)
+    for match in matcher.finditer(svar.text):
+        bild_typ = match.group(1)
+        bild_data = match.group(2)
+        bilder.append(f"data:{bild_typ};base64,{bild_data}")
+
+    # Ladda ner bilderna
+    bild_katalog = os.path.expanduser("~/Downloads/bilder")
+    if not os.path.exists(bild_katalog):
+        os.makedirs(bild_katalog)
+
+    for bild_url in bilder:
+        if not bild_url.startswith('http'):
+            bild_url = urljoin(url, bild_url)
+
+        if bild_url.startswith('data:'):
+            # Dekodera base64-strängen och spara den som en bild
+            format, data = bild_url.split(';base64,')
+            data = base64.b64decode(data)
+            filnamn = 'inline_' + str(len(bilder)) + '.gif'
+            with open(os.path.join(bild_katalog, filnamn), 'wb') as f:
+                f.write(data)
+        else:
+            svar = requests.get(bild_url)
+            if svar.status_code == 200:
+                filnamn = os.path.basename(bild_url).split('?')[0]
+                with open(os.path.join(bild_katalog, filnamn), 'wb') as f:
+                    f.write(svar.content)
+            print(f"Bilden {filnamn} har laddats ner till {bild_katalog}.")
+
+def main():
+    url = input("Ange URL till sidan från vilken du vill hämta bilder: ")
+    if not url.startswith('http'):
+        url = 'http://' + url
+    try:
+        ladda_ner_bilder(url)
+    except Exception as e:
+        print(f"Fel inträffade: {e}")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,2 @@
+BeautifulSoup4
+requests
Author	SHA1	Message	Date
Joakim Persson	82c7887cf7	Lade till requests	2024-08-13 15:27:36 +02:00
Joakim Persson	5f546a848e	Beroenden som behövs till appar i /web	2024-08-13 15:20:10 +02:00
Joakim Persson	7eb889a889	Nedladdning av bilder från webbsida. Kodbasen från GPT 4 Omni	2024-08-13 15:15:04 +02:00
Joakim Persson	6dab09c861	Fixat så bilderna landar i hämtade filer. Tar bort skalningsinformation m.m. efter filnamnet så att suffix blir korrekt.	2024-08-13 15:13:59 +02:00
Joakim Persson	8288450662	Lade till .DS_Store	2024-08-13 14:31:21 +02:00
Joakim Persson	ae67b9c7b3	Test med att hämta alla bilder på hemsida	2024-08-13 14:30:34 +02:00
joakimp	97ee179b29	Snyggade till en loggutskrift	2024-08-06 23:37:27 +02:00
joakimp	942f9f78c9	Lade till metod för att erhålla host_title för nuvarande endpoint	2024-08-06 23:36:49 +02:00