initial commit

author: Marco Andronaco <andronacomarco@gmail.com> 2023-07-12 09:14:36 +0200
committer: Marco Andronaco <andronacomarco@gmail.com> 2023-07-12 09:14:36 +0200
commit: 733a0a23988fb074c93e6c398d8b9142ee180b29 (patch)
tree: 6a06a08378b0744474d45d8692ac1ce17bec7859
download: sunstroke-733a0a23988fb074c93e6c398d8b9142ee180b29.tar.gz
4 files changed, 135 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9110474
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+venv
+rss.xml
+session.txt
diff --git a/Sole.py b/Sole.py
new file mode 100644
index 0000000..717091c
--- /dev/null
+++ b/Sole.py
@@ -0,0 +1,76 @@
+import feedparser
+from html.parser import HTMLParser
+from datetime import datetime
+from re import compile
+
+# Number of leading link groups dropped from every parsed feed entry.
+N_LINKS_TO_REMOVE = 2
+# Captures the dotted date written between parentheses in an entry title,
+# e.g. "(12.07.2023)". Raw string so "\(" and "\d" reach the regex engine
+# as-is instead of being treated as (invalid) string escapes.
+REGEX_DATE = compile(r"\(([\d\.]*)\)")
+# Address of the RSS feed that is handed to feedparser.
+OVERPOST_URL = "https://overpost.biz/e-books/quotidiani/rss.xml"
+
+def add_or_update(dictionary, key, value):
+    """Append value to the list stored under key, creating the list if absent."""
+    dictionary.setdefault(key, []).append(value)
+
+class PostParser(HTMLParser):
+    """HTML parser that groups the href of every anchor by its link text.
+
+    Text found while the current tag is <a> is normalised (underscores become
+    spaces, everything from the first " - " on is dropped) and used as the
+    key; the hrefs seen for that key are collected in a list.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.links = {}           # normalised link text -> list of hrefs
+        self.prev_tag = None      # tag that was current before the last start tag
+        self.current_tag = None   # tag the parser believes it is inside
+        self.current_link = None  # href of the most recent <a> that had one
+
+    def handle_starttag(self, tag, attrs):
+        # <br> is skipped so that it never becomes the current tag.
+        if tag == "br":
+            return
+        self.prev_tag, self.current_tag = self.current_tag, tag
+        if tag != "a":
+            return
+        for name, value in attrs:
+            if name == "href":
+                self.current_link = value
+
+    def handle_endtag(self, tag):
+        # Only a single previous tag is remembered, not a full stack of tags.
+        self.current_tag = self.prev_tag
+
+    def handle_data(self, data):
+        if self.current_tag != "a":
+            return
+        label = data.replace("_", " ").split(" - ")[0]
+        add_or_update(self.links, label, self.current_link)
+
+    def get_links(self):
+        """Return a shallow copy of the collected text -> hrefs mapping."""
+        return self.links.copy()
+    
+def parse_html(html):
+    """Run a fresh PostParser over html and return the links it collected."""
+    post_parser = PostParser()
+    post_parser.feed(html)
+    collected = post_parser.get_links()
+    return collected
+
+def remove_first(d):
+    """Pop the first item of d (insertion order) and return it as (key, value).
+
+    Raises StopIteration when d is empty.
+    """
+    first_key = next(iter(d))
+    first_value = d.pop(first_key)
+    return first_key, first_value
+
+def remove_first_n(d, n):
+    """Delete the first n keys of d in place, following insertion order.
+
+    If d holds fewer than n items it is simply emptied; a non-positive n
+    leaves d untouched. Returns None.
+    """
+    # Snapshot the keys first: a dict must not change size while iterated.
+    for key in list(d)[:max(n, 0)]:
+        del d[key]
+
+def parse_entry(entry):
+    """Turn one feed entry (one day) into a (date, links) pair.
+
+    The date is the first parenthesised match of REGEX_DATE in entry.title,
+    parsed as "%d.%m.%Y"; the links come from entry.turbo_content with the
+    first N_LINKS_TO_REMOVE groups dropped.
+    """
+    raw_date = REGEX_DATE.findall(entry.title)[0]
+    day_links = parse_html(entry.turbo_content)
+    remove_first_n(day_links, N_LINKS_TO_REMOVE)
+    parsed_date = datetime.strptime(raw_date, "%d.%m.%Y")
+    return parsed_date, day_links
+
+def get_links(rss_url):
+    """Parse the feed at rss_url and return one parse_entry() result per entry."""
+    parsed_feed = feedparser.parse(rss_url)
+    return list(map(parse_entry, parsed_feed.entries))
+
+def get_sole():
+    """Return the link groups of one feed entry whose title starts with
+    "Il Sole 24 Ore", as a title -> links dict."""
+    entries = get_links(OVERPOST_URL)
+    # NOTE(review): this picks the second entry (index 1), not the first —
+    # confirm that is the intended day.
+    _, day_links = entries[1]
+    return {
+        title: urls
+        for title, urls in day_links.items()
+        if title.startswith("Il Sole 24 Ore")
+    }
+
+# NOTE(review): this rebinds OVERPOST_URL for every importer of the module,
+# not only when it is run as a script; it looks like a local debugging
+# override — confirm before relying on the remote feed URL.
+OVERPOST_URL = r"/home/marco/Documenti/overpost/rss.xml"
+# Script entry point: print the filtered link groups.
+if __name__ == "__main__":
+    print(get_sole())
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..53502c8
--- /dev/null
+++ b/main.py
@@ -0,0 +1,48 @@
+import json
+import requests # https://github.com/pyload/pyload/wiki/module.Api.Api
+from Sole import get_sole, remove_first
+
+# File in which the pyLoad session id is cached between runs.
+SESSION_FILENAME = "session.txt"
+
+# Where the pyLoad API is reached and the credentials used to log in.
+PYLOAD_PROTOCOL = "http"
+PYLOAD_HOST = "localhost"
+PYLOAD_PORT = 8000
+PYLOAD_USER = "pyload"
+PYLOAD_PW = "pyload"
+
+# Endpoint paths; login and add-package are appended to the API root URL.
+PYLOAD_API_ENDPOINT = "/api"
+PYLOAD_LOGIN_ENDPOINT = "/login"
+PYLOAD_ADDPACKAGE_ENDPOINT = "/generateAndAddPackages"
+PYLOAD_API_URL = f"{PYLOAD_PROTOCOL}://{PYLOAD_HOST}:{PYLOAD_PORT}{PYLOAD_API_ENDPOINT}"
+
+LOGIN_URL = f"{PYLOAD_API_URL}{PYLOAD_LOGIN_ENDPOINT}"
+ADDPACKAGE_URL = f"{PYLOAD_API_URL}{PYLOAD_ADDPACKAGE_ENDPOINT}"
+# Form fields posted to LOGIN_URL.
+LOGIN_DATA = {"username": PYLOAD_USER, "password": PYLOAD_PW}
+
+def get_session_id():
+    """Return the pyLoad session id, logging in only when none is cached.
+
+    The id is read from SESSION_FILENAME when that file exists and its first
+    line is not blank; otherwise LOGIN_DATA is posted to LOGIN_URL and the
+    "pyload_session" cookie of the response is stored in SESSION_FILENAME
+    and returned.
+
+    Raises:
+        KeyError: if the login response carries no "pyload_session" cookie.
+    """
+    try:
+        with open(SESSION_FILENAME, "r", encoding="utf-8") as in_file:
+            # Strip the line ending so a hand-edited file still yields a clean id.
+            cached_id = in_file.readline().strip()
+    except FileNotFoundError:
+        cached_id = ""
+    if cached_id:
+        return cached_id
+
+    # Logging in outside the except block keeps a login failure from being
+    # reported as a consequence of the missing session file.
+    res = requests.post(LOGIN_URL, data=LOGIN_DATA)
+    session_id = res.cookies.get_dict()['pyload_session']
+    with open(SESSION_FILENAME, "w", encoding="utf-8") as out_file:
+        out_file.write(session_id)
+    return session_id
+    
+def add_package(links, session=None):
+    """Post links to the pyLoad add-package endpoint and return the reply.
+
+    Args:
+        links: list of URLs; sent JSON-encoded in the "links" form field.
+        session: session id sent in the "session" form field. When omitted,
+            the id returned by get_session_id() is used.
+
+    Returns:
+        The text of the HTTP response.
+    """
+    if session is None:
+        session = get_session_id()
+    payload = {"links": json.dumps(links), "session": session}
+    print(ADDPACKAGE_URL)
+    print(payload)
+    # Send the package payload itself, not the login form.
+    return requests.post(ADDPACKAGE_URL, data=payload).text
+
+# Script entry point: obtain a session, then submit a fixed list of links.
+if __name__ == "__main__":
+    # Cached session id, or a fresh one obtained by logging in.
+    session_id = get_session_id()
+    
+    # Disabled: would take the first link of the first "Il Sole 24 Ore" group.
+    #sole = get_sole()
+    #sole_link = remove_first(sole)[1][0]
+    
+    
+    # Hard-coded links pointing at a server on localhost:8080.
+    links = [ "http://localhost:8080/file2", "http://localhost:8080/file1" ]
+    
+    print(add_package(links))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ad62927
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+certifi==2023.5.7
+charset-normalizer==3.2.0
+feedparser==6.0.10
+idna==3.4
+requests==2.31.0
+sgmllib3k==1.0.0
+urllib3==2.0.3
author	Marco Andronaco <andronacomarco@gmail.com>	2023-07-12 09:14:36 +0200
committer	Marco Andronaco <andronacomarco@gmail.com>	2023-07-12 09:14:36 +0200
commit	733a0a23988fb074c93e6c398d8b9142ee180b29 (patch)
tree	6a06a08378b0744474d45d8692ac1ce17bec7859
download	sunstroke-733a0a23988fb074c93e6c398d8b9142ee180b29.tar.gz