working version

author: Andronaco Marco <marco.andronaco@olivetti.com> 2023-07-12 13:02:46 +0200
committer: Andronaco Marco <marco.andronaco@olivetti.com> 2023-07-12 13:02:46 +0200
commit: 585855a8728f87cc5383329bd227f6d6ba840aff (patch)
tree: 634aa7868a614e45eeab6772a2a5ddf367e74a91
parent: 733a0a23988fb074c93e6c398d8b9142ee180b29 (diff)
download: sunstroke-585855a8728f87cc5383329bd227f6d6ba840aff.tar.gz
5 files changed, 81 insertions, 57 deletions
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..2664957
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,6 @@
+PYLOAD_HOST=http://localhost:8000/
+PYLOAD_USER=user
+PYLOAD_PW=password
+N_LINKS_TO_REMOVE=2
+NEWSPAPER_PREFIX=Il Sole 24 Ore
+RSS_URL=https://overpost.biz/e-books/quotidiani/rss.xml
diff --git a/.gitignore b/.gitignore
index 9110474..8120251 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 __pycache__
 venv
+.env
 rss.xml
 session.txt
diff --git a/MyPyload.py b/MyPyload.py
new file mode 100644
index 0000000..2653758
--- /dev/null
+++ b/MyPyload.py
@@ -0,0 +1,36 @@
+# My edited version of https://github.com/thammi/pyload-utils/blob/master/pyloadutils/pyload.py
+import json
+from urllib.request import urlopen
+from urllib.parse import urljoin, urlencode
+from dotenv import load_dotenv
+from os import getenv
+load_dotenv()
+
+PYLOAD_HOST = getenv("PYLOAD_HOST") or "http://localhost:8000/"
+PYLOAD_USER = getenv("PYLOAD_USER") or "pyload"
+PYLOAD_PW = getenv("PYLOAD_PW") or "pyload"
+
+class Pyload:
+
+    def __init__(self):
+        self.url_base = urljoin(PYLOAD_HOST, 'api/')
+        self.session = self._call('login', {'username': PYLOAD_USER, 'password': PYLOAD_PW}, False)
+
+    def _call(self, name, args={}, encode=True):
+        url = urljoin(self.url_base, name)
+
+        if encode:
+            data = { k: json.dumps(v) for k, v in args.items() }
+        else:
+            data = args
+
+        if hasattr(self, 'session'):
+            data['session'] = self.session
+
+        post = urlencode(data).encode('utf-8')
+        return json.loads(urlopen(url, post).read().decode('utf-8'))
+
+    def __getattr__(self, name):
+        def wrapper(**kargs):
+            return self._call(name, kargs)
+        return wrapper
diff --git a/Sole.py b/Overpost.py
index 717091c..6404b37 100644
--- a/Sole.py
+++ b/Overpost.py
@@ -1,11 +1,14 @@
-import feedparser
 from html.parser import HTMLParser
 from datetime import datetime
 from re import compile
+import os
+import feedparser
+from dotenv import load_dotenv
+load_dotenv()
 
-N_LINKS_TO_REMOVE = 2
+RSS_URL = os.getenv("RSS_URL") or os.path.join(".", "rss.xml")
+N_LINKS_TO_REMOVE = os.getenv("N_LINKS_TO_REMOVE") or 2
 REGEX_DATE = compile("\(([\d\.]*)\)")
-OVERPOST_URL = "https://overpost.biz/e-books/quotidiani/rss.xml"
 
 def add_or_update(dictionary, key, value):
     try:
@@ -48,29 +51,30 @@ def parse_html(html):
     parser.feed(html)
     return parser.get_links()
 
-def remove_first(d):
+def dict_pop(d):
     return (k := next(iter(d)), d.pop(k))
 
-def remove_first_n(d, n):
-    for i in range(n):
-        remove_first(d)
+def dict_pop_first_n(d, n):
+    return [dict_pop(d) for _ in range(n)]
 
 def parse_entry(entry): # entry = day
     date = REGEX_DATE.findall(entry.title)[0]
     links = parse_html(entry.turbo_content)
     
-    remove_first_n(links, N_LINKS_TO_REMOVE)
+    dict_pop_first_n(links, int(N_LINKS_TO_REMOVE))
     return (datetime.strptime(date, "%d.%m.%Y"), links)
 
 def get_links(rss_url):
     feed = feedparser.parse(rss_url)
     return [ parse_entry(entry) for entry in feed.entries ]
 
-def get_sole():
-    links = get_links(OVERPOST_URL)
-    today = links[1]
-    return { k: v for k, v in today[1].items() if k.startswith("Il Sole 24 Ore")}
+def get_newspaper(prefix="", index=0):
+    links = get_links(RSS_URL)
+    try:
+        daily = links[index][1]
+    except IndexError:
+        return {}
+    return { k: v for k, v in daily.items() if k.startswith(prefix)}
 
-OVERPOST_URL = r"/home/marco/Documenti/overpost/rss.xml"
 if __name__ == "__main__":
-    print(get_sole())
+    print(get_newspaper("Il Sole"))
diff --git a/main.py b/main.py
index 53502c8..d7fb9c9 100644
--- a/main.py
+++ b/main.py
@@ -1,48 +1,25 @@
-import json
-import requests # https://github.com/pyload/pyload/wiki/module.Api.Api
-from Sole import get_sole, remove_first
+from Overpost import get_newspaper
+from MyPyload import Pyload
+from os import getenv
 
-SESSION_FILENAME = "session.txt"
-PYLOAD_PROTOCOL = "http"
-PYLOAD_HOST = "localhost"
-PYLOAD_PORT = 8000
-PYLOAD_USER = "pyload"
-PYLOAD_PW = "pyload"
-PYLOAD_API_ENDPOINT = "/api"
-PYLOAD_LOGIN_ENDPOINT = "/login"
-PYLOAD_ADDPACKAGE_ENDPOINT = "/generateAndAddPackages"
-PYLOAD_API_URL = f"{ PYLOAD_PROTOCOL }://{ PYLOAD_HOST }:{ PYLOAD_PORT }{ PYLOAD_API_ENDPOINT }"
+NEWSPAPER_PREFIX = getenv("NEWSPAPER_PREFIX") or ""
 
-LOGIN_DATA = { "username": PYLOAD_USER, "password": PYLOAD_PW }
-LOGIN_URL = PYLOAD_API_URL + PYLOAD_LOGIN_ENDPOINT
-ADDPACKAGE_URL = PYLOAD_API_URL + PYLOAD_ADDPACKAGE_ENDPOINT
+def scroll_dict(dictionary):
+    i = 0
+    for key, values in dictionary.items():
+        if i >= len(values):
+            i = 0
+        yield key, values[i]
+        i += 1
 
-def get_session_id():
-    try:
-        with open(SESSION_FILENAME, "r", encoding="utf-8") as in_file:
-            return in_file.readline()
-    except FileNotFoundError:
-        res = requests.post(LOGIN_URL, data=LOGIN_DATA)
-        cookies = res.cookies.get_dict()
-        session_id = cookies['pyload_session']
-        with open(SESSION_FILENAME, "w", encoding="utf-8") as out_file:
-            out_file.write(session_id)
-        return session_id
-    
-def add_package(links):
-    ADDPACKAGE_DATA = { "links": json.dumps(links), "session": session_id }
-    print(ADDPACKAGE_URL)
-    print(ADDPACKAGE_DATA)
-    kek = requests.post(ADDPACKAGE_URL, data=LOGIN_DATA).text
-    return kek
+def download_link(connection, name, link):
+    return connection.addPackage(name=name, links=[link])
+
+def main():
+    newspapers = get_newspaper(NEWSPAPER_PREFIX, 0) # 0 -> today
+    con = Pyload()
+    pids = [ download_link(con, NEWSPAPER_PREFIX, link) for _, link in scroll_dict(newspapers) ]
+    print(pids)
 
 if __name__ == "__main__":
-    session_id = get_session_id()
-    
-    #sole = get_sole()
-    #sole_link = remove_first(sole)[1][0]
-    
-    
-    links = [ "http://localhost:8080/file2", "http://localhost:8080/file1" ]
-    
-    print(add_package(links))
+    exit(main())
author	Andronaco Marco <marco.andronaco@olivetti.com>	2023-07-12 13:02:46 +0200
committer	Andronaco Marco <marco.andronaco@olivetti.com>	2023-07-12 13:02:46 +0200
commit	585855a8728f87cc5383329bd227f6d6ba840aff (patch)
tree	634aa7868a614e45eeab6772a2a5ddf367e74a91
parent	733a0a23988fb074c93e6c398d8b9142ee180b29 (diff)
download	sunstroke-585855a8728f87cc5383329bd227f6d6ba840aff.tar.gz