From b8ef451357505d063690d7bef51903696f750382 Mon Sep 17 00:00:00 2001
From: b4d <b4d@sablun.org>
Date: Tue, 21 Sep 2021 15:48:44 +0200
Subject: [PATCH] Wrap Modra and Infond parsing in functions and add a __main__ guard

---
 parse.py | 60 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/parse.py b/parse.py
index ee00b22..525905c 100755
--- a/parse.py
+++ b/parse.py
@@ -4,6 +4,8 @@ import requests
 from bs4 import BeautifulSoup
 import pandas as pd
 
+from multiprocessing import Process
+
 # URLs to parse
 modra_URL = "https://www.modra.si/skladi-in-podskladi/"
 infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"
@@ -15,20 +17,19 @@ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleW
 ###############################################################################
 # Parse Modra Zavarovalnica
 #
+def parse_modra():
+    a = requests.get(modra_URL, headers = headers)
+    df = pd.read_html(a.text, thousands=None)[0]
 
-a = requests.get(modra_URL, headers = headers)
+    # Rename Columns
+    df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True)
 
-df_list = pd.read_html(a.text, thousands=None)
-df = df_list[0]
+    # Drop all columns except the ones we want
+    df = df.filter(['SKLAD', 'VEP'])
 
-# Rename Columns
-df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True)
+    # Drop all rows except the ones we want
+    return df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ]
 
-# Drop all columns except the ones we want
-df = df.filter(['SKLAD', 'VEP'])
-
-# Drop all rows except the ones we want
-subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ]
 
 
 
@@ -36,33 +37,30 @@ subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni po
 # Parse Sava Infond Skladi
 #
 
-a = requests.get(infond_URL, headers = headers)
-df_list = pd.read_html(a.text, thousands=None)
+def parse_infond():
+    a = requests.get(infond_URL, headers = headers)
+    df = pd.read_html(a.text, thousands=None)[0]
 
+    # Drop all columns except the ones we want
+    df = df.filter(['SKLAD', 'VEP'])
 
-# Drop all columns except the ones we want
-df_list[0] = df_list[0].filter(['SKLAD', 'VEP'])
+    # Clean up the "SKLAD" name
+    a = df.at[23,'SKLAD']
+    df.at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1]
+    df.at[23,'VEP'] = a.split()[2]
 
-# Cleanup the "SKLAD" name
-a = df_list[0].at[23,'SKLAD']
-df_list[0].at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1]
-df_list[0].at[23,'VEP'] = a.split()[2]
-
-a = df_list[0].at[15,'SKLAD']
-df_list[0].at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1]
-df_list[0].at[15,'VEP'] = a.split()[2]
-
-
-# Drop all rows except the ones we want
-df = df_list[0]
-subset1 = df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ]
+    a = df.at[15,'SKLAD']
+    df.at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1]
+    df.at[15,'VEP'] = a.split()[2]
 
+    # Drop all rows except the ones we want
+    return df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ]
 
 
 ###############################################################################
 # Create new datatable and output it
 #
-
-output_table = pd.concat([subset, subset1], axis=0)
-output_table = output_table.reset_index(drop=True)
-print(output_table)
+if __name__ == '__main__':
+    output_table = pd.concat([parse_modra(), parse_infond()], axis=0)
+    output_table = output_table.reset_index(drop=True)
+    print(output_table)