From b8ef451357505d063690d7bef51903696f750382 Mon Sep 17 00:00:00 2001 From: b4d Date: Tue, 21 Sep 2021 15:48:44 +0200 Subject: [PATCH] Made stuff into functions and cleaned up the code --- parse.py | 60 +++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/parse.py b/parse.py index ee00b22..525905c 100755 --- a/parse.py +++ b/parse.py @@ -4,6 +4,8 @@ import requests from bs4 import BeautifulSoup import pandas as pd +from multiprocessing import Process + # URLs to parse modra_URL = "https://www.modra.si/skladi-in-podskladi/" infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov" @@ -15,20 +17,19 @@ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleW ############################################################################### # Parse Modra Zavarovalnica # +def parse_modra(): + a = requests.get(modra_URL, headers = headers) + df = pd.read_html(a.text, thousands=None)[0] -a = requests.get(modra_URL, headers = headers) + # Rename Columns + df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True) -df_list = pd.read_html(a.text, thousands=None) -df = df_list[0] + # Drop all columns except the ones we want + df = df.filter(['SKLAD', 'VEP']) -# Rename Columns -df.rename(columns = {'VEP ? Vrednost enote premoženja':'VEP', 'Sklad':'SKLAD'}, inplace = True) + # Drop all rows except the ones we want + return df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ] -# Drop all columns except the ones we want -df = df.filter(['SKLAD', 'VEP']) - -# Drop all rows except the ones we want -subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni podsklad") ] @@ -36,33 +37,30 @@ subset = df[(df["SKLAD"]=="Dinamični podsklad") | (df["SKLAD"] =="Zajamčeni po # Parse Sava Infond Skladi # -a = requests.get(infond_URL, headers = headers) -df_list = pd.read_html(a.text, thousands=None) +def parse_infond(): + a = requests.get(infond_URL, headers = headers) + df = pd.read_html(a.text, thousands=None)[0] + # Drop all columns except the ones we want + df = df.filter(['SKLAD', 'VEP']) -# Drop all columns except the ones we want -df_list[0] = df_list[0].filter(['SKLAD', 'VEP']) + # Cleanup the "SKLAD" name + a = df.at[23,'SKLAD'] + df.at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1] + df.at[23,'VEP'] = a.split()[2] -# Cleanup the "SKLAD" name -a = df_list[0].at[23,'SKLAD'] -df_list[0].at[23,'SKLAD'] = a.split()[0]+' '+a.split()[1] -df_list[0].at[23,'VEP'] = a.split()[2] - -a = df_list[0].at[15,'SKLAD'] -df_list[0].at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1] -df_list[0].at[15,'VEP'] = a.split()[2] - - -# Drop all rows except the ones we want -df = df_list[0] -subset1 = df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ] + a = df.at[15,'SKLAD'] + df.at[15,'SKLAD'] = a.split()[0]+' '+a.split()[1] + df.at[15,'VEP'] = a.split()[2] + # Drop all rows except the ones we want + return df[(df["SKLAD"]=="Infond Defensive") | (df["SKLAD"] =="Infond Technology") ] ############################################################################### # Create new datatable and output it # - -output_table = pd.concat([subset, subset1], axis=0) -output_table = output_table.reset_index(drop=True) -print(output_table) +if __name__ == '__main__': + output_table = pd.concat([parse_modra(), parse_infond()], axis=0) + output_table = output_table.reset_index(drop=True) + print(output_table)