#!/usr/bin/env python3
"""Print the current VEP values of selected Modra and Sava Infond funds.

The script downloads two public web pages, reads the first HTML table on
each with pandas, keeps only the ``SKLAD`` (fund name) and ``VEP`` columns,
keeps only the funds listed in ``MODRA_FUNDS`` / ``INFOND_FUNDS`` and prints
the combined table.
"""
# Origin: parse.py as added by commit f76b804f0ca8 ("First commit", b4d,
# 2021-09-21).

from io import StringIO

import pandas as pd
import requests
# Not referenced directly; kept from the original script (pandas.read_html can
# use bs4 as one of its parser backends).
from bs4 import BeautifulSoup  # noqa: F401

# URLs to parse
modra_URL = "https://www.modra.si/skladi-in-podskladi/"
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"

# Fake headers, otherwise Modra won't work
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/39.0.2171.95 Safari/537.36'
    )
}

# Seconds to wait for a server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Funds to keep from each source (exact values of the SKLAD column).
MODRA_FUNDS = ("Dinamični podsklad", "Zajamčeni podsklad")
INFOND_FUNDS = ("Infond Defensive", "Infond Technology")

# Tail of the Modra value-column header; the header starts with "VEP".
_MODRA_VEP_HEADER_TAIL = "Vrednost enote premoženja"


def fetch_first_table(url):
    """Download *url* and return the first HTML table on it as a DataFrame.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
        ValueError: raised by ``pandas.read_html`` when the page contains no
            table.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # StringIO: passing literal HTML to read_html is deprecated in pandas 2.1+.
    # thousands=None disables thousands-separator parsing (presumably because
    # the values use a decimal comma -- confirm against the live pages).
    tables = pd.read_html(StringIO(response.text), thousands=None)
    return tables[0]


def _collapse_whitespace(text):
    """Return *text* as a string with every whitespace run replaced by one space."""
    return " ".join(str(text).split())


def _canonical_modra_column(label):
    """Map a Modra column header to ``SKLAD`` / ``VEP``; leave others as-is.

    The value column header is "VEP ?" followed by some whitespace or
    line-separator character and "Vrednost enote premoženja". Matching on the
    whitespace-normalised text avoids depending on that invisible character.
    """
    name = _collapse_whitespace(label)
    if name == "Sklad":
        return "SKLAD"
    if name.startswith("VEP") and name.endswith(_MODRA_VEP_HEADER_TAIL):
        return "VEP"
    return label


def _split_fused_cell(cell, fund_names):
    """Split a ``"<fund name> <value> ..."`` cell into ``(name, value)``.

    Returns ``None`` when *cell* is not a string, does not start with one of
    *fund_names*, or the token after the name contains no digit.
    """
    if not isinstance(cell, str):
        return None
    tokens = cell.split()
    for name in fund_names:
        name_tokens = name.split()
        width = len(name_tokens)
        if len(tokens) > width and tokens[:width] == name_tokens:
            value = tokens[width]
            if any(ch.isdigit() for ch in value):
                return name, value
    return None


def select_modra_funds(table):
    """Return the SKLAD/VEP rows of the Modra table for ``MODRA_FUNDS``."""
    funds = table.rename(columns=_canonical_modra_column)
    # Drop all columns except the ones we want
    funds = funds.filter(items=["SKLAD", "VEP"])
    # Drop all rows except the ones we want
    return funds[funds["SKLAD"].isin(MODRA_FUNDS)]


def select_infond_funds(table):
    """Return the SKLAD/VEP rows of the Infond table for ``INFOND_FUNDS``.

    Some rows carry the fund name and its value fused in the SKLAD cell.
    Those rows are found by content (not by a fixed row number, which breaks
    when the site reorders its table) and split into name and value.
    """
    # Drop all columns except the ones we want; copy so the input is untouched.
    funds = table.filter(items=["SKLAD", "VEP"]).copy()

    # Cleanup the "SKLAD" name
    repairs = {}
    for row_label, cell in funds["SKLAD"].items():
        repaired = _split_fused_cell(cell, INFOND_FUNDS)
        if repaired is not None:
            repairs[row_label] = repaired

    if repairs and "VEP" in funds.columns:
        # The repaired values are strings; make sure the column can hold them.
        funds["VEP"] = funds["VEP"].astype(object)
    for row_label, (name, value) in repairs.items():
        funds.at[row_label, "SKLAD"] = name
        funds.at[row_label, "VEP"] = value

    # Drop all rows except the ones we want
    return funds[funds["SKLAD"].isin(INFOND_FUNDS)]


def build_output_table():
    """Fetch both sources and return the combined, re-indexed table."""
    modra = select_modra_funds(fetch_first_table(modra_URL))
    infond = select_infond_funds(fetch_first_table(infond_URL))
    combined = pd.concat([modra, infond], axis=0)
    return combined.reset_index(drop=True)


def main():
    """Create the combined table and print it."""
    print(build_output_table())


if __name__ == "__main__":
    main()