First commit

2021-09-21 14:43:16 +02:00
commit f76b804f0c
1 changed files with 68 additions and 0 deletions
--- a/parse.py
+++ b/parse.py
@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+from io import StringIO
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+
+# Pages whose HTML tables are scraped below
+modra_URL = "https://www.modra.si/skladi-in-podskladi/"
+infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"
+
+# Browser-like User-Agent sent with every request; Modra won't work
+# without it.
+headers = {
+    'User-Agent': (
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
+        'AppleWebKit/537.36 (KHTML, like Gecko) '
+        'Chrome/39.0.2171.95 Safari/537.36'
+    ),
+}
+
+
+###############################################################################
+# Parse Modra Zavarovalnica
+#
+
+# Fetch the Modra page. The timeout keeps the script from hanging forever on
+# an unresponsive server, and raise_for_status() reports HTTP errors here
+# instead of letting an error page reach the table parser.
+modra_response = requests.get(modra_URL, headers=headers, timeout=30)
+modra_response.raise_for_status()
+
+# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
+# in recent pandas. thousands=None disables thousands-separator parsing.
+modra_tables = pd.read_html(StringIO(modra_response.text), thousands=None)
+df = modra_tables[0]
+
+# Rename columns to the short names used in the output table
+df = df.rename(columns={'VEP ? Vrednost enote premoženja': 'VEP', 'Sklad': 'SKLAD'})
+
+# Drop all columns except the ones we want
+df = df.filter(['SKLAD', 'VEP'])
+
+# Drop all rows except the ones we want
+subset = df[df["SKLAD"].isin(["Dinamični podsklad", "Zajamčeni podsklad"])]
+
+
+
+###############################################################################
+# Parse Sava Infond Skladi
+#
+
+# Fetch the Infond price list. The timeout keeps the script from hanging
+# forever on an unresponsive server, and raise_for_status() reports HTTP
+# errors here instead of letting an error page reach the table parser.
+infond_response = requests.get(infond_URL, headers=headers, timeout=30)
+infond_response.raise_for_status()
+
+# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
+# in recent pandas. thousands=None disables thousands-separator parsing.
+df_list = pd.read_html(StringIO(infond_response.text), thousands=None)
+
+# Drop all columns except the ones we want
+df = df_list[0].filter(['SKLAD', 'VEP'])
+
+# Clean up the "SKLAD" name: for these rows the cell text is split on
+# whitespace, the first two tokens become the fund name and the third token
+# is written to "VEP".
+# NOTE(review): the row labels are hard-coded positions in the scraped table;
+# they will point at other funds if Infond reorders or extends the list —
+# confirm against the live page.
+for row_label in (23, 15):
+    tokens = df.at[row_label, 'SKLAD'].split()
+    fund_name, fund_vep = ' '.join(tokens[:2]), tokens[2]
+    df.at[row_label, 'SKLAD'] = fund_name
+    df.at[row_label, 'VEP'] = fund_vep
+
+# Drop all rows except the ones we want
+subset1 = df[df["SKLAD"].isin(["Infond Defensive", "Infond Technology"])]
+
+
+
+###############################################################################
+# Create new datatable and output it
+#
+
+# Stack the Modra rows on top of the Infond rows, renumber the combined rows
+# from zero, and print the result.
+output_table = (
+    pd.concat([subset, subset1], axis=0)
+    .reset_index(drop=True)
+)
+print(output_table)