First commit
This commit is contained in:
		
							
								
								
									
										68
									
								
								parse.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										68
									
								
								parse.py
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,68 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import io

import pandas as pd
import requests
from bs4 import BeautifulSoup
 | 
			
		||||
 | 
			
		||||
# Pages that publish the fund price tables we scrape.
modra_URL = "https://www.modra.si/skladi-in-podskladi/"
infond_URL = "https://www.infond.si/tecajnica-vzajemnih-skladov"

# Browser-like request headers; Modra does not work without them.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/39.0.2171.95 Safari/537.36'
    ),
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
# Parse Modra Zavarovalnica
#
# Fetches the Modra fund page, reads its first HTML table and leaves the rows
# for the funds of interest in `subset`, with the columns SKLAD and VEP.

# Funds to keep from the Modra table.
MODRA_FUNDS = ["Dinamični podsklad", "Zajamčeni podsklad"]

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops us from parsing an HTTP error page as data.
modra_response = requests.get(modra_URL, headers=headers, timeout=30)
modra_response.raise_for_status()

# read_html() is given a file-like object: passing the raw HTML string
# directly is deprecated in recent pandas releases.
modra_tables = pd.read_html(io.StringIO(modra_response.text), thousands=None)

# Rename Columns
# NOTE(review): the '?' in the long column label may be a mis-encoded dash.
# If the live header differs, this rename matches nothing and the filter
# below keeps only SKLAD -- confirm against the page.
modra_df = modra_tables[0].rename(
    columns={'VEP ? Vrednost enote premoženja': 'VEP', 'Sklad': 'SKLAD'},
)

# Drop all columns except the ones we want
modra_df = modra_df.filter(['SKLAD', 'VEP'])

# Drop all rows except the ones we want
subset = modra_df[modra_df["SKLAD"].isin(MODRA_FUNDS)]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
# Parse Sava Infond Skladi
#
# Fetches the Infond price list, reads its first HTML table, repairs the rows
# listed in INFOND_ROWS_TO_SPLIT and leaves the funds of interest in `subset1`
# with the columns SKLAD and VEP.

# Funds to keep from the Infond table.
INFOND_FUNDS = ["Infond Defensive", "Infond Technology"]

# Row labels whose SKLAD cell is split below. Presumably these are cells where
# the fund name and its VEP were read into one string; the positions are
# hard-coded, so verify them if the page layout changes.
INFOND_ROWS_TO_SPLIT = (23, 15)

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops us from parsing an HTTP error page as data.
infond_response = requests.get(infond_URL, headers=headers, timeout=30)
infond_response.raise_for_status()

# read_html() is given a file-like object: passing the raw HTML string
# directly is deprecated in recent pandas releases.
infond_tables = pd.read_html(io.StringIO(infond_response.text), thousands=None)

# Drop all columns except the ones we want
infond_df = infond_tables[0].filter(['SKLAD', 'VEP'])

# Cleanup the "SKLAD" name: the first two whitespace-separated tokens become
# the fund name and the third token becomes the VEP value.
for row_label in INFOND_ROWS_TO_SPLIT:
    tokens = infond_df.at[row_label, 'SKLAD'].split()
    # Read both parts before writing so a short cell fails without leaving
    # the row half-updated.
    fund_name = tokens[0] + ' ' + tokens[1]
    fund_vep = tokens[2]
    infond_df.at[row_label, 'SKLAD'] = fund_name
    infond_df.at[row_label, 'VEP'] = fund_vep

# Drop all rows except the ones we want
subset1 = infond_df[infond_df["SKLAD"].isin(INFOND_FUNDS)]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
# Create new datatable and output it
#

# Stack the Modra rows on top of the Infond rows and renumber the combined
# rows from zero so the printed index is contiguous.
output_table = pd.concat([subset, subset1], axis=0, ignore_index=True)
print(output_table)
 | 
			
		||||
		Reference in New Issue
	
	Block a user