Data Science Assignment 1

FIN42110: Data Science for Trading and Risk Management
Homework 1
Harsh Desai (23205088)
February 2024
1 Yahoo Finance
import yfinance as yf
import mplfinance as mpf
from datetime import datetime, timedelta
# Define the stock symbol (Tesla)
stock_symbol = "TSLA"

# Download daily history for the trailing 365 days.  "Today" is captured once
# so both ends of the window are derived from the same instant.
today = datetime.today()
end_date = today.strftime('%Y-%m-%d')
start_date = (today - timedelta(days=365)).strftime('%Y-%m-%d')
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# A failed download (bad symbol, no network) typically comes back as an empty
# frame; fail here with a clear message instead of deep inside the plotting
# call.
if stock_data.empty:
    raise RuntimeError(
        f"No price data returned for {stock_symbol} "
        f"between {start_date} and {end_date}"
    )

# Recent yfinance releases return two-level (field, ticker) columns even for a
# single symbol; mplfinance needs plain Open/High/Low/Close/Volume columns.
if stock_data.columns.nlevels > 1:
    stock_data.columns = stock_data.columns.get_level_values(0)

# Plot the candlestick chart with 20- and 50-day moving averages
mpf.plot(
    stock_data,
    type='candle',
    title=f'Candlestick Chart for {stock_symbol} (Last Year)',
    ylabel='Price (USD)',
    xlabel='Date',
    style='yahoo',
    mav=(20, 50),
)

# Display the plot
mpf.show()
1
2 PDF Scraping
pip install tabula-py
import tabula as tb
import pandas as pd
from IPython.display import display
# Path of the PDF file that holds the GDP table
file = 'GDP12.pdf'

# Let tabula extract the table(s) on page 1; the result is iterated below as a
# collection of DataFrames.
data = tb.read_pdf(file, pages='1')
display(data)

# Write each extracted table to its own LaTeX file, numbered from 1
for table_number, table in enumerate(data, start=1):
    table.to_latex(f"table_output_page_{table_number}.tex", index=False)
Table 1: Gross Domestic Product 2020.
Country Ranking Economy Millions of Dollars

USA 1 United States 20,936,600 Unnamed: 0
CHN 2 China 14,722,731 NaN
JPN 3 Japan 5,064,873 NaN
DEU 4 Germany 3,806,060 NaN
GBR 5 United Kingdom 2,707,744 NaN
IND 6 India 2,622,984 NaN
FRA 7 France 2,603,004 NaN
ITA 8 Italy 1,886,445 NaN
CAN 9 Canada 1,643,408 NaN
KOR 10 Korea, Rep. 1,630,525 NaN
RUS 11 Russian Federation 1,483,498 a
BRA 12 Brazil 1,444,733 NaN
AUS 13 Australia 1,330,901 NaN
ESP 14 Spain 1,281,199 NaN
MEX 15 Mexico 1,076,163 NaN
IDN 16 Indonesia 1,058,424 NaN
NLD 17 Netherlands 912,242 NaN
CHE 18 Switzerland 747,969 NaN
TUR 19 Turkey 720,101 NaN
SAU 20 Saudi Arabia 700,118 NaN
POL 21 Poland 594,165 NaN
SWE 22 Sweden 537,610 NaN
BEL 23 Belgium 515,332 NaN
THA 24 Thailand 501,795 NaN
NGA 25 Nigeria 432,294 NaN
AUT 26 Austria 428,965 NaN
ARE 27 United Arab Emirates 421,142 NaN
IRL 28 Ireland 418,622 NaN
ISR 29 Israel 401,954 NaN
ARG 30 Argentina 383,067 b
EGY 31 Egypt, Arab Rep. 363,069 NaN
NOR 32 Norway 362,009 NaN
PHL 33 Philippines 361,489 NaN
DNK 34 Denmark 355,184 NaN
HKG 35 Hong Kong SAR, China 346,586 NaN
2
SGP 36 Singapore 339,998 NaN
MYS 37 Malaysia 336,664 NaN
BGD 38 Bangladesh 324,239 NaN
ZAF 39 South Africa 301,924 NaN
COL 40 Colombia 271,347 NaN
FIN 41 Finland 271,234 NaN
VNM 42 Vietnam 271,158 NaN
PAK 43 Pakistan 263,687 NaN
CHL 44 Chile 252,940 NaN
ROU 45 Romania 248,716 NaN
CZE 46 Czech Republic 243,530 NaN
PRT 47 Portugal 231,256 NaN
NZL 48 New Zealand 212,482 NaN
PER 49 Peru 202,014 NaN
IRN 50 Iran, Islamic Rep. 191,718 NaN
GRC 51 Greece 189,410 NaN
KAZ 52 Kazakhstan 169,835 NaN
IRQ 53 Iraq 167,224 NaN
UKR 54 Ukraine 155,582 a
HUN 55 Hungary 155,013 NaN
QAT 56 Qatar 146,374 NaN
DZA 57 Algeria 145,164 NaN
KWT 58 Kuwait 136,197 NaN
MAR 59 Morocco 112,871 c
ETH 60 Ethiopia 107,645 NaN
SVK 61 Slovak Republic 104,574 NaN
PRI 62 Puerto Rico 103,138 NaN
CUB 63 Cuba 103,131 NaN
KEN 64 Kenya 98,843 NaN
ECU 65 Ecuador 98,808 NaN
3
3 Crypto Punk Web Scraping
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# URL of the page to scrape, and the site root used to resolve relative links
PUNK_URL = 'https://cryptopunks.app/cryptopunks/details/9'
BASE_URL = 'https://cryptopunks.app'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever

# Single-pass translation table, so replacement text is never re-escaped.
_LATEX_SPECIALS = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def latex_escape(text):
    """Return *text* with LaTeX special characters escaped."""
    return str(text).translate(_LATEX_SPECIALS)


def build_latex_document(attributes, market_status):
    """Return a complete LaTeX document describing the scraped punk.

    Args:
        attributes: Iterable of plain-text attribute descriptions.
        market_status: Plain-text market status, or None/empty if the page
            did not contain one.

    Returns:
        The LaTeX source as a single string ending in a newline.
    """
    lines = [
        r"\documentclass{article}",
        r"\usepackage{enumitem}",
        r"\begin{document}",
        r"\section*{Attributes}",
    ]

    items = [latex_escape(attribute) for attribute in attributes]
    if items:
        lines.append(r"\begin{itemize}[leftmargin=*]")
        lines.extend(f"  \\item {item}" for item in items)
        lines.append(r"\end{itemize}")
    else:
        # An itemize environment without any \item does not compile.
        lines.append("No attributes found.")

    lines.append(r"\section*{Current Market Status}")
    if market_status:
        lines.append(latex_escape(market_status))
    else:
        lines.append("Market status not found.")

    lines.append(r"\end{document}")
    return "\n".join(lines) + "\n"


def main():
    """Scrape the punk page, save its image, and write output.tex."""
    # Send a GET request to the page and fail loudly on an HTTP error
    response = requests.get(PUNK_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect the text of every list item.
    # NOTE(review): this matches every <li> on the page (as the original
    # did), which may include navigation entries -- narrow the selector once
    # the page structure is confirmed.
    attributes = []
    for list_item in soup.find_all('li'):
        text = list_item.get_text(" ", strip=True)
        if text:
            attributes.append(text)
    print('Attributes:')
    for attribute in attributes:
        print(f" - {attribute}")

    # Extract current market status; stays None when the section is absent
    market_status = None
    market_status_section = soup.find('div', class_='col-md-10 col-md-offset-1')
    if market_status_section is not None:
        market_status = market_status_section.text.strip()
        print('\nCurrent Market Status:')
        print(market_status)
    else:
        print("Market status not found.")

    # Find the image element and download the image it points to
    image_element = soup.find('img', class_='img-responsive pixelated center-block')
    image_src = image_element.get('src') if image_element is not None else None
    if image_src:
        absolute_image_url = urljoin(BASE_URL, image_src)
        image_response = requests.get(absolute_image_url, timeout=REQUEST_TIMEOUT)
        if image_response.status_code == 200:
            # Save the image to a file
            with open('crypto_punk_image.png', 'wb') as f:
                f.write(image_response.content)
            print("CryptoPunk image saved as 'crypto_punk_image.png'")
        else:
            print("Failed to fetch the CryptoPunk image. Status code:",
                  image_response.status_code)
    else:
        print("CryptoPunk image element not found.")

    # Write the LaTeX-formatted output to a file
    with open('output.tex', 'w', encoding='utf-8') as f:
        f.write(build_latex_document(attributes, market_status))


if __name__ == "__main__":
    main()
Cryptopunk 9
Attributes: This Punk has 3 attributes, one of 4501 with that many.
1. Clown Nose- 212 Punks have this.
2. Police Cap- 203 Punks have this.
3. Big Beard- 146 Punks have this.

Data Science Assignment 1

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Data Science Assignment 1

Uploaded by

Copyright:

Available Formats

FIN42110: Data Science for Trading and Risk Management

# Define the stock symbol (Tesla)

# Download historical data for the last year

# Plot the candlestick chart

# display the plot

pip install tabula-py

#import pdf file

#Convert file into dataframe using tabula

#convert data to latex

Table 1: Gross Domestic Product 2020.

Country Ranking Economy Millions of Dollars

# Send a GET request to the page

# Parse the HTML content of the page

# Extract current market status

# Find the image element

# Extract the image URL

# Send a GET request to download the image

# Send a GET request to download the image

# Check if the image request was successful

# Format the output into LaTeX-friendly format

for attribute in attributes:

\section*{Current Market Status}

if market_status != "Market status not found.":

# Write the LaTeX-formatted output to a file

1. Clown Nose- 212 Punks have this.

2. Police Cap- 203 Punks have this.

3. Big Beard- 146 Punks have this.

You might also like