You are on page 1 of 4

Ex1: Cuisine's URLs (part2)

import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, \
WebDriverException

# Create a new browser session. Chrome is used here; the original note
# mentions Firefox as an alternative.


driver = webdriver.Chrome()

# Let element lookups keep retrying for up to 30 seconds before failing.
driver.implicitly_wait(30)
# Load the "recipes by cuisine" index page that the rest of the script scrapes.
driver.get("https://food.ndtv.com/recipes/by-cuisine")

def get_link_by_text(text):
    """Return the ``href`` of the link on the current page with the given text.

    Args:
        text: Visible text of the link. Leading and trailing whitespace is
            stripped before the lookup.

    Returns:
        The value of the matched element's ``href`` attribute.

    Raises:
        NoSuchElementException: If no link with that text can be located.
    """
    # Use the generic locator API ("link text" is the value of By.LINK_TEXT).
    # The find_element_by_* helpers were deprecated and later removed in
    # Selenium 4, while find_element(by, value) works on old and new versions.
    element = driver.find_element("link text", text.strip())
    return element.get_attribute("href")

def get_list_by_class_name(class_name="main_image "):
    """Collect the non-empty text of every element with the given class name.

    Args:
        class_name: Class name to search for on the current page.

    Returns:
        A list with the text of each matching element, skipping elements
        whose text is empty. The list is empty when nothing matches or when
        the lookup fails; in the failure case the error is printed rather
        than raised.
    """
    element_list = []
    try:
        # Use the generic locator API ("class name" is the value of
        # By.CLASS_NAME). The find_elements_by_* helpers were deprecated and
        # later removed in Selenium 4.
        all_elements = driver.find_elements("class name", class_name)
        # Read .text exactly once per element instead of once for the filter
        # and once for the value; every access goes back to the browser.
        texts = [element.text for element in all_elements]
        # Assign only after all reads succeeded so that a failure midway
        # still yields an empty list, as before.
        element_list = [text for text in texts if text]
    except (NoSuchElementException, WebDriverException) as e:
        print(e)
    return element_list

# Example of a cuisine heading on the page (rendered text: AMERICAN):
#<h2 itemprop="name" class="recipe-image-header">AMERICAN</h2>


# Map each cuisine heading shown on the index page to the URL of its link.
category_links = dict(
    (heading, get_link_by_text(heading))
    for heading in get_list_by_class_name('recipe-image-header')
)

# The main categories found on the selected root page


category_links

{'AMERICAN': 'https://food.ndtv.com/recipes/american-recipes',
'CHINESE': 'https://food.ndtv.com/recipes/chinese-recipes',
'CONTINENTAL': 'https://food.ndtv.com/recipes/continental-recipes',
'CUBAN': 'https://food.ndtv.com/recipes/cuban-recipes',
'FRENCH': 'https://food.ndtv.com/recipes/french-recipes',
'GREEK': 'https://food.ndtv.com/recipes/greek-recipes',
'INDIAN': 'https://food.ndtv.com/recipes/indian-recipes',
'INDONESIAN': 'https://food.ndtv.com/recipes/indonesian-recipes',
'ITALIAN': 'https://food.ndtv.com/recipes/italian-recipes',
'JAPANESE': 'https://food.ndtv.com/recipes/japanese-recipes',
'KOREAN': 'https://food.ndtv.com/recipes/korean-recipes',
'LEBANESE': 'https://food.ndtv.com/recipes/lebanese-recipes',
'MALAYSIAN': 'https://food.ndtv.com/recipes/malaysian-recipes',
'MEXICAN': 'https://food.ndtv.com/recipes/mexican-recipes',
'PAKISTANI': 'https://food.ndtv.com/recipes/pakistani-recipes',
'RUSSIAN': 'https://food.ndtv.com/recipes/russian-recipes',
'SINGAPORE': 'https://food.ndtv.com/recipes/singapore-recipes',
'SPANISH': 'https://food.ndtv.com/recipes/spanish-recipes',
'THAI': 'https://food.ndtv.com/recipes/thai-recipes',
'TIBETAN': 'https://food.ndtv.com/recipes/tibetan-recipes',
'VIETNAMESE': 'https://food.ndtv.com/recipes/vietnamese-recipes'}

# Note: when reading through all the categories, some pages are very long and
# have a "Show More" button, so we need to write a function for this button.
# Example: https://food.ndtv.com/recipes/chinese-recipes

def keep_clicking_show_more():
    """Click the 'Show More' link until it can no longer be found or clicked.

    Each click is expected to load more entries into the current page. The
    loop ends on the first WebDriver error, which covers both the link being
    absent and the click itself failing. Nothing is returned.
    """
    while True:
        try:
            # find_element raises NoSuchElementException when the link is
            # missing and never returns a falsy value, so no separate
            # "not found" check is needed. "link text" is the value of
            # By.LINK_TEXT; the find_element_by_* helpers were removed in
            # Selenium 4.
            driver.find_element("link text", 'Show More').click()
        except (NoSuchElementException, WebDriverException):
            break

# Recipes listed on each cuisine page, keyed by the main categories above.
recipe_links = {}
for category, url in category_links.items():
    driver.get(url)  # open the cuisine page in the browser
    keep_clicking_show_more()  # load every entry before reading the page
    recipe_links[category] = dict(
        (title, get_link_by_text(title))
        for title in get_list_by_class_name('recipe-image-header')
    )

recipe_links
'Chilli Baby Corn': 'https://food.ndtv.com/recipe-chilli-baby-corn-955764',
'Mushroom Manchurian': 'https://food.ndtv.com/recipe-mushroom-manchurian-955687',
'Momo Manchurian': 'https://food.ndtv.com/recipe-momo-manchurian-955395',
'Chinese Idli': 'https://food.ndtv.com/recipe-chinese-idli-955342',
'Egg Manchurian': 'https://food.ndtv.com/recipe-egg-manchurian-955221',
'Burnt Garlic Mushroom Fried Rice': 'https://food.ndtv.com/recipe-burnt-garlic-mush
'Sheng Jian Bao': 'https://food.ndtv.com/recipe-sheng-jian-bao-955054',
'Stuffed Eggplant With Schezwan Sauce': 'https://food.ndtv.com/recipe-stuffed-eggpl
'Almond And Chicken Momos (without Shell)': 'https://food.ndtv.com/recipe-almond-an
'Peri Peri Chicken Satay': 'https://food.ndtv.com/recipe-peri-peri-chicken-satay-95
'Veg Hakka Noodles': 'https://food.ndtv.com/recipe-veg-hakka-noodles-952069',
'Veg Fried Rice': 'https://food.ndtv.com/recipe-veg-fried-rice-951841',
'Honey Chilli Potato': 'https://food.ndtv.com/recipe-honey-chilli-potato-951813',
'Garlic Soya Chicken': 'https://food.ndtv.com/recipe-garlic-soya-chicken-878771',
'Mapo Tofu With Spring Onion And Black Beans': 'https://food.ndtv.com/recipe-mapo-to
'Hed Phad Medmamuang Or Stir Fried Mushroom With Cashewnuts': 'https://food.ndtv.com
'Five Spice Powder': 'https://food.ndtv.com/recipe-five-spice-powder-562201',
'Vegetable Manchow Soup': 'https://food.ndtv.com/recipe-vegetable-manchow-soup-chin
'Quick Noodles': 'https://food.ndtv.com/recipe-quick-noodles-with-whatever-500951',
'Cantonese Chicken Soup': 'https://food.ndtv.com/recipe-cantonese-chicken-soup-4889
'Stir Fried Tofu With Rice': 'https://food.ndtv.com/recipe-stir-fried-tofu-with-ric
'Chicken Manchurian': 'https://food.ndtv.com/recipe-chicken-manchurian-chinese-1-46
'Chilli Fish': 'https://food.ndtv.com/recipe-chilli-fish-410882',
'Honey Chilli Potatoes': 'https://food.ndtv.com/recipe-honey-chilli-potatoes-331733
'Garlic And Egg Fried Rice': 'https://food.ndtv.com/recipe-garlic-and-egg-fried-ric
'Sweet And Sour Chicken': 'https://food.ndtv.com/recipe-sweet-and-sour-chicken-2412
'Hot And Sour Soup': 'https://food.ndtv.com/recipe-hot-and-sour-soup-237241',
'Chilli Soya Nuggets': 'https://food.ndtv.com/recipe-chilli-soya-nuggets-chinese-219
'Asian BBQ Chicken': 'https://food.ndtv.com/recipe-asian-bbq-chicken-106687',
'Date Pancakes': 'https://food.ndtv.com/recipe-date-pancakes-100535',
'Mushroom Fried Rice': 'https://food.ndtv.com/recipe-mushroom-fried-rice-100474',
'Chilli Gobi': 'https://food.ndtv.com/recipe-chilli-gobhi-100367',
'Vegetable Chowmein': 'https://food.ndtv.com/recipe-vegetable-chowmein-99157',
'Crunchy Vegetable Stir-Fry': 'https://food.ndtv.com/recipe-crunchy-vegetable-stir-f
'Okra With Baby Corn': 'https://food.ndtv.com/recipe-okra-with-baby-corn-99155',
'Capsicum Stir Fry': 'https://food.ndtv.com/recipe-capsicum-stir-fry-99149',
'Tofu With Vegetables In Black Bean Sauce': 'https://food.ndtv.com/recipe-tofu-with
'Kapa Maki': 'https://food.ndtv.com/recipe-kapa-maki-99135',
'Vegetable Salt And Pepper': 'https://food.ndtv.com/recipe-vegetables-salt-and-pepp
'Vegetable Fried Rice': 'https://food.ndtv.com/recipe-vegetable-fried-rice-99110',
'Soya Dumplings In Tomato Sauce': 'https://food.ndtv.com/recipe-soya-dumplings-in-to
'Vegetable Chopsuey': 'https://food.ndtv.com/recipe-vegetable-chopsuey-99072',
'Baby Corn Soup': 'https://food.ndtv.com/recipe-baby-corn-soup-99063',
'Chicken Cantonese Soup': 'https://food.ndtv.com/recipe-chicken-cantonese-soup-9906
'Chilli Crab In Black Bean Sauce': 'https://food.ndtv.com/recipe-chilli-crab-in-bla
'Garlic Prawns': 'https://food.ndtv.com/recipe-garlic-prawns-98996',
'Chinese Salad': 'https://food.ndtv.com/recipe-chinese-salad-98986',
'Noodles With Mixed Meat': 'https://food.ndtv.com/recipe-noodles-with-mixed-meat-989
'Oriental Sauce': 'https://food.ndtv.com/recipe-oriental-sauce-98836',
'Chicken Fried Rice': 'https://food.ndtv.com/recipe-chicken-fried-rice-98820',
'Chicken Lollipops': 'https://food.ndtv.com/recipe-chicken-lollipops-98817',
'Chicken Schezwan Rice': 'https://food.ndtv.com/recipe-chicken-schezwan-rice-98813'
'Boneless Chilli Chicken': 'https://food.ndtv.com/recipe-boneless-chilli-chicken-98
'Szechwan Style Chicken': 'https://food.ndtv.com/recipe-szechwan-style-chicken-9880
'Stewed Chicken And Eggplant': 'https://food.ndtv.com/recipe-stewed-chicken-and-egg
'Vegetable Manchurian': 'https://food.ndtv.com/recipe-vegetable-manchurian-98735'},
'CONTINENTAL': {'Crispy Calamari Rings': 'https://food.ndtv.com/recipe-crispy-calama
'Quick Salted Caramel Pie': 'https://food.ndtv.com/recipe-quick-salted-caramel-pie-9

import json

# Save the collected links to a file as indented, key-sorted JSON.
with open('recipe_links.json', 'w') as json_file:
    json_file.write(json.dumps(recipe_links, indent=4, sort_keys=True))

You might also like