Print Google Search results using Selenium in Python. Below is a sample class that you can use to run the search; you only need to change the path to the webdriver executable so that it matches its location on your computer. The class was written for PhantomJS (you can download it HERE); if you want to use Chrome instead, just use
"self.driver = webdriver.Chrome(path)".
Below is the code example:
import time
from urllib.parse import quote_plus


class Browser:
    """Thin Selenium wrapper that runs a Google search and scrapes result links.

    Attributes:
        path: Filesystem path to the webdriver executable.
        implicit_wait_time: Seconds handed to ``driver.implicitly_wait``.
        explicit_wait_time: Default seconds to sleep after loading a page.
        driver: The Selenium driver, or ``None`` while no browser is running.
    """

    def __init__(self, path, initiate=True, implicit_wait_time=10, explicit_wait_time=2):
        """Store the configuration and optionally launch the browser.

        Args:
            path: Path to the webdriver executable.
            initiate: When true, call ``start()`` immediately.
            implicit_wait_time: Implicit wait (seconds) applied to the driver.
            explicit_wait_time: Default pause (seconds) after each page load.
        """
        self.path = path
        # These must be stored: start() and go_to_url() read them from self.
        self.implicit_wait_time = implicit_wait_time
        self.explicit_wait_time = explicit_wait_time
        self.driver = None
        if initiate:
            self.start()

    def start(self):
        """Launch the browser and apply the implicit wait."""
        # Imported here so the class (and its URL building) can be imported
        # on machines where Selenium is not installed.
        from selenium import webdriver

        # Use the instance's own path rather than a module-level global.
        # NOTE(review): passing the executable path positionally is the older
        # Selenium call style - confirm against the installed Selenium version.
        self.driver = webdriver.Chrome(self.path)
        self.driver.implicitly_wait(self.implicit_wait_time)

    def end(self):
        """Quit the browser if one is running; safe to call more than once."""
        if self.driver is not None:
            self.driver.quit()
            self.driver = None

    def go_to_url(self, url, wait_time=None):
        """Load ``url`` and then pause so the page can finish rendering.

        Args:
            url: Address to open.
            wait_time: Seconds to sleep after loading; defaults to
                ``self.explicit_wait_time`` when ``None``.
        """
        if wait_time is None:
            wait_time = self.explicit_wait_time
        self.driver.get(url)
        print('[*] Fetching results from: {}'.format(url))
        time.sleep(wait_time)

    def get_search_url(self, query, page_num=0, per_page=10, lang='en'):
        """Build the Google search URL for one page of results.

        Args:
            query: Raw search query; it is URL-encoded here.
            page_num: Zero-based results page.
            per_page: Number of results per page.
            lang: Interface language code.

        Returns:
            The complete search URL as a string.
        """
        # ``hl`` is Google's interface-language parameter (the original sent
        # the language under ``nl``).
        return 'https://www.google.hr/search?q={}&num={}&start={}&hl={}'.format(
            quote_plus(query), per_page, page_num * per_page, lang
        )

    def scrape(self):
        """Collect the result links from the currently loaded page.

        Returns:
            A list of ``{'url': ..., 'title': ...}`` dicts, one per link found.
        """
        # The XPath might change in the future: it selects every link inside
        # an h3 tag with class "r".
        # find_elements("xpath", ...) is used instead of
        # find_elements_by_xpath so the call works on Selenium 3 and 4.
        links = self.driver.find_elements("xpath", "//h3[@class='r']/a[@href]")
        return [
            {'url': link.get_attribute('href'), 'title': link.text}
            for link in links
        ]

    def search(self, query, page_num=0, per_page=10, lang='en', wait_time=None):
        """Run a search and return the scraped results.

        Args:
            query: Raw search query.
            page_num: Zero-based results page.
            per_page: Number of results per page.
            lang: Interface language code.
            wait_time: Seconds to pause after loading; ``None`` uses the
                instance default.

        Returns:
            The list of result dicts produced by ``scrape()``.
        """
        url = self.get_search_url(query, page_num, per_page, lang)
        # go_to_url() resolves a ``None`` wait_time to the instance default.
        self.go_to_url(url, wait_time)
        return self.scrape()


def main():
    """Run a sample search and print every result."""
    # SET YOUR PATH to the driver executable. It must match the webdriver
    # class used in Browser.start().
    # NOTE(review): this sample path points at PhantomJS while start() launches
    # Chrome - confirm which driver is intended.
    path = '<YOUR PATH TO PHANTOMJS>/phantomjs-2.1.1-windows/bin/phantomjs.exe'
    br = Browser(path)
    try:
        for r in br.search('site:facebook.com inurl:login'):
            print(r)
    finally:
        # Always shut the browser down, even if the search fails.
        br.end()


if __name__ == '__main__':
    main()
No comments:
Post a Comment