当前位置: 亚洲城ca88 > ca88 > 正文

Python 爬取 Tmall OPPO 手机

时间:2019-05-18 00:54来源:ca88
import re from selenium import webdriver from selenium.common.exceptions import TimeoutException from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC ……
import re  

from selenium import webdriver  
from selenium.common.exceptions import TimeoutException  
from  selenium.webdriver.common.by import By  
from selenium.webdriver.support.ui import WebDriverWait  
from selenium.webdriver.support import expected_conditions as EC  
from  pyquery import PyQuery as pq  
from config import *  
import  pymongo  


# Open the MongoDB connection and select the target database.
# MONGO_URL and MONGO_DB are not defined in this file; presumably they are
# provided by `from config import *` -- verify against config.py.
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]

# Alternative PhantomJS driver, left disabled; the script drives Chrome instead.
# browser = webdriver.PhantomJS(service_args=SERVER_ARGS)
browser = webdriver.Chrome()
# Shared explicit wait reused by every helper below; 10 is the timeout value
# handed to WebDriverWait.
wait = WebDriverWait(browser,10)

# Optional fixed window size, left disabled.
# browser.set_window_size(1400,900)

def search(max_retries=3):
    """Search Taobao for KEYWORD and scrape the first page of results.

    Loads the Taobao home page, types KEYWORD into the search box, submits
    the form, waits for the pager's "total" element and then calls
    get_products() for the first result page.

    Args:
        max_retries: How many additional attempts to make after a
            TimeoutException before giving up. The previous implementation
            retried through unbounded recursion, which could only end in a
            RecursionError when the page never loaded.

    Returns:
        The text of the pager's "total" element (the caller extracts the
        page count from it).

    Raises:
        TimeoutException: if every attempt timed out.
    """
    retries_left = max_retries
    while True:
        print('SEARCHING...')
        try:
            browser.get('https://www.taobao.com')
            # Named search_box rather than `input` to avoid shadowing the builtin.
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#q'))
            )
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button')
                )
            )
            search_box.send_keys(KEYWORD)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total')
                )
            )
            get_products()
            return total.text
        except TimeoutException:
            # Start over from the home page, but only a bounded number of times.
            if retries_left <= 0:
                raise
            retries_left -= 1


def next_page(page_number, max_retries=3):
    """Jump to result page `page_number` via the pager form and scrape it.

    Types the page number into the pager's input, clicks the pager's submit
    button, waits until the active pager item shows that number, and then
    calls get_products().

    Args:
        page_number: The result page to navigate to.
        max_retries: How many additional attempts to make after a
            TimeoutException before giving up. The previous implementation
            retried through unbounded recursion.

    Raises:
        TimeoutException: if every attempt timed out.
    """
    retries_left = max_retries
    while True:
        print('PAGE TURNING...', page_number)
        try:
            # Named page_box rather than `input` to avoid shadowing the builtin.
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > input')
                )
            )
            # The button is clicked, so wait for it to be clickable (as search()
            # does for its submit button) rather than merely present in the DOM.
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit')
                )
            )
            page_box.clear()
            page_box.send_keys(str(page_number))
            submit.click()
            # Confirm the pager highlights the requested page before scraping.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                    str(page_number),
                )
            )
            get_products()
            return
        except TimeoutException:
            if retries_left <= 0:
                raise
            retries_left -= 1

def get_products():
    """Parse every item on the current results page, print it and store it.

    Waits until at least one item node is present, snapshots the page source,
    and builds one dict per item (image, price, deal, title, shop, location)
    which is printed and passed to save_to_mongo().
    """
    item_locator = (By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')
    wait.until(EC.presence_of_element_located(item_locator))

    page = pq(browser.page_source)
    for node in page('#mainsrp-itemlist .items .item').items():
        record = {}
        record['image'] = node.find('.pic .img').attr('src')
        record['price'] = node.find('.price').text()
        # Drop the last three characters of the deal text
        # (presumably a fixed suffix after the count -- verify against the page).
        deal_text = node.find('.deal-cnt').text()
        record['deal'] = deal_text[:-3]
        record['title'] = node.find('.title').text()
        record['shop'] = node.find('.shop').text()
        record['location'] = node.find('.location').text()

        print(record)
        save_to_mongo(record)

def save_to_mongo(result):
    """Insert one scraped product document into the MONGO_TABLE collection.

    Failures are reported on stdout and not re-raised, so a single bad
    document does not abort the crawl.

    Args:
        result: The product dict to store.
    """
    try:
        # Collection.insert() is deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document call.
        db[MONGO_TABLE].insert_one(result)
    except Exception as exc:
        # Include the exception so the cause of the failure is visible.
        print('FAILD!', result, exc)
    else:
        print('SUCCESSD!', result)

def main():
    """Run the crawl: search, read the page count, then visit pages 2..total.

    Any exception is reported on stdout rather than propagated, and the
    browser session is always shut down at the end.
    """
    try:
        total_text = search()
        # Pull the first run of digits out of the pager's "total" text.
        match = re.search(r'(\d+)', total_text)
        if match is None:
            raise ValueError('no page count found in %r' % (total_text,))
        total = int(match.group(1))
        # Page 1 was already scraped by search(); continue through the last page.
        for page_number in range(2, total + 1):
            next_page(page_number)
    except Exception as exc:
        print('error...', exc)
    finally:
        # quit() ends the whole WebDriver session (and its driver process);
        # close() would only close the current window.
        browser.quit()

# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':  
    main()  

ca88, 

编辑:ca88 本文来源:Python 爬取 Tmall OPPO 手机

关键词: 亚洲城ca88