Скрипт Instagram photo parser

Статус
В этой теме нельзя размещать новые ответы.
  • 68
  • 14
Python:
class InstaParser:
    def __init__(self, username, password, debug_mode=True, safe_mode=True):
        self.username = username
        self.password = password
        self.driver = None
        self.account_list = []
        self.posts = 0
        self.debug = debug_mode
        self.photo_list = []
        self.safe_mode = safe_mode
        self.__get_accounts_list()

    def run(self):
        self.__download_driver()
        self.__login()
        self.__parse_photos()
        self.__download_photos()
        self.__close_browser()

    def __close_browser(self):
        self.driver.quit()

    def __download_driver(self):
        try:
            self.driver = Chrome("chromedriver.exe")
        except:
            url = "https://chromedriver.storage.googleapis.com/87.0.4280.88/chromedriver_win32.zip"
            r = requests.get(url)
            if r.status_code != 200:
                if self.debug:
                    print("Can't downdload driver. Please download it manually: "
                          "https://chromedriver.storage.googleapis.com/87.0.4280.88/chromedriver_win32.zip")
                    time.sleep(10)
                return

            with open('driver.zip', 'wb+') as f:
                f.write(r.content)

            time.sleep(5)

            fzip = zipfile.ZipFile("driver.zip")
            fzip.extractall("")
            fzip.close()

            os.remove("driver.zip")

            time.sleep(5)
            self.driver = Chrome("chromedriver.exe")

    def __login(self):
        self.driver.get("https://www.instagram.com/")
        time.sleep(3)
        username = self.driver.find_element_by_name('username')
        username.clear()
        username.send_keys(self.username)

        password = self.driver.find_element_by_name('password')
        password.clear()
        password.send_keys(self.password)

        button = self.driver.find_element_by_xpath("//button[@type='submit']")
        button.click()

        time.sleep(1)

    def __get_accounts_list(self):
        try:
            with open("list.txt") as f:
                self.account_list = f.readlines()
        except:
            if self.debug:
                print("Can't find accounts list.")
                time.sleep(10)

    def __backup(self):
        if not self.safe_mode:
            return

        with open("safemode_backup.txt", "w") as f:
            for item in self.photo_list:
                f.write(item + "\n")
        print("Backup created.")

    def __calculate_loop(self, min=1, max=100):
        if self.posts > max:
            self.posts = max

        if self.posts < min:
            self.posts = min
        """instagram can show only 12post with out scrolling"""
        return int(self.posts / 4)

    def __parse_photos(self):
        for account in self.account_list:
            try:
                self.driver.get("https://www.instagram.com/" + account)

                if self.debug:
                    print("Started parse photos from:", account)

                time.sleep(1)

                self.posts = int(self.driver.find_element_by_xpath(
                    "/html/body/div[1]/section/main/div/header/section/ul/li[1]/span/span").text)

                if self.posts < 10:
                    if self.debug:
                        print("Skipped due post.")
                    continue

                for scroll in range(self.__calculate_loop()):
                    self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

                time.sleep(1)

                elements = self.driver.find_elements_by_xpath("//a[@href]")
                for element in elements:
                    link = element.get_attribute("href")
                    if "/p/" in link:
                        self.photo_list.append(link)
                        if self.debug:
                            print(link)

            except Exception as e:
                self.__backup()
                print(e)

            self.__backup()

            time.sleep(1)

    def __download_photos(self):
        index = 0
        photo_path = ""
        profile = ""
        if self.debug:
            print("Starting download photos.")

        if self.safe_mode:
            with open("safemode_backup.txt") as f:
                self.account_list = f.readlines()
            print("Photos links uploaded from backup.")

        for link in self.photo_list:
            try:
                index += 1
                self.driver.get(link)

                try:
                    img_src = self.driver.find_element_by_xpath(
                        '/html/body/div[1]/section/main/div/div/article/div[2]/div'
                        '/div/div/img').get_attribute("src")
                except:
                    try:
                        img_src = self.driver.find_element_by_class_name('FFVAD').get_attribute("src")
                    except:
                        img_src = self.driver.find_element_by_tag_name('//div/img').get_attribute("src")

                try:
                    img_likes = self.driver.find_element_by_xpath(
                        '/html/body/div[1]/section/main/div/div/article/div[3]/section['
                        '2]/div/div/button/span').text
                except:
                    img_likes = "1"

                img_time = self.driver.find_element_by_tag_name('time').get_attribute("datetime")[:-14]
                profile = self.driver.find_element_by_tag_name('a').get_attribute("href").replace("/", "")[26:]
                if self.debug:
                    print('img:', img_src)
                    print('likes:', img_likes)
                    print('time:', img_time)
                    print('profile:', profile)

                if not os.path.isdir("photos"):
                    os.mkdir("photos")

                if not os.path.isdir("photos\\" + profile):
                    os.mkdir("photos\\" + profile)

                photo_path = "photos" + "\\" + profile + "\\" + str(index) + "_" + img_time + "_" + img_likes + ".jpg"

                r = requests.get(img_src)

                fp = open(photo_path, "wb+")
                fp.write(r.content)
                fp.close()
                time.sleep(1)
            except Exception as e:
                if self.debug:
                    print("Cant download image", e)
 
Статус
В этой теме нельзя размещать новые ответы.
Сверху Снизу