본문으로 바로가기

flask에 sns 모듈 올리기

category OSINT 2023. 6. 26. 01:04

flask 구조(일부 생략)

├config.py
├main.py
├───app
│   │   chromedriver.exe
│   └───__pycache__
├───assets
├───capture_log
├───crawling_log
├───module
│   │   facebook_module.py
│   │   insta_module.py
│   │   sns_module.py
│   │   twitter_module.py
├───Nmap
│   ├───licenses
│   ├───nselib
│   │   └───data
│   │       ├───jdwp-class
│   │       └───psexec
│   └───scripts
├───static
│   ├───css
│   ├───images
│   └───js
│       │   scripts.js
├───templates
│   │   facebook_result.html
│   │   index.html
│   │   insta_result.html
│   │   sns_result.html
│   │   twitter_result.html
│   └───assets
│       └───img
└───__pycache__

 

여기서 SNS 기능을 위한 파일들은 다음과 같다.

  • config.py
  • main.py
  • /app/chromedriver.exe
  • /module/facebook_module.py
  • /module/insta_module.py
  • /module/sns_module.py
  • /module/twitter_module.py
  • /templates/facebook_result.html
  • /templates/index.html
  • /templates/insta_result.html
  • /templates/sns_result.html
  • /templates/twitter_result.html
  • /static/js/scripts.js

 

config.py

SNS 로그인에 사용할 ID, PW를 입력하는 파일

import os

# Login credentials used by the SNS scraping modules.
# Each value is read from an environment variable so real passwords do not have
# to be committed to source control; when the variable is unset the value falls
# back to the empty string, exactly as before. To hard-code a value instead,
# replace the '' default.
Instagram_ID = os.environ.get('INSTAGRAM_ID', '')
Instagram_PW = os.environ.get('INSTAGRAM_PW', '')

Facebook_ID = os.environ.get('FACEBOOK_ID', '')
Facebook_PW = os.environ.get('FACEBOOK_PW', '')

 

 

main.py

모듈을 import하는 모습

python main.py로 서버를 연다.

from flask import Flask, render_template 
from module.my_calc_module import my_calc_module
from module.sns_module import sns_module
from module.insta_module import insta_module
from module.facebook_module import facebook_module
from module.twitter_module import twitter_module
from module.search_module import search_module
from module.domain_module import domain_module
from module.network_module import network_module
# import config as config


# Create the Flask application and attach every feature blueprint,
# in the same order as the imports above.
app = Flask(__name__)
for _blueprint in (
    my_calc_module,
    sns_module,
    insta_module,
    facebook_module,
    twitter_module,
    search_module,
    domain_module,
    network_module,
):
    app.register_blueprint(_blueprint)

# app.config.from_object('config')

@app.route("/")
def index():
    """Render the landing page (``index.html``)."""
    return render_template("index.html")

@app.route("/hello")
def hello_flask():
    """Render the ``loading.html`` template."""
    return render_template('loading.html')



if __name__ == "__main__":
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary Python code, so it must not be switched on unconditionally
    # while listening on every interface (0.0.0.0). Opt in explicitly with
    # FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG") == "1"
    app.run(host="0.0.0.0", port=8085, debug=debug_enabled)

 

/app/chromedriver.exe

selenium.webdriver를 사용하기 위한 파일

 

/module/sns_module.py

sns 정보를 가져오는 모듈 파일

더보기
import logging
import re
import time

import requests
from bs4 import BeautifulSoup
from flask import Blueprint, render_template, request
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from config import Instagram_ID, Instagram_PW, Facebook_ID, Facebook_PW

sns_module = Blueprint("sns_module", __name__)

logger = logging.getLogger(__name__)


class SNSProfileScraper:
    """Scrape profile fields for one username from Twitter, Facebook and Instagram.

    Every ``scrape_*`` method starts its own headless Chrome session, reads the
    fields it can find (each field falls back to a placeholder when its element
    is missing) and always shuts the browser down again before returning.
    """

    # Leading part shared by every Twitter selector used in scrape_twitter_profile.
    _TW_ROOT = (
        '#react-root > div > div > div.css-1dbjc4n.r-18u37iz.r-13qz1uu.r-417010'
        ' > main > div > div > div > div > div > '
    )
    # Leading part shared by the Twitter profile-header selectors.
    _TW_HEADER = _TW_ROOT + (
        'div:nth-child(3) > div > div > div'
        ' > div.css-1dbjc4n.r-1ifxtd0.r-ymttw5.r-ttdzmv > '
    )

    def __init__(self, username, driver_path):
        """Remember the profile ``username`` to look up and the chromedriver path."""
        self.driver_path = driver_path
        self.username = username

    def _new_driver(self, lang=None):
        """Start a headless Chrome session; ``lang`` adds a ``--lang=`` flag when given."""
        options = webdriver.ChromeOptions()
        options.add_argument('headless')
        options.add_argument('--disable-extensions')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        if lang is not None:
            options.add_argument('--lang=' + lang)
        return webdriver.Chrome(executable_path=self.driver_path, options=options)

    @staticmethod
    def _quit_driver(driver):
        """Shut ``driver`` down if it was started; a failure is logged, never raised."""
        if driver is None:
            return
        try:
            driver.quit()
        except Exception:
            logger.exception("failed to quit the Chrome driver")

    @staticmethod
    def _css_text(driver, selector, default, wait=False):
        """Return the text of the first element matching ``selector``.

        With ``wait=True`` the lookup first waits up to 10 seconds for the
        element to be present. Any failure yields ``default``.
        """
        try:
            if wait:
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                )
            return driver.find_element(By.CSS_SELECTOR, selector).text
        except Exception:
            return default

    def login_facebook(self, driver, target_url, login_name, login_pw):
        """Log in on mobile.facebook.com with ``driver`` and then open ``target_url``.

        Sets a 10 second implicit wait on ``driver``. Lookup errors from the
        login form are not handled here and propagate to the caller.
        """
        fb_url = 'https://mobile.facebook.com/'

        driver.implicitly_wait(10)
        driver.get(fb_url)
        time.sleep(3)

        username_input = driver.find_element(By.CSS_SELECTOR, "input[name='email']")
        password_input = driver.find_element(By.CSS_SELECTOR, "input[name='pass']")

        username_input.send_keys(login_name)
        password_input.send_keys(login_pw)

        login_button = driver.find_element(By.XPATH, "//button[@type='button']")
        login_button.click()

        print('페이스북 로그인')
        time.sleep(3)
        driver.get(target_url)
        time.sleep(3)
        print('페이스북 진입성공')

    def scrape_facebook_profile(self, login_name, login_pw):
        """Return a dict of Facebook profile fields, or ``None`` if scraping fails.

        Keys: ``sns``, ``name``, ``about``, ``profile_img``, ``contact``,
        ``birth``, ``career``.
        """
        driver = None
        try:
            url = 'https://mobile.facebook.com/' + self.username
            driver = self._new_driver(lang='ko_KR.UTF-8')
            driver.get(url)
            time.sleep(5)
            self.login_facebook(driver, url, login_name, login_pw)
            time.sleep(5)

            name = self._css_text(driver, '#cover-name-root > h3', 'None')
            about = self._css_text(driver, '#bio  > div', 'aboutNone', wait=True)

            # The id of the avatar container is generated per page load, so it is
            # recovered from the page source first.
            try:
                img_text = re.findall(r'u_0_u_[a-zA-Z0-9_\-]{2}', str(driver.page_source))[0]
            except Exception:
                img_text = None

            # The avatar URL lives inside the element's inline style as url(...).
            try:
                icon = driver.find_element(By.CSS_SELECTOR, f'#{img_text} > a > div > i')
                style = icon.get_attribute('style')
                profile_img = re.search(r"url\(['\"]?([^'\")]+)['\"]?\)", style).group(1)
            except Exception:
                profile_img = img_text

            # get_facebook_about reports failure by returning None (it does not
            # raise), so the placeholder has to be applied on None as well;
            # otherwise the whole profile would be lost on the lookups below.
            try:
                about_data = self.get_facebook_about(driver, self.username, login_name, login_pw)
            except Exception:
                about_data = None
            if about_data is None:
                about_data = {'contact' : 'a',  'birth': 'b', 'career': 'b'}

            return {
                'sns' : 'facebook',
                'name': name,
                'about': about,
                'profile_img': profile_img,
                'contact': about_data['contact'],
                'birth': about_data['birth'],
                'career': about_data['career'],
            }
        except Exception:
            logger.exception("facebook profile scrape failed")
            return None
        finally:
            self._quit_driver(driver)

    def get_facebook_about(self, driver, username, login_name, login_pw):
        """Return ``contact``/``birth``/``career`` from the profile's /about page.

        The ``driver`` argument is kept for signature compatibility only: as
        before, this method opens and logs in with its own Chrome session, which
        is now closed before returning. Returns ``None`` when the page cannot be
        opened; individual missing fields fall back to '1', '2' and '3'.
        """
        about_driver = None
        try:
            url = 'https://mobile.facebook.com/' + username + '/about'
            about_driver = self._new_driver(lang='ko_KR.UTF-8')
            about_driver.get(url)
            time.sleep(5)
            self.login_facebook(about_driver, url, login_name, login_pw)
            time.sleep(5)

            contact = self._css_text(about_driver, '#contact-info', '1', wait=True)
            birth = self._css_text(
                about_driver,
                '#basic-info > div > div:nth-of-type(1) > div > div._5cdv.r',
                '2',
                wait=True,
            )
            career = self._css_text(
                about_driver, '#work > div > div > div > div', '3', wait=True
            )
            return {
                'contact': contact,
                'birth': birth,
                'career': career,
            }
        except Exception:
            logger.exception("facebook about-page scrape failed")
            return None
        finally:
            self._quit_driver(about_driver)

    def scrape_twitter_profile(self):
        """Return a dict of Twitter profile fields, or ``None`` if scraping fails.

        Keys: ``sns``, ``name``, ``screen_name``, ``bio``, ``location``,
        ``profile_img``, ``joined_date``. No login is performed.
        """
        driver = None
        try:
            url = 'https://twitter.com/' + self.username
            driver = self._new_driver()
            driver.get(url)
            time.sleep(5)

            header = self._TW_HEADER
            name = self._css_text(
                driver,
                self._TW_ROOT + 'div.css-1dbjc4n.r-aqfbo4.r-gtdqiz.r-1gn8etr.r-1g40b8q > div:nth-child(1) > div > div > div > div > div > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1pi2tsx.r-1777fci > div > h2 > div > div > div > div > span.css-901oao.css-16my406.r-1awozwy.r-18jsvk2.r-6koalj.r-poiln3.r-b88u0q.r-bcqeeo.r-1udh08x.r-3s2u2q.r-qvutc0 > span > span:nth-child(1)',
                None,
            )
            screen_name = self._css_text(
                driver,
                header + 'div.css-1dbjc4n.r-6gpygo.r-14gqq1x > div.css-1dbjc4n.r-1wbh5a2.r-dnmrzs.r-1ny4l3l > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1wbh5a2 > div > div > div > span',
                'None',
            )
            bio = self._css_text(
                driver, header + 'div:nth-child(3) > div > div > span', None
            )
            location = self._css_text(
                driver,
                header + 'div:nth-child(4) > div > span:nth-child(1) > span > span',
                None,
            )

            try:
                avatar = driver.find_element(
                    By.CSS_SELECTOR,
                    header + 'div.css-1dbjc4n.r-1habvwh.r-18u37iz.r-1w6e6rj.r-1wtj0ep > div.css-1dbjc4n.r-1adg3ll.r-16l9doz.r-6gpygo.r-2o1y69.r-1v6e3re.r-bztko3.r-1xce0ei > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu > div > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu > div > a > div.css-1dbjc4n.r-14lw9ot.r-sdzlij.r-1wyvozj.r-1udh08x.r-633pao.r-u8s1d.r-1v2oles.r-desppf > div > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu > div > img',
                )
                profile_img = avatar.get_attribute('src')
            except Exception:
                # Previously this branch assigned ``url`` instead, leaving
                # profile_img undefined and discarding the whole result.
                profile_img = None

            joined_date = self._css_text(
                driver,
                header + 'div:nth-child(4) > div > span.css-901oao.css-16my406.r-14j79pv.r-4qtqp9.r-poiln3.r-1b7u577.r-bcqeeo.r-qvutc0 > span',
                None,
            )

            return {
                'sns' : 'twitter',
                'name': name,
                'screen_name': screen_name,
                'bio': bio,
                'location': location,
                'profile_img': profile_img,
                'joined_date': joined_date,
            }
        except Exception:
            logger.exception("twitter profile scrape failed")
            return None
        finally:
            self._quit_driver(driver)

    def login_instargram(self, driver, target_url, login_name, login_pw):
        """Log in on www.instagram.com with ``driver`` and then open ``target_url``.

        Sets a 10 second implicit wait on ``driver``. Lookup errors from the
        login form are not handled here and propagate to the caller.
        """
        insta_url = 'https://www.instagram.com'
        driver.implicitly_wait(10)
        driver.get(insta_url)
        time.sleep(3)

        username_input = driver.find_element(By.CSS_SELECTOR, "input[name='username']")
        password_input = driver.find_element(By.CSS_SELECTOR, "input[name='password']")

        username_input.send_keys(login_name)
        password_input.send_keys(login_pw)

        login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
        login_button.click()

        print('인스타그램 로그인')
        time.sleep(3)
        driver.get(target_url)
        time.sleep(3)
        print('인스타그램 진입성공')

    def scrape_instagram_profile(self, login_name, login_pw):
        """Return a dict of Instagram profile fields, or ``None`` if scraping fails.

        Keys: ``sns``, ``name``, ``bio``, ``post``, ``profile_img``. Failure is
        now reported as ``None``, like the other scrapers, instead of the former
        debug values (the string '123' or an exception object).
        """
        driver = None
        try:
            url = 'https://www.instagram.com/' + self.username
            driver = self._new_driver(lang='ko_KR.UTF-8')
            driver.get(url)
            time.sleep(5)
            self.login_instargram(driver, url, login_name, login_pw)
            time.sleep(5)

            # The id of the React mount node is generated, so it is recovered
            # from the page source and used as the root of the bio selector.
            try:
                bio_text = re.findall(r'mount_0_0_[a-zA-Z0-9_\-]{2}', str(driver.page_source))[0]
            except Exception:
                bio_text = None

            try:
                title = driver.find_element(By.TAG_NAME, 'title').get_attribute('textContent')
                name = title.split('•')[0]
            except Exception:
                name = 'name'

            bio = self._css_text(
                driver,
                f"#{bio_text} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > div:nth-child(2) > section > main > div > header > section > div._aa_c",
                bio_text,
                wait=True,
            )

            try:
                description = driver.find_element(By.CSS_SELECTOR, 'meta[name="description"]')
                post = description.get_attribute('content').split('-')[0]
            except Exception:
                post = None

            try:
                og_image = driver.find_element(By.CSS_SELECTOR, 'meta[property="og:image"]')
                profile_img = og_image.get_attribute('content')
            except Exception:
                profile_img = 'profile_img'

            return {
                # NOTE(review): 'instgram' looks like a typo, but the result
                # template may key on it, so the value is left unchanged.
                'sns' : 'instgram',
                'name': name,
                'bio': bio,
                'post': post,
                'profile_img': profile_img,
            }
        except Exception:
            logger.exception("instagram profile scrape failed")
            return None
        finally:
            self._quit_driver(driver)


@sns_module.route("/sns_result", methods=["POST"])
def sns_result():
    """Scrape all three networks for the username in the ``NAME`` cookie.

    Renders ``sns_result.html`` with ``result``, a dict holding one entry per
    network (``twitter``, ``facebook``, ``instagram``); an entry is ``None``
    when that scrape failed or when no username cookie was sent.
    """
    driver_path = 'app/chromedriver.exe'
    find_name = request.cookies.get("NAME")

    result = {'twitter': None, 'facebook': None, 'instagram': None}
    # Without a username there is nothing to look up, so no browser is started.
    if find_name:
        scraper = SNSProfileScraper(find_name, driver_path)
        result['twitter'] = scraper.scrape_twitter_profile()
        result['facebook'] = scraper.scrape_facebook_profile(Facebook_ID, Facebook_PW)
        result['instagram'] = scraper.scrape_instagram_profile(Instagram_ID, Instagram_PW)

    return render_template("sns_result.html", result=result)

 

/templates/index.html

Option 토글을 통해 검색하고자 하는 프로필 ID를 입력해 쿠키에 저장해준다.

 

이후 다음과 같이 모듈을 실행할 수 있다.

 

코드

더보기
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, shrink-to-fit=no"
    />
    <meta name="description" content="" />
    <meta name="author" content="" />
    <title>Joongsint - Open-Source Intelligence Solution</title>
    <!-- Favicon-->
    <link rel="icon" type="image/x-icon" href="static/images/favicon.ico" />
    <!-- Bootstrap icons-->
    <link
      href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.4.1/font/bootstrap-icons.css"
      rel="stylesheet"
    />
    <!-- Core theme CSS (includes Bootstrap)-->
    <link href="{{ url_for('static', filename='css/styles.css') }}" rel="stylesheet" />
  </head>
  <body>
    <!-- Responsive navbar-->
    <nav class="navbar navbar-expand-lg navbar-dark bg-dark">
      <div class="container px-lg-5">
        <a class="navbar-brand" href="#!">JoongSint</a>
        <button
          class="navbar-toggler"
          type="button"
          data-bs-toggle="collapse"
          data-bs-target="#navbarSupportedContent"
          aria-controls="navbarSupportedContent"
          aria-expanded="false"
          aria-label="Toggle navigation"
        >
          <span class="navbar-toggler-icon"></span>
        </button>
        <div class="collapse navbar-collapse" id="navbarSupportedContent">
          <ul class="navbar-nav ms-auto mb-2 mb-lg-0">
            <li class="nav-item">
              <a class="nav-link active" aria-current="page" href="#!">Home</a>
            </li>
            <li class="nav-item"><a class="nav-link" href="#!">About</a></li>
            <li class="nav-item"><a class="nav-link" href="#!">Contact</a></li>
          </ul>
        </div>
      </div>
    </nav>
    <!-- Header-->
    <header class="py-5">
      <div class="container px-lg-5">
        <div class="p-4 p-lg-5 bg-light rounded-3 text-center">
          <div class="m-4 m-lg-5">
            <h1 class="display-5 fw-bold">welcome Joongsint!</h1>
            <p class="fs-4">
              Gather all the information you need in one place with our OSINT
              webpage that offers comprehensive insights using data collected
              from network, SNS, Domain, search engines, CVE, and GPT.
              <p>당신이 원하는 모든 정보를 한 곳에서 모아보세요. 네트워크, SNS,
                Domain, 검색 엔진, CVE 및 GPT에서 수집한 데이터로 더 나은 인사이트를
                제공하는 OSINT 웹 페이지입니다.</p>
            </p>
            <div class="intro-btn gap-5 justify-content-sm-center">
                <a class="btn btn-primary btn-lg" data-bs-toggle="modal" href="#Manual">Manual</a>
                <a class="btn btn-primary btn-lg" data-bs-toggle="modal"  href="#Option">Option</a>
            </div>
          </div>
        </div>
      </div>
    </header>
    <!-- Page Content-->
    <section class="pt-4">
      <div class="container px-lg-5">
        <!-- Page Features-->
        <div class="row gx-lg-5">
          <!-- "Search SNS" feature card: one form per scraping endpoint. -->
          <div class="col-lg-6 col-xxl-4 mb-5">
            <div class="card bg-light border-0 h-100">
              <div class="card-body text-center p-4 p-lg-5 pt-0 pt-lg-0">
                <div
                  class="feature bg-primary bg-gradient text-white rounded-3 mb-4 mt-n4"
                >
                  <i class="bi bi-collection"></i>
                </div>
                <h2 class="fs-4 fw-bold">Search SNS</h2>
                <p class="mb-0">
                    Ability to collect and display data from Instagram, Facebook, and Twitter!
                </p>
                <form class="d-grid mt-4" action="/sns_result" method="POST">
                  <button class="btn btn-outline-primary" type="submit">search</button>
                </form>
                <div class="d-flex align-items-center justify-content-between text-xs">
                  <form class="d-grid mt-4" action="/insta_result" method="POST">
                    <button class="btn btn-outline-primary" type="submit">instagram</button>
                  </form>
                  <form class="d-grid mt-4" action="/facebook_result" method="POST">
                    <button class="btn btn-outline-primary" type="submit">facebook</button>
                  </form>
                  <form class="d-grid mt-4" action="/twitter_result" method="POST">
                    <button class="btn btn-outline-primary" type="submit">twitter</button>
                  </form>
                </div>
              </div>
            </div>
          </div>
          <div class="col-lg-6 col-xxl-4 mb-5">
            <div class="card bg-light border-0 h-100">
              <div class="card-body text-center p-4 p-lg-5 pt-0 pt-lg-0">
                <div
                  class="feature bg-primary bg-gradient text-white rounded-3 mb-4 mt-n4"
                >
                  <i class="bi bi-cloud-download"></i>
                </div>
                <h2 class="fs-4 fw-bold">Search Domain</h2>
                <p class="mb-0">
                    Enter Domain to search for keywords and open phone numbers and emails for that Domain and sub-Domain!
                </p>
                <form class="d-grid mt-4" form action="/domain_result" method="POST">
                  <button class="btn btn-outline-primary" type="submit">search</button>
                </form>
              </div>
            </div>
          </div>
          <div class="col-lg-6 col-xxl-4 mb-5">
            <div class="card bg-light border-0 h-100">
              <div class="card-body text-center p-4 p-lg-5 pt-0 pt-lg-0">
                <div
                  class="feature bg-primary bg-gradient text-white rounded-3 mb-4 mt-n4"
                >
                  <i class="bi bi-card-heading"></i>
                </div>
                <h2 class="fs-4 fw-bold">Search Engines</h2>
                <p class="mb-0">
                    Collect open information through Google and Naver's search engines and data through Chat GPT!
                </p>
                <form class="d-grid mt-4" form action="/search_result" method="POST">
                  <button class="btn btn-outline-primary" type="submit">search</button>
                </form>
              </div>
            </div>
          </div>
          <div class="col-lg-6 col-xxl-4 mb-5">
            <div class="card bg-light border-0 h-100">
              <div class="card-body text-center p-4 p-lg-5 pt-0 pt-lg-0">
                <div
                  class="feature bg-primary bg-gradient text-white rounded-3 mb-4 mt-n4"
                >
                  <i class="bi bi-bootstrap"></i>
                </div>
                <h2 class="fs-4 fw-bold">Search Network</h2>
                <p class="mb-0">
                    You can collect network information using IP, WhoIs, nmap, server vs. version, netblock, traceroute, and more!
                </p>
                <form class="d-grid mt-4" form action="/network_result" method="POST">
                  <button class="btn btn-outline-primary" type="submit">search</button>
                </form>
              </div>
            </div>
          </div>
          

        </div>

      </div>
    </section>
    <div class="container px-lg-5">
    <div class="mb-5">
        <div class=" bg-light border-0 h-100 shadow-l">
          <div class="text-center p-4 p-lg-5 pt-0 pt-lg-0">
            <div
              class="feature bg-primary bg-gradient text-white rounded-3 mb-4 mt-n4"
            >
              <i class="bi bi-code"></i>
            </div>
            <h2 class="fs-4 fw-bold">Total Check</h2>
            <p class="mb-0">
                Get data from the network, Domain, SNS, and search engines of the companies and users you want to search for using all of the features at once!
            </p>
            <div class="d-grid mt-4">
                <a class="btn btn-outline-primary" href="#!">search</a>
            </div>
          </div>
        </div>
      </div>
    </div>
    <!-- Footer-->
    <footer class="py-5 bg-dark">
      <div class="container">
        <p class="m-0 text-center text-white">
          Copyright &copy; Your Website 2023
        </p>
      </div>
    </footer>



    <!-- Option modal popup-->
    <div
      class="portfolio-modal modal fade"
      id="Option"
      tabindex="-1"
      role="dialog"
      aria-hidden="true"
    >
      <div class="modal-dialog">
        <div class="modal-content">
          <div class="close-modal" data-bs-dismiss="modal">
            <img src="/static/images/close-icon.svg" alt="Close modal" />
          </div>
          <div class="container">
            <div class="row justify-content-center">
              <div class="col-lg-8">
                <div class="modal-body">
                  <!-- Project details-->
                  <h2 class="text-uppercase">Setting Value</h2>
                  <p class="item-intro text-muted">
                    데이터 수집에 필요한 값들을 입력해주세요
                  </p>
                  <div class="col-md-auto">
                    검색할 SNS 프로필 ID
                    <div class="d-flex form-label justify-content-between">
                      
                      <!-- Name input-->
                      <input
                        class="form-control"
                        id="NAME"
                        type="text"
                        placeholder="Your Name *"
                        
                      />
                      <button
                        class="btn btn-primary btn-xl text-uppercase mx-sm-2"
                        type="button"
                        onclick="setCookies('NAME');"
                      >
                        <i class="fas fa-xmark me-1"></i>
                        SAVE
                      </button>

                    </div>
                    <!-- add -->
                    <!-- Instagram id , pw
                    <div class="d-flex form-label">
                      <div class="col-sm-5">
                          <input type="text" class="form-control form-control-user" id="insta_id" placeholder="Instagram_id">
                      </div>
                      <div class="col-sm-5 mx-sm-3">
                          <input type="text" class="form-control form-control-user " id="insta_pw" placeholder="Instagram_pw">
                      </div>

                      <button
                      class="btn btn-primary btn-xl text-uppercase mx-sm-2"
                      type="button"
                      onclick="setCookies('insta_id'); setCookies('insta_pw');"
                      >
                      <i class="fas fa-xmark me-1"></i>
                      SAVE
                      </button>
                    </div> -->

                    

                      <!-- add -->
                    <!-- facebook id , pw
                    <div class="d-flex form-label">
                      <div class="col-sm-5">
                          <input type="text" class="form-control form-control-user" id="face_id" placeholder="facebook_id">
                      </div>
                      <div class="col-sm-5 mx-sm-3">
                          <input type="text" class="form-control form-control-user" id="face_pw" placeholder="facebook_pw">
                      </div>

                      <button
                      class="btn btn-primary btn-xl text-uppercase mx-sm-2"
                      type="button"
                      onclick="setCookies('face_id'); setCookies('face_pw');"
                      >
                      <i class="fas fa-xmark me-1"></i>
                      SAVE
                      </button>
                    </div> -->

                    검색할 Domain을 적어주세요

                    <div class="d-flex form-label justify-content-between"> 
                      <!-- Email address input-->
                      <input
                        class="form-control"
                        id="Domain"

                        placeholder="ex) http://joongsint.64bit.kr"

                      />

                      <button
                        class="btn btn-primary btn-xl text-uppercase mx-sm-2"
                        type="button"
                        onclick="setCookies('Domain');"
                      >
                        <i class="fas fa-xmark me-1"></i>
                        SAVE
                      </button>
                    </div>
                    <!-- search engine input-->

                    검색엔진에 적용할 단어를 적어주세요
                    <div class="d-flex form-label justify-content-between">
                      
                      <input
                        class="form-control"
                        id="search"
                        placeholder="ex) joongsint"
                        
                      />
                      <button
                        class="btn btn-primary btn-xl text-uppercase mx-sm-2"
                        type="button"
                        onclick="setCookies('search');"
                      >
                        <i class="fas fa-xmark me-1"></i>
                        SAVE
                      </button>

                    </div>
                  </div>
                  
                  
                  <br>
                  <br>

                  <h2 class="text-uppercase mt-5">Save Value</h2>
                  <p class="item-intro text-muted">
                    저장한 값을 확인 하세요.
                  </p>
                  


                  <div class="card mt-5">
                    <div class="card-body">
                      <div class="d-flex align-items-center justify-content-between text-xs">
                        <div class="fw-500">SNS Profile ID</div>
                        <div id="cookie-value"> </div>
                      </div>
                      
                      <!---->
                      <!-- <div class="d-flex align-items-center justify-content-between text-xs">
                        <div class="fw-500">Instagram Login info</div>
                        <div>
                          <div class="d-flex">
                            instagram id : 
                            <div id="insta_info_id"> </div>
                          </div>
                          <div class="d-flex">
                            instagram pw : 
                            <div id="insta_info_pw"> </div>
                          </div>
                        </div>
                        
                      </div> -->
                      <hr  >
                      <!-- <div class="d-flex align-items-center justify-content-between text-xs">
                        <div class="fw-500">facebook Login info</div>
                        <div>
                          <div class="d-flex">
                            facebook id : 
                            <div id="face_info_id"> </div>
                          </div>
                          <div class="d-flex">
                            facebook pw : 
                            <div id="face_info_pw"> </div>
                          </div>
                        </div>
                      </div> -->
                      
                      <div class="d-flex align-items-center justify-content-between text-xs">
                        <div class="fw-500">Domain</div>
                        <div id="Domain-value"> </div>
                      </div>
                      <hr  >
                      <div class="d-flex align-items-center justify-content-between text-xs">
                        <div class="fw-500">Search Engine Query</div>
                        <div id="search-value"> </div>
                      </div>
                      
                      <!---->
                      <!---->
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>


    <!-- Manual modal popup-->
    <div
      class="portfolio-modal modal fade"
      id="Manual"
      tabindex="-1"
      role="dialog"
      aria-hidden="true"
    >
      <div class="modal-dialog">
        <div class="modal-content">
          <div class="close-modal" data-bs-dismiss="modal">
            <img src="/static/images/close-icon.svg" alt="Close modal" />
          </div>
          <div class="container">
            <div class="row justify-content-center">
              <div class="col-lg-8">
                <div class="modal-body">
                  <!-- Project details-->
                  <h1 class="text-uppercase">Joongsint Module Manual</h1>

                  <div>
                    <h4>저희 Joongsint를 사용하시려면 설치해야 할 모듈들이 존재합니다.</h4>
                    <h4>우선, Joongsint에서 제공하는 모듈 파일들을 설치해 주세요.</h4>
                    <h4>그런 다음 원활한 사용을 위해 추가적으로 설치 명령어와 설치 파일을 사용하여 모듈을 설치해 주세요.</h4>
                    <p>※ python3를 권장하며, python2를 사용한다면 pip3가 아닌 pip를 통해 설치합니다.</p><br>
                    
                    <h5>공통 설치 명령어</h5>
                    <pre><code>pip3 install flask requests beautifulsoup4</code></pre>
                    
                    <h5>Search SNS & Search Domain 사용을 위해…</h5>
                    <pre><code>pip3 install selenium</code></pre>
                    
                    <h5>Search Engines 사용을 위해…</h5>
                    <pre><code>pip3 install google-api-python-client</code></pre>
                    <pre><code>pip3 install openai</code></pre>
                    
                    <h5>Search Network & CVE 사용을 위해…</h5>
                    <pre><code>Nmap 설치</code></pre>
                    <pre><code>pip3 install dnspython</code></pre>
                    <pre><code>pip3 install python-whois</code></pre>

                    <p>매뉴얼에 따른 모든 모듈에 대한 설치가 완료되었다면</p>
                    
                    <p>터미널을 통해 <code>py main.py</code> 또는 <code>python main.py</code> 또는 <code>flask run</code>을 통해 실행하고 서비스를 이용하세요.</p>
                    
                    <p>원활한 서비스 이용이 불가능하다면 관리자 이메일을 통해 문의 바랍니다.</p>
                    
                    <p>관리자 이메일: wpgur0510@gmail.com</p>
                  </div>

                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>



    <!-- Bootstrap core JS-->
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/js/bootstrap.bundle.min.js"></script>
    <!-- Core theme JS-->
    <script src="static/js/scripts.js"></script>

    <script>
      // Fill the "Save Value" card with the values previously stored in cookies.
      // The Instagram/Facebook credential read-outs are disabled together with
      // their input fields, so only the three values below are shown.
      const NameValue = getCookie('NAME');
      const DomainValue = getCookie('Domain');
      const seaerchValue = getCookie('search');

      // Write a saved value into the element with the given id, falling back
      // to a placeholder when the cookie is missing or empty.
      const renderSaved = (elementId, saved) => {
        const target = document.getElementById(elementId);
        target.textContent = saved ? saved : 'No cookie found.';
      };

      renderSaved('cookie-value', NameValue);
      renderSaved('Domain-value', DomainValue);
      renderSaved('search-value', seaerchValue);
    </script>


    
  </body>
</html>

 

 

 

/static/js/scripts.js

쿠키값 저장과 토글기능을 정의하고 있다.

더보기
/*!
 * Start Bootstrap - Heroic Features v5.0.6 (https://startbootstrap.com/template/heroic-features)
 * Copyright 2013-2023 Start Bootstrap
 * Licensed under MIT (https://github.com/StartBootstrap/startbootstrap-heroic-features/blob/master/LICENSE)
 */
// This file is intentionally blank
// Use this file to add JavaScript to your project

// Page bootstrap: wires up the navbar shrink effect, Bootstrap scrollspy and
// the auto-collapse of the responsive menu once the DOM is ready.
window.addEventListener('DOMContentLoaded', (event) => {
  // Add `navbar-shrink` to #mainNav whenever the page is scrolled away from
  // the top, and remove it again at the very top. No-op without #mainNav.
  const navbarShrink = () => {
    const navbarCollapsible = document.body.querySelector('#mainNav');
    if (!navbarCollapsible) {
      return;
    }
    navbarCollapsible.classList.toggle('navbar-shrink', window.scrollY !== 0);
  };

  // Apply the correct state immediately (the page may load already scrolled)
  navbarShrink();

  // Re-evaluate on every scroll
  document.addEventListener('scroll', navbarShrink);

  // Activate Bootstrap scrollspy on the main nav element
  const mainNav = document.body.querySelector('#mainNav');
  if (mainNav) {
    new bootstrap.ScrollSpy(document.body, {
      target: '#mainNav',
      rootMargin: '0px 0px -40%',
    });
  }

  // Collapse the responsive navbar after a link is clicked, but only while
  // the toggler is visible (i.e. the menu is in its collapsed/mobile layout).
  // Guarded so pages without a toggler do not throw inside getComputedStyle.
  const navbarToggler = document.body.querySelector('.navbar-toggler');
  if (navbarToggler) {
    document
      .querySelectorAll('#navbarResponsive .nav-link')
      .forEach((responsiveNavItem) => {
        responsiveNavItem.addEventListener('click', () => {
          if (window.getComputedStyle(navbarToggler).display !== 'none') {
            navbarToggler.click();
          }
        });
      });
  }
});

/**
 * Store the current value of the input whose id is `name` in a cookie of the
 * same name, show a confirmation alert, and reload the index page.
 *
 * @param {string} name - id of the input element; also used as the cookie name.
 */
function setCookies(name) {
  // Get the entered data; bail out instead of throwing if the field is absent
  const field = document.getElementById(name);
  if (!field) {
    console.error('setCookies: no element with id "' + name + '"');
    return;
  }

  // Expire relative to "now". A fixed calendar date would eventually lie in
  // the past, at which point the browser discards the cookie immediately.
  const lifetimeDays = 365;
  const expires = new Date(
    Date.now() + lifetimeDays * 24 * 60 * 60 * 1000
  ).toUTCString();

  // Save the data to a cookie.
  // NOTE(review): the value is stored raw, so a ';' in the input truncates it.
  // Encoding it here would also require decoding wherever the cookie is read.
  document.cookie =
    name + '=' + field.value + '; expires=' + expires + '; path=/';

  // Show a message to indicate that the data has been saved
  alert(' ' + name + ' : ' + getCookie(name));
  location.href = './';
}

/**
 * Look up a cookie by name in `document.cookie`.
 *
 * @param {string} name - cookie name to search for (matched literally).
 * @returns {?string} the raw cookie value, or null when no such cookie exists.
 */
function getCookie(name) {
  // Escape regex metacharacters so a name such as "a.b" only matches itself
  // and not, for example, a cookie called "aXb".
  var escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  var value = document.cookie.match(
    new RegExp('(^|;) ?' + escapedName + '=([^;]*)(;|$)')
  );
  return value ? value[2] : null;
}

 

 

 

/templates/sns_result.html

결과 페이지를 보여준다.

 

코드

더보기
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8" />
    <title>Social Media Info</title>
    <!-- Core theme CSS (includes Bootstrap)-->
    <link
      href="{{ url_for('static', filename='css/styles.css') }}"
      rel="stylesheet"
    />
    <style>
      body {
        margin: 0;
        padding: 0;
        background-color: #f2f2f2;
        font-family: Arial, sans-serif;
      }

      h1 {
        text-align: center;
        padding: 20px;
      }

      .social-media {
        background-color: #fff;
        border-radius: 10px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        margin: 20px;
        padding: 20px;
      }

      h2 {
        margin: 0 0 10px 0;
      }

      .twitter-info,
      .instagram-info,
      .facebook-info {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-between;
        align-items: center;
      }

      .twitter-info img,
      .instagram-info img,
      .facebook-info img {
        width: 100px;
        height: 100px;
        border-radius: 50%;
        object-fit: cover;
        margin-right: 20px;
      }

      p {
        margin: 0;
        padding: 10px;
        text-align: left;
        font-weight: bold;
      }

      .twitter-info p,
      .instagram-info p,
      .facebook-info p {
        flex-basis: 100%;
        text-align: left;
        font-weight: normal;
      }

      /* :root {
        --background: {{ result['facebook']['profile_img'] }};
      }

      .facebook-img {
        background: var(--background);
        width: 100px; height: 100px;
      } */
    </style>
  </head>
  <body>
    <h1>Social Media Info</h1>
    <div class="social-media">
      <h2>Twitter</h2>
      <div class="twitter-info">
        <img
          src="{{ result['twitter']['profile_img'] }}"
          alt="profile picture"
        />
        <p>Name: {{ result['twitter']['name'] }}</p>
        <p>Screen Name: {{ result['twitter']['screen_name'] }}</p>
        <p>Bio: {{ result['twitter']['bio'] }}</p>
        <p>Location: {{ result['twitter']['location'] }}</p>
        <p>Joined Date: {{ result['twitter']['joined_date'] }}</p>
      </div>
    </div>
    <div class="social-media">
      <h2>Instagram</h2>
      <div class="instagram-info">
        <img
          src="{{ result['instagram']['profile_img'] }}"
          alt="profile picture"
        />
        <p>Name: {{ result['instagram']['name'] }}</p>
        <p>Bio: {{ result['instagram']['bio'] }}</p>
        <p>Post: {{ result['instagram']['post'] }}</p>
      </div>
    </div>
    <div class="social-media">
      <h2>Facebook</h2>
      <div class="facebook-info">
        <img
          src="{{ result['facebook']['profile_img'] }}"
          alt="profile picture"
        />

        <p>Name: {{ result['facebook']['name'] }}</p>
        <p>About: {{ result['facebook']['about'] }}</p>
        <p>Contact: {{ result['facebook']['contact'] }}</p>
        <p>Birth: {{ result['facebook']['birth'] }}</p>
        <p>Career: {{ result['facebook']['career'] }}</p>
      </div>
    </div>
    <div class="container my-5">
      <a class="container btn btn-outline-primary" href="/">돌아가기</a>
    </div>
  </body>
</html>

 


모듈 세분화

 

/module/facebook_module.py

더보기
from flask import Blueprint, render_template, request
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time, re
from config import Facebook_ID, Facebook_PW

facebook_module = Blueprint("facebook_module", __name__)

@facebook_module.route("/facebook_result", methods=["POST"])
def facebook_result():
    """Scrape the Facebook profile named by the ``NAME`` cookie and render it.

    A headless Chrome session logs in with ``Facebook_ID`` / ``Facebook_PW``
    from ``config``, collects the profile fields, and the outcome is passed to
    ``facebook_result.html`` as ``result['facebook']``. That value is a dict
    on success and ``None`` when the scrape failed as a whole.
    """

    class SNSProfileScraper:
        """Selenium-based scraper for a single mobile.facebook.com profile."""

        def __init__(self, username , driver_path):
            self.driver_path = driver_path
            self.username = username

        def _new_driver(self):
            """Start a headless Chrome configured for Korean-locale scraping."""
            options = webdriver.ChromeOptions()
            options.add_argument('headless')
            options.add_argument('--disable-extensions')
            options.add_argument('--disable-gpu')
            options.add_argument('--no-sandbox')
            options.add_argument('--lang=ko_KR.UTF-8')
            return webdriver.Chrome(executable_path=self.driver_path, options=options)

        def _close(self, driver):
            """Quit ``driver`` if it was started; cleanup is best-effort."""
            if driver is None:
                return
            try:
                driver.quit()
            except Exception:
                # A failed shutdown must not mask the scrape result.
                pass

        def _text_when_present(self, driver, selector, default):
            """Return the text of the first element matching ``selector``.

            Waits up to 10 seconds for the element to be present and returns
            ``default`` if it does not appear or cannot be read.
            """
            try:
                element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                )
                return element.text
            except Exception:
                return default

        def login_facebook(self, driver, target_url, login_name, login_pw):
            """Log in on the mobile site, then navigate to ``target_url``.

            Raises whatever Selenium raises when the login form elements
            cannot be found; callers treat that as a failed scrape.
            """
            fb_url = 'https://mobile.facebook.com/'

            driver.implicitly_wait(10)
            driver.get(fb_url)
            time.sleep(3)

            username_input = driver.find_element(By.CSS_SELECTOR, "input[name='email']")
            password_input = driver.find_element(By.CSS_SELECTOR, "input[name='pass']")

            username_input.send_keys(login_name)
            password_input.send_keys(login_pw)

            login_button = driver.find_element(By.XPATH , "//button[@type='button']")
            login_button.click()

            print('페이스북 로그인')
            time.sleep(3)
            driver.get(target_url)
            time.sleep(3)
            print('페이스북 진입성공')

        def scrape_facebook_profile(self , login_name, login_pw):
            """Collect the profile fields for ``self.username``.

            Returns a dict with the keys ``sns``, ``name``, ``about``,
            ``profile_img``, ``contact``, ``birth`` and ``career``, or
            ``None`` when the browser could not be started, the login failed,
            or the username is missing. Individual fields that cannot be read
            fall back to placeholder values instead of failing the scrape.
            """
            driver = None
            try:
                url = 'https://mobile.facebook.com/' + self.username
                driver = self._new_driver()
                driver.get(url)
                time.sleep(5)
                self.login_facebook(driver, url, login_name, login_pw)
                time.sleep(5)

                try:
                    name = driver.find_element(By.CSS_SELECTOR, '#cover-name-root > h3').text
                except Exception:
                    name = 'None'

                about = self._text_when_present(driver, '#bio  > div', 'aboutNone')

                # The profile picture sits under an element whose id is
                # generated per page; recover that id from the page source.
                try:
                    id_match = re.search(r'u_0_u_[a-zA-Z0-9_\-]{2}', str(driver.page_source))
                    img_text = id_match.group(0) if id_match else None
                except Exception:
                    img_text = None

                # Fall back to the raw element id (or None) when the image URL
                # cannot be extracted from the inline style.
                profile_img = img_text
                if img_text is not None:
                    try:
                        icon = WebDriverWait(driver, 10).until(
                            EC.presence_of_element_located(
                                (By.CSS_SELECTOR , f'#{img_text} > a > div > i')
                            )
                        )
                        style = icon.get_attribute('style')
                        profile_img = re.search(
                            r"url\(['\"]?([^'\")]+)['\"]?\)", style
                        ).group(1)
                    except Exception:
                        profile_img = img_text

                # get_facebook_about reports failure by returning None rather
                # than raising, so the fallback has to be applied on None;
                # otherwise a failed about page would discard the whole profile.
                about_data = self.get_facebook_about(driver, self.username, login_name, login_pw)
                if about_data is None:
                    about_data = {'contact' : 'a',  'birth': 'b', 'career': 'b'}

                return {
                    'sns' : 'facebook',
                    'name': name,
                    'about': about,
                    'profile_img': profile_img,
                    'contact': about_data['contact'],
                    'birth': about_data['birth'],
                    'career': about_data['career'],
                }
            except Exception:
                return None
            finally:
                # Always release the Chrome process, even on failure.
                self._close(driver)

        def get_facebook_about(self, driver, username, login_name, login_pw):
            """Scrape contact, birth and career from the profile's about page.

            ``driver`` is accepted for call compatibility but is not used: a
            separate browser session is started, logged in, and closed here.
            Returns a dict with ``contact``, ``birth`` and ``career`` (each a
            placeholder string when the element is missing), or ``None`` when
            the session could not be started or the login failed.
            """
            about_driver = None
            try:
                url = 'https://mobile.facebook.com/' + username + '/about'
                about_driver = self._new_driver()
                about_driver.get(url)
                time.sleep(5)
                self.login_facebook(about_driver, url, login_name, login_pw)
                time.sleep(5)

                contact = self._text_when_present(about_driver, '#contact-info', '1')
                birth = self._text_when_present(
                    about_driver,
                    '#basic-info > div > div:nth-of-type(1) > div > div._5cdv.r',
                    '2',
                )
                career = self._text_when_present(
                    about_driver, '#work > div > div > div > div', '3'
                )

                return {
                    'contact': contact,
                    'birth': birth,
                    'career': career,
                }
            except Exception:
                return None
            finally:
                self._close(about_driver)

    driver_path = 'app/chromedriver.exe'

    # The profile id to look up is the value saved from the index page.
    find_name = request.cookies.get("NAME")

    scraper = SNSProfileScraper(find_name , driver_path)
    facebook_profile = scraper.scrape_facebook_profile(Facebook_ID,Facebook_PW)

    result = {'facebook': facebook_profile}

    return render_template("facebook_result.html", result=result)

/templates/facebook_result.html

더보기
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8" />
    <title>Social Media Info</title>
    <!-- Core theme CSS (includes Bootstrap)-->
    <link
      href="{{ url_for('static', filename='css/styles.css') }}"
      rel="stylesheet"
    />
    <style>
      body {
        margin: 0;
        padding: 0;
        background-color: #f2f2f2;
        font-family: Arial, sans-serif;
      }

      h1 {
        text-align: center;
        padding: 20px;
      }

      .social-media {
        background-color: #fff;
        border-radius: 10px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        margin: 20px;
        padding: 20px;
      }

      h2 {
        margin: 0 0 10px 0;
      }

      .twitter-info,
      .instagram-info,
      .facebook-info {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-between;
        align-items: center;
      }

      .twitter-info img,
      .instagram-info img,
      .facebook-info img {
        width: 100px;
        height: 100px;
        border-radius: 50%;
        object-fit: cover;
        margin-right: 20px;
      }

      p {
        margin: 0;
        padding: 10px;
        text-align: left;
        font-weight: bold;
      }

      .twitter-info p,
      .instagram-info p,
      .facebook-info p {
        flex-basis: 100%;
        text-align: left;
        font-weight: normal;
      }


    </style>
  </head>
  <body>
    <h1>Social Media Info</h1>
    <!-- Facebook profile card; wrapped in .social-media so the card styling
         applies and every closing div has a matching opening tag. -->
    <div class="social-media">
      <h2>Facebook</h2>
      <div class="facebook-info">
        <img
          src="{{ result['facebook']['profile_img'] }}"
          alt="profile picture"
        />

        <p>Name: {{ result['facebook']['name'] }}</p>
        <p>About: {{ result['facebook']['about'] }}</p>
        <p>Contact: {{ result['facebook']['contact'] }}</p>
        <p>Birth: {{ result['facebook']['birth'] }}</p>
        <p>Career: {{ result['facebook']['career'] }}</p>
      </div>
    </div>
    <div class="container my-5">
      <a class="container btn btn-outline-primary" href="/">돌아가기</a>
    </div>
  </body>
</html>

 

/module/insta_module.py

더보기
from flask import Blueprint, render_template, request
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time, re
from config import Instagram_ID ,Instagram_PW

insta_module = Blueprint("insta_module", __name__)

@insta_module.route("/insta_result", methods=["POST"])
def insta_result():
    """Scrape the Instagram profile named by the ``NAME`` cookie and render it.

    A headless Chrome session logs in with ``Instagram_ID`` / ``Instagram_PW``
    from ``config``, collects the profile fields, and the outcome is passed to
    ``insta_result.html`` as ``result['instagram']``. That value is a dict on
    success and ``None`` when the scrape failed as a whole.
    """

    class SNSProfileScraper:
        """Selenium-based scraper for a single instagram.com profile."""

        def __init__(self, username , driver_path):
            self.driver_path = driver_path
            self.username = username

        def _new_driver(self):
            """Start a headless Chrome configured for Korean-locale scraping."""
            options = webdriver.ChromeOptions()
            options.add_argument('headless')
            options.add_argument('--disable-extensions')
            options.add_argument('--disable-gpu')
            options.add_argument('--no-sandbox')
            options.add_argument('--lang=ko_KR.UTF-8')
            return webdriver.Chrome(executable_path=self.driver_path, options=options)

        def login_instargram(self, driver, target_url, login_name, login_pw):
            """Log in on instagram.com, then navigate to ``target_url``.

            Raises whatever Selenium raises when the login form elements
            cannot be found; the caller treats that as a failed scrape.
            """
            insta_url = 'https://www.instagram.com'
            driver.implicitly_wait(10)
            driver.get(insta_url)
            time.sleep(3)

            username_input = driver.find_element(By.CSS_SELECTOR, "input[name='username']")
            password_input = driver.find_element(By.CSS_SELECTOR, "input[name='password']")

            username_input.send_keys(login_name)
            password_input.send_keys(login_pw)

            login_button = driver.find_element(By.XPATH , "//button[@type='submit']")
            login_button.click()

            print('인스타그램 로그인')
            time.sleep(3)
            driver.get(target_url)
            time.sleep(3)
            print('인스타그램 진입성공')

        def scrape_instagram_profile(self, login_name, login_pw):
            """Collect the profile fields for ``self.username``.

            Returns a dict with the keys ``sns``, ``name``, ``bio``, ``post``
            and ``profile_img``, or ``None`` when the browser could not be
            started, the login failed, or the username is missing. Individual
            fields that cannot be read fall back to placeholder values.
            """
            driver = None
            try:
                url = 'https://www.instagram.com/' + self.username
                driver = self._new_driver()
                driver.get(url)
                time.sleep(5)
                self.login_instargram(driver, url, login_name, login_pw)
                time.sleep(5)

                # The bio lives under a mount node whose id is generated per
                # page; recover that id from the page source.
                try:
                    id_match = re.search(r'mount_0_0_[a-zA-Z0-9_\-]{2}', str(driver.page_source))
                    bio_text = id_match.group(0) if id_match else None
                except Exception:
                    bio_text = None

                try:
                    name = driver.find_element(By.TAG_NAME, 'title').get_attribute('textContent')
                    # The page title has the form "<name> • ..."; keep the name part.
                    name = name.split('•')[0]
                except Exception:
                    name = 'name'

                # Fall back to the raw mount id (or None) when the bio element
                # cannot be located.
                bio = bio_text
                if bio_text is not None:
                    try:
                        bio_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, f"#{bio_text} > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > div.x9f619.xnz67gz.x78zum5.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.x1q0g3np.xqjyukv.x1qjc9v5.x1oa3qoh.x1qughib > div.xh8yej3.x1gryazu.x10o80wk.x14k21rp.x1porb0y.x17snn68.x6osk4m > div:nth-child(2) > section > main > div > header > section > div._aa_c")))
                        bio = bio_element.text
                    except Exception:
                        bio = bio_text

                try:
                    description = driver.find_element(By.CSS_SELECTOR, 'meta[name="description"]')
                    # Keep only the part of the description before the first '-'.
                    post = description.get_attribute('content').split('-')[0]
                except Exception:
                    post = None

                try:
                    og_image = driver.find_element(By.CSS_SELECTOR, 'meta[property="og:image"]')
                    profile_img = og_image.get_attribute('content')
                except Exception:
                    profile_img = 'profile_img'

                return {
                    # NOTE(review): 'instgram' looks like a typo for 'instagram';
                    # left unchanged in case something compares against it.
                    'sns' : 'instgram',
                    'name': name,
                    'bio': bio,
                    'post': post,
                    'profile_img': profile_img,
                }
            except Exception:
                # Report failure as None rather than a sentinel string or an
                # exception object; the template renders all three as empty.
                return None
            finally:
                # Always release the Chrome process, even on failure.
                if driver is not None:
                    try:
                        driver.quit()
                    except Exception:
                        # A failed shutdown must not mask the scrape result.
                        pass

    driver_path = 'app/chromedriver.exe'

    # The profile id to look up is the value saved from the index page.
    find_name = request.cookies.get("NAME")

    scraper = SNSProfileScraper(find_name , driver_path)
    instagram_profile = scraper.scrape_instagram_profile(Instagram_ID,Instagram_PW)

    result = {'instagram': instagram_profile}

    return render_template("insta_result.html", result=result)

/templates/insta_result.html

더보기
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8" />
    <title>Social Media Info</title>
    <!-- Core theme CSS (includes Bootstrap)-->
    <link
      href="{{ url_for('static', filename='css/styles.css') }}"
      rel="stylesheet"
    />
    <style>
      body {
        margin: 0;
        padding: 0;
        background-color: #f2f2f2;
        font-family: Arial, sans-serif;
      }

      h1 {
        text-align: center;
        padding: 20px;
      }

      .social-media {
        background-color: #fff;
        border-radius: 10px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        margin: 20px;
        padding: 20px;
      }

      h2 {
        margin: 0 0 10px 0;
      }

      .twitter-info,
      .instagram-info,
      .facebook-info {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-between;
        align-items: center;
      }

      .twitter-info img,
      .instagram-info img,
      .facebook-info img {
        width: 100px;
        height: 100px;
        border-radius: 50%;
        object-fit: cover;
        margin-right: 20px;
      }

      p {
        margin: 0;
        padding: 10px;
        text-align: left;
        font-weight: bold;
      }

      .twitter-info p,
      .instagram-info p,
      .facebook-info p {
        flex-basis: 100%;
        text-align: left;
        font-weight: normal;
      }


    </style>
  </head>
  <body>
    <h1>Social Media Info</h1>
    <!-- Instagram profile card; wrapped in .social-media so the card styling
         applies and every closing div has a matching opening tag. -->
    <div class="social-media">
      <h2>Instagram</h2>
      <div class="instagram-info">
        <img
          src="{{ result['instagram']['profile_img'] }}"
          alt="profile picture"
        />
        <p>Name: {{ result['instagram']['name'] }}</p>
        <p>Bio: {{ result['instagram']['bio'] }}</p>
        <p>Post: {{ result['instagram']['post'] }}</p>
      </div>
    </div>
    <div class="container my-5">
      <a class="container btn btn-outline-primary" href="/">돌아가기</a>
    </div>
  </body>
</html>

 


/module/twitter_module.py

더보기
from flask import Blueprint, render_template, request
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time, re

twitter_module = Blueprint("twitter_module", __name__)

# CSS selectors for the profile fields. They mirror Twitter's generated class
# names at the time of writing, so any of them may stop matching at any time;
# every lookup is therefore treated as optional.
_TWITTER_ROOT = (
    '#react-root > div > div > div.css-1dbjc4n.r-18u37iz.r-13qz1uu.r-417010'
    ' > main > div > div > div > div > div > '
)
_TWITTER_HEADER = (
    _TWITTER_ROOT
    + 'div:nth-child(3) > div > div > div'
    ' > div.css-1dbjc4n.r-1ifxtd0.r-ymttw5.r-ttdzmv > '
)
_TWITTER_NAME_SELECTOR = (
    _TWITTER_ROOT
    + 'div.css-1dbjc4n.r-aqfbo4.r-gtdqiz.r-1gn8etr.r-1g40b8q'
    ' > div:nth-child(1) > div > div > div > div > div'
    ' > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1pi2tsx.r-1777fci'
    ' > div > h2 > div > div > div > div'
    ' > span.css-901oao.css-16my406.r-1awozwy.r-18jsvk2.r-6koalj.r-poiln3'
    '.r-b88u0q.r-bcqeeo.r-1udh08x.r-3s2u2q.r-qvutc0'
    ' > span > span:nth-child(1)'
)
_TWITTER_SCREEN_NAME_SELECTOR = (
    _TWITTER_HEADER
    + 'div.css-1dbjc4n.r-6gpygo.r-14gqq1x'
    ' > div.css-1dbjc4n.r-1wbh5a2.r-dnmrzs.r-1ny4l3l > div'
    ' > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1wbh5a2'
    ' > div > div > div > span'
)
_TWITTER_BIO_SELECTOR = _TWITTER_HEADER + 'div:nth-child(3) > div > div > span'
_TWITTER_LOCATION_SELECTOR = (
    _TWITTER_HEADER + 'div:nth-child(4) > div > span:nth-child(1) > span > span'
)
_TWITTER_PROFILE_IMG_SELECTOR = (
    _TWITTER_HEADER
    + 'div.css-1dbjc4n.r-1habvwh.r-18u37iz.r-1w6e6rj.r-1wtj0ep'
    ' > div.css-1dbjc4n.r-1adg3ll.r-16l9doz.r-6gpygo.r-2o1y69.r-1v6e3re'
    '.r-bztko3.r-1xce0ei'
    ' > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu > div'
    ' > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu > div > a'
    ' > div.css-1dbjc4n.r-14lw9ot.r-sdzlij.r-1wyvozj.r-1udh08x.r-633pao'
    '.r-u8s1d.r-1v2oles.r-desppf > div'
    ' > div.r-1p0dtai.r-1pi2tsx.r-1d2f490.r-u8s1d.r-ipm5af.r-13qz1uu'
    ' > div > img'
)
_TWITTER_JOINED_DATE_SELECTOR = (
    _TWITTER_HEADER
    + 'div:nth-child(4) > div'
    ' > span.css-901oao.css-16my406.r-14j79pv.r-4qtqp9.r-poiln3.r-1b7u577'
    '.r-bcqeeo.r-qvutc0 > span'
)


class SNSProfileScraper:
    """Scrape the public profile page of one Twitter user with headless Chrome.

    Args:
        username: Twitter handle to look up (may be None or empty).
        driver_path: Path to the chromedriver executable.
    """

    def __init__(self, username, driver_path):
        self.driver_path = driver_path
        self.username = username

    @staticmethod
    def _read(driver, selector, attribute=None):
        """Return the text (or *attribute*) of the first match, else None.

        Any WebDriver error (element missing, stale element, ...) is treated
        as "field not available" so one failed lookup cannot discard the
        rest of the profile.
        """
        from selenium.common.exceptions import WebDriverException

        try:
            element = driver.find_element(By.CSS_SELECTOR, selector)
            if attribute is None:
                return element.text
            return element.get_attribute(attribute)
        except WebDriverException:
            return None

    def scrape_twitter_profile(self):
        """Load the profile page and collect the visible profile fields.

        Returns:
            A dict with the keys 'sns', 'name', 'screen_name', 'bio',
            'location', 'profile_img' and 'joined_date'. Each field that
            could not be read is None. Returns None when no username was
            given or the browser session itself failed.
        """
        import logging
        from urllib.parse import quote

        if not self.username:
            return None

        driver = None
        try:
            # Escape the handle so an unexpected value cannot change the path
            # or query of the requested URL.
            url = 'https://twitter.com/' + quote(self.username, safe='')
            options = webdriver.ChromeOptions()
            options.add_argument('headless')
            options.add_argument('--disable-extensions')
            options.add_argument('--disable-gpu')
            options.add_argument('--no-sandbox')
            # NOTE(review): `executable_path` is kept as in the original; it
            # appears to be deprecated in Selenium 4 in favour of a Service
            # object - confirm against the installed Selenium version.
            driver = webdriver.Chrome(executable_path=self.driver_path, options=options)
            driver.get(url)
            # Fixed wait that gives the client-side rendered page time to load.
            time.sleep(5)

            return {
                'sns': 'twitter',
                'name': self._read(driver, _TWITTER_NAME_SELECTOR),
                'screen_name': self._read(driver, _TWITTER_SCREEN_NAME_SELECTOR),
                'bio': self._read(driver, _TWITTER_BIO_SELECTOR),
                'location': self._read(driver, _TWITTER_LOCATION_SELECTOR),
                'profile_img': self._read(
                    driver, _TWITTER_PROFILE_IMG_SELECTOR, attribute='src'
                ),
                'joined_date': self._read(driver, _TWITTER_JOINED_DATE_SELECTOR),
            }
        except Exception:
            # Best-effort boundary: the route still renders a page when the
            # browser cannot be started or the page cannot be loaded.
            logging.getLogger(__name__).exception(
                "Twitter profile scrape failed for %r", self.username
            )
            return None
        finally:
            # Always shut the browser down; otherwise every request leaves a
            # Chrome process behind.
            if driver is not None:
                driver.quit()


@twitter_module.route("/twitter_result", methods=["POST"])
def twitter_result():
    """Render the Twitter profile of the user named in the NAME cookie.

    The template receives ``result={'twitter': profile}``, where ``profile``
    is the dict returned by ``SNSProfileScraper.scrape_twitter_profile`` or
    None when the lookup failed or the cookie is missing.
    """
    driver_path = 'app/chromedriver.exe'
    find_name = request.cookies.get("NAME")

    scraper = SNSProfileScraper(find_name, driver_path)
    result = {'twitter': scraper.scrape_twitter_profile()}

    return render_template("twitter_result.html", result=result)

/templates/twitter_result.html

더보기
<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8" />
    <title>Social Media Info</title>
    <!-- Core theme CSS (includes Bootstrap)-->
    <link
      href="{{ url_for('static', filename='css/styles.css') }}"
      rel="stylesheet"
    />
    <style>
      body {
        margin: 0;
        padding: 0;
        background-color: #f2f2f2;
        font-family: Arial, sans-serif;
      }

      h1 {
        text-align: center;
        padding: 20px;
      }

      .social-media {
        background-color: #fff;
        border-radius: 10px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        margin: 20px;
        padding: 20px;
      }

      h2 {
        margin: 0 0 10px 0;
      }

      .twitter-info,
      .instagram-info,
      .facebook-info {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-between;
        align-items: center;
      }

      .twitter-info img,
      .instagram-info img,
      .facebook-info img {
        width: 100px;
        height: 100px;
        border-radius: 50%;
        object-fit: cover;
        margin-right: 20px;
      }

      p {
        margin: 0;
        padding: 10px;
        text-align: left;
        font-weight: bold;
      }

      .twitter-info p,
      .instagram-info p,
      .facebook-info p {
        flex-basis: 100%;
        text-align: left;
        font-weight: normal;
      }
    </style>
  </head>
  <body>
    <h1>Social Media Info</h1>
    <!-- Card wrapper styled by .social-media; its opening tag was missing,
         which left the closing </div> below unmatched. -->
    <div class="social-media">
      <h2>Twitter</h2>
      {# result['twitter'] is None when the scrape failed #}
      {% set profile = result['twitter'] %}
      {% if profile %}
      <div class="twitter-info">
        {% if profile['profile_img'] %}
        <img
          src="{{ profile['profile_img'] }}"
          alt="profile picture"
        />
        {% endif %}
        <p>Name: {{ profile['name'] }}</p>
        <p>Screen Name: {{ profile['screen_name'] }}</p>
        <p>Bio: {{ profile['bio'] }}</p>
        <p>Location: {{ profile['location'] }}</p>
        <p>Joined Date: {{ profile['joined_date'] }}</p>
      </div>
      {% else %}
      <p>프로필 정보를 가져오지 못했습니다.</p>
      {% endif %}
    </div>
    <div class="container my-5">
      <a class="container btn btn-outline-primary" href="/">돌아가기</a>
    </div>
  </body>
</html>

'OSINT' 카테고리의 다른 글

플라스크 토글 및 쿠키값 저장  (0) 2023.06.25
SNS 크롤링 모듈 구현(instagram,facebook,twitter)  (0) 2023.06.25