# Source code for module: fetch
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import requests
import time
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from datetime import datetime
import json
# SQLite engine backed by the file ``data-dev.sqlite`` (relative path).
eng = create_engine('sqlite:///data-dev.sqlite')
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
class Movie(Base):
    """ORM model for one row of the ``movies`` table.

    ==================== =================================
    Column               Description
    ==================== =================================
    id                   Sequence number (primary key)
    title                Movie title
    original_title       Original title
    directors            Director(s)
    casts                Leading cast
    genres               Genres
    year                 Release year
    rating               Rating
    images               Cover image
    alt                  Douban link
    amount               Stock
    counts               Number of times borrowed
    ==================== =================================
    """

    __tablename__ = 'movies'

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Both titles are unique and indexed: inserting a duplicate raises an
    # integrity error at flush/commit time.
    title = Column(String(64), unique=True, index=True)
    original_title = Column(String(64), unique=True, index=True)
    directors = Column(String(64))
    casts = Column(String(64))
    genres = Column(String(64))
    year = Column(Integer)
    rating = Column(Float)
    images = Column(String(64))
    alt = Column(String(64))
    # New rows start with a stock of 200 and a borrow count of 0.
    amount = Column(Integer, default=200)
    counts = Column(Integer, default=0)

    def __repr__(self):
        """Return a short debugging representation showing id and title."""
        return '<Movie id={!r} title={!r}>'.format(self.id, self.title)
# Session factory bound to the module-level engine ``eng``.
Session = sessionmaker(bind=eng)
# Single module-level session used by the code below.
session = Session()
def _movie_fields(subject):
    """Map one entry of the API's ``subjects`` list to ``Movie`` keyword arguments."""
    return {
        'title': subject['title'],
        'original_title': subject['original_title'],
        # Multi-valued fields are flattened into one ' / '-separated string.
        'directors': ' / '.join(d['name'] for d in subject['directors']),
        'casts': ' / '.join(c['name'] for c in subject['casts']),
        'genres': ' / '.join(subject['genres']),
        'year': subject['year'],
        'rating': subject['rating']['average'],
        'images': subject['images']['large'],
        'alt': subject['alt'],
    }


def start():
    """
    .. note:: Fetch data

        Initialise the database from the Douban Movie TOP250 API.

    Requests 13 pages, stepping the ``start`` offset by 20 (0, 20, ..., 240),
    and adds one ``Movie`` row per entry of each response's ``subjects`` list.
    All rows are committed together; if anything fails the session is rolled
    back and the exception is re-raised, so nothing is stored.

    :raises requests.RequestException: on a network error, a timeout or a
        non-2xx HTTP status.
    :raises KeyError: if a response lacks one of the expected fields.
    :raises sqlalchemy.exc.SQLAlchemyError: if the insert fails, e.g. a title
        that violates the unique constraint on the ``movies`` table.
    """
    # Create the ``movies`` table when it is missing, so the script also works
    # against a fresh database file; existing tables are left untouched.
    Base.metadata.create_all(eng)
    try:
        for i in range(13):
            api = 'https://api.douban.com/v2/movie/top250?start={}'.format(20 * i)
            # A timeout keeps the import from hanging forever on a stalled
            # connection.
            res = requests.get(api, timeout=30)
            # Surface HTTP errors here instead of as a confusing KeyError on
            # 'subjects' below.
            res.raise_for_status()
            for movie in res.json()['subjects']:
                session.add(Movie(**_movie_fields(movie)))
        session.commit()
    except Exception:
        # Leave the shared module-level session usable after a failure.
        session.rollback()
        raise
if __name__ == "__main__":
    # Populate the database only when run as a script, not on import.
    start()