// Three equivalent ways to add up the integers 1..100.
// Each snippet is self-contained, so `sum` and `i` are re-initialised before
// every loop; `var` permits the repeated declarations.

// 1. for loop
var sum = 0;
for (var i = 1; i <= 100; i += 1) {
    sum += i;
}

// 2. while loop
var sum = 0;
var i = 1;
while (i <= 100) {
    sum += i;
    i += 1;
}

// 3. do...while loop (the body always runs at least once)
var sum = 0;
var i = 1;
do {
    sum += i;
    i += 1;
} while (i <= 100);
<!DOCTYPE html>
<meta charset="UTF-8">
.right {
float: right;
width: 250px;
height: 30px;
font-size: 16px;
line-height: 30px;
background-color: blue;
color: yellow;
text-align: center;
<div id="time" class="right"></div>
/**
 * Render the current local date and time into the element with id "time".
 *
 * The text has the form
 *   <year>年<month>月<day>日&nbsp;&nbsp;星期<weekday>&nbsp;&nbsp;HH:MM:SS
 * with hours, minutes and seconds zero-padded to two digits.
 */
function showDateTime() {
    // Weekday names indexed by Date#getDay(): 0 = Sunday ... 6 = Saturday.
    var weekdays = ["日", "一", "二", "三", "四", "五", "六"];

    // Left-pad a value below 10 with a single "0".
    function pad2(value) {
        return value < 10 ? "0" + value : "" + value;
    }

    var now = new Date();
    var str = "";
    str += now.getFullYear() + "年";
    str += (now.getMonth() + 1) + "月"; // getMonth() is 0-based (0-11)
    str += now.getDate() + "日&nbsp;&nbsp;";
    str += "星期" + weekdays[now.getDay()] + "&nbsp;&nbsp;";
    str += pad2(now.getHours()) + ":" + pad2(now.getMinutes()) + ":" + pad2(now.getSeconds());

    // JavaScript = ECMAScript + BOM (window) + DOM (document).
    // `document` represents the whole HTML page; getElementById looks an
    // element up by its id.
    var div = document.getElementById("time");
    // The string contains character entities (&nbsp;), so it has to be
    // assigned through innerHTML; textContent would show them literally.
    div.innerHTML = str;
}
// window对象(BOM)代表浏览器窗口
// 通过该对象的setInterval方法可以设置计时器控制函数周期性执行
setInterval(showDateTime, 1000);
<!DOCTYPE html>
<meta charset="UTF-8">
// Number-guessing game. The outer loop plays one round per iteration and
// repeats while the player accepts the confirm() dialog; the inner loop keeps
// prompting until the guess equals the secret number.
// NOTE(review): the bodies of the if/else branches and several closing braces
// are missing from this excerpt - restore them from the original page.
do {
// Secret number for this round, derived from Math.random() * 100 + 1.
var answer = parseInt(Math.random() * 100 + 1);
// Number of guesses made in this round.
var total = 0;
do {
total += 1;
// parseInt yields NaN for non-numeric input; NaN fails all three
// comparisons below and therefore reaches the final else branch.
var thyAnswer = parseInt(prompt("请输入:"));
if (thyAnswer > answer) {
} else if (thyAnswer < answer) {
} else if (thyAnswer == answer) {
} else {
} while (thyAnswer != answer);
// Special case for rounds that took more than 7 guesses.
if (total > 7) {
} while (confirm('再来一局?'));
<!DOCTYPE html>
<meta charset="UTF-8">
<h3><span id="counter">5</span>秒钟以后跳转到百度</h3>
// Countdown redirect: every second decrement the number shown in the element
// with id "counter" (starting from 5) and navigate to Baidu once it reaches 0.
+function() {
    var counter = 5;
    var span = document.getElementById("counter");
    // A named function expression lets the callback re-schedule itself.
    // arguments.callee is avoided because it throws a TypeError in strict mode.
    setTimeout(function tick() {
        counter -= 1;
        if (counter > 0) {
            span.textContent = counter;
            setTimeout(tick, 1000);
        } else {
            location.href = "http://www.baidu.com";
        }
    }, 1000);
}();
<!DOCTYPE html>
<meta charset="UTF-8">
* {
margin: 0;
padding: 0;
#adv {
width: 940px;
margin: 0 auto;
#adv ul {
width: 120px;
height: 30px;
margin: 0 auto;
position: relative;
top: -30px;
#adv li {
width: 30px;
height: 30px;
list-style: none;
float: left;
color: #ccc;
cursor: pointer;
<div id="adv">
<img id="image" src="img/slide-1.jpg" alt="">
<li class="dot"></li>
<li class="dot"></li>
<li class="dot"></li>
<li class="dot"></li>
<script src="js/common.js"></script>
// Image carousel for the #adv banner: a 2-second timer cycles slides 1..4 and
// each .dot element jumps straight to its own slide when clicked.
// NOTE(review): closing braces are missing throughout this excerpt and the
// final "mouseout" handler is cut off before its body.
(function() {
// 1-based number of the slide currently shown.
var index = 1;
var img = document.getElementById("image");
// Id returned by setInterval; 0 is used to mean "no timer running".
var timerId = 0;
function startTimer() {
// Guard so that a second timer is never started on top of a running one.
if (timerId == 0) {
timerId = setInterval(function() {
index += 1;
// Wrap around after the fourth slide.
if (index > 4) {
index = 1;
img.src = "img/slide-" + index + ".jpg";
}, 2000);
// Ways to obtain page elements (tags) through the document object:
// 1. document.getElementById("...")
// 2. document.getElementsByTagName("...")
// 3. document.getElementsByClassName("...")
// 4. document.getElementsByName("...")
// 5. document.querySelector("...")
// 6. document.querySelectorAll("...")
var liList = document.querySelectorAll("#adv .dot");
for (var i = 0; i < liList.length; i += 1) {
// Store the slide number on the element so the handler can read it back.
liList[i].index = i + 1;
bind(liList[i], "click", function(evt) {
// Fall back to the global event object when no argument is passed.
evt = evt || event;
var target = evt.target || evt.srcElement;
index = target.index;
img.src = "img/slide-" + index + ".jpg";
// NOTE(review): timerId is reset here without a visible clearInterval call -
// confirm against the original that the running timer is stopped first.
timerId = 0;
bind(liList[i], "mouseout", function(evt) {
<!DOCTYPE html>
<meta charset="UTF-8">
<button id="ok">确定</button>
<!--绑定事件回调函数大致有3种方式: -->
<!--1. 通过标签的onXXX属性来指定需要执行的事件回调函数-->
<!--2. 通过元素的onXXX属性来绑定需要执行的事件回调函数-->
<!--3. 通过元素的addEventListener方法来绑定事件回调函数-->
// Demonstrates registering several click handlers on one button and removing
// one of them again, with a fallback for browsers lacking addEventListener.
var btn = document.getElementById("ok");
// NOTE(review): the bodies and closing braces of sayHello / sayGoodbye are
// missing from this excerpt.
function sayHello() {
function sayGoodbye() {
// Browser families listed by the original author:
// Netscape Navigator --> Firefox
// Internet Explorer
// Chrome
// Safari
// Opera
// Feature-detect the standard listener API; otherwise use attachEvent.
if (btn.addEventListener) {
btn.addEventListener("click", sayHello);
btn.addEventListener("click", sayGoodbye);
// Third handler: unregisters sayGoodbye when the button is clicked.
btn.addEventListener("click", function() {
btn.removeEventListener("click", sayGoodbye);
} else {
// attachEvent / detachEvent take the event name with the "on" prefix.
btn.attachEvent("onclick", sayHello);
btn.attachEvent("onclick", sayGoodbye);
btn.attachEvent("onclick", function() {
btn.detachEvent("onclick", sayGoodbye);
<!DOCTYPE html>
<meta charset="UTF-8">
* {
margin: 0;
padding: 0;
#container {
margin: 10px 20px;
#container li {
float: left;
list-style: none;
width: 60px;
height: 60px;
<div id="container">
<img src="img/hello.jpg" alt="">
<li><img src="img/thumb-1.jpg" alt=""></li>
<li><img src="img/thumb-2.jpg" alt=""></li>
<li><img src="img/thumb-3.jpg" alt=""></li>
<script src="js/common.js"></script>
// Thumbnail preview: hovering a thumbnail <li> swaps the large image for the
// photo whose file name is stored on that <li>.
+function() {
    // Large image: the <img> that is a direct child of #container.
    var img = document.querySelector('#container>img');
    var imgNames = ["hello.jpg", "goodbye.jpg", "oneshit.jpg"];

    function showPhoto(evt) {
        evt = evt || window.event;
        // Event source (whoever triggered the event): either the thumbnail
        // <img> or the <li> itself.
        var target = evt.target || evt.srcElement;
        // Walk up to the node that carries photoName, so the lookup works no
        // matter which of the two fired the event.
        while (target && target.photoName === undefined) {
            target = target.parentNode;
        }
        if (target) {
            img.src = "img/" + target.photoName;
        }
    }

    // Related-node properties available on an element:
    //   parentNode      - parent node
    //   children        - all child elements
    //   nextSibling     - next sibling node (may be a whitespace text node)
    //   previousSibling - previous sibling node
    // The list is looked up with a child selector instead of
    // img.nextSibling.nextSibling, which breaks as soon as the whitespace
    // between the tags changes.
    var ul = document.querySelector("#container>ul");
    for (var i = 0; i < ul.children.length; i += 1) {
        ul.children[i].photoName = imgNames[i];
        bind(ul.children[i], "mouseover", showPhoto);
    }
}();
<!DOCTYPE html>
<meta charset="UTF-8">
* {
margin: 0;
padding: 0;
#container {
margin: 20px 50px;
#fruits li {
list-style: none;
width: 200px;
height: 50px;
font-size: 20px;
line-height: 50px;
background-color: cadetblue;
color: white;
text-align: center;
margin: 2px 0;
#fruits>li>a {
float: right;
text-decoration: none;
color: white;
position: relative;
right: 5px;
#fruits~input {
border: none;
outline: none;
font-size: 18px;
#fruits~input[type=text] {
border-bottom: 1px solid darkgray;
width: 200px;
height: 50px;
text-align: center;
#fruits~input[type=button] {
width: 80px;
height: 30px;
background-color: coral;
color: white;
vertical-align: bottom;
cursor: pointer;
<div id="container">
<ul id="fruits">
<li>苹果<a href="">×</a></li>
<li>香蕉<a href="">×</a></li>
<li>火龙果<a href="">×</a></li>
<li>西瓜<a href="">×</a></li>
<input type="text" name="fruit">
<input id="ok" type="button" value="确定">
<script src="js/common.js"></script>
// Editable fruit list built with native DOM calls: every "×" link is bound to
// removeItem, and the text box / OK button are bound to addItem.
// NOTE(review): closing braces and a few statements are missing from this
// excerpt; the notes below mark what the visible lines do not show.
function removeItem(evt) {
evt = evt || window.event;
prevent(evt); // use the custom helper to block the event's default action
var target = evt.target || evt.srcElement;
// The <li> that contains the clicked link.
// NOTE(review): no visible statement actually removes this <li>.
var li = target.parentNode;
(function() {
function addItem(evt) {
// Ignore input that is empty after trimming.
var fruitName = textInput.value.trim();
if (fruitName.length > 0) {
var li = document.createElement("li");
li.textContent = fruitName;
li.style.backgroundColor = "rgba(20, 150, 180, 0.5)";
// Delete link for the new entry.
// NOTE(review): the visible lines never append this <a> to the <li>.
var a = document.createElement("a");
a.href = "";
a.textContent = "×";
bind(a, "click", removeItem);
var ul = document.getElementById("fruits");
// Insert the new entry at the top of the list.
ul.insertBefore(li, ul.children[0]);
textInput.value = "";
// Bind removeItem to the delete links already present in the markup.
var anchors = document.querySelectorAll("#fruits>li>a");
for (var i = 0; i < anchors.length; i += 1) {
bind(anchors[i], "click", removeItem);
var btn = document.getElementById("ok");
var textInput = document.getElementsByName("fruit")[0];
bind(textInput, "keyup", function(evt) {
evt = evt || window.event;
var code = evt.keyCode || evt.which;
// console.log(code);
// Key code 13 is the Enter key.
if (code == 13) {
bind(btn, "click", addItem);
<!DOCTYPE html>
<meta charset="UTF-8">
#container {
width: 800px;
height: 400px;
margin: 10px auto;
border: 1px solid black;
overflow: hidden;
#buttons {
width: 800px;
margin: 10px auto;
text-align: center;
#add, #fla {
border: none;
outline: none;
width: 80px;
height: 30px;
background-color: red;
color: white;
.small {
width: 80px;
height: 80px;
float: left;
<div id="container">
<div id="buttons">
<button id="add">添加</button>
<button id="fla">闪烁</button>
<script src="js/common.js"></script>
// "Add" inserts a randomly coloured square at the front of #container;
// "Flash" toggles a 200 ms timer that recolours every square.
(function() {
    var container = document.getElementById("container");
    var addButton = document.getElementById("add");
    var flaButton = document.getElementById("fla");

    bind(addButton, "click", function() {
        var div = document.createElement("div");
        div.className = "small";
        div.style.backgroundColor = randomColor();
        container.insertBefore(div, container.children[0]);
    });

    // Id of the running interval; 0 means the squares are not flashing.
    var timerId = 0;
    bind(flaButton, "click", function(evt) {
        evt = prepare(evt);
        if (timerId === 0) {
            evt.target.textContent = "停止";
            timerId = setInterval(function() {
                var divs = document.querySelectorAll("#container>div");
                for (var i = 0; i < divs.length; i += 1) {
                    divs[i].style.backgroundColor = randomColor();
                }
            }, 200);
        } else {
            evt.target.textContent = "闪烁";
            // Stop the interval before forgetting its id; otherwise it keeps
            // firing and every further click stacks another timer on top.
            clearInterval(timerId);
            timerId = 0;
        }
    });
})();
<!DOCTYPE html>
<meta charset="UTF-8">
#one {
width: 400px;
height: 400px;
background-color: red;
#two {
width: 300px;
height: 300px;
background-color: green;
#three {
width: 200px;
height: 200px;
background-color: blue;
#two, #three {
position: relative;
left: 50px;
top: 50px;
<div id="container">
<div id="one">
<div id="two">
<div id="three"></div>
<script src="js/common.js"></script>
// Event-bubbling demo on three nested boxes (#one > #two > #three): each box
// gets its own click handler.
// NOTE(review): the handler bodies and closing braces are missing from this
// excerpt.
(function() {
var one = document.getElementById("one");
var two = document.getElementById("two");
var three = document.getElementById("three");
bind(one, "click", function() {
bind(two, "click", function() {
bind(three, "click", function(evt) {
// Feature-detect the standard API; the else branch sets cancelBubble instead.
// NOTE(review): the stopPropagation branch is empty in the visible lines.
if (evt.stopPropagation) {
} else {
evt.cancelBubble = true;
<!DOCTYPE html>
<meta charset="UTF-8">
#adv {
position: fixed;
right: 10px;
top: 10px;
width: 200px;
height: 200px;
background-color: blue;
color: yellow;
#close {
float: right;
<div id="adv">
<button id="close">关闭</button>
<script src="js/common.js"></script>
// Floating advert: each click on the close button moves #adv 30px further
// down, and once its top offset has reached 300 the advert is hidden.
// NOTE(review): closing braces are missing from this excerpt.
(function() {
var div = document.getElementById("adv");
var closeButton = document.getElementById("close");
bind(closeButton, "click", function() {
// NOTE(review): the right-hand operand of || is missing here - presumably a
// computed-style lookup for browsers without currentStyle; confirm.
var divStyle = div.currentStyle ||
// parseInt drops the unit suffix, e.g. "10px" becomes 10.
var top = parseInt(divStyle.top);
if (top < 300) {
div.style.top = (top + 30) + "px";
} else {
div.style.display = "none";
// div.style.visibility = "hidden";
<!DOCTYPE html>
<meta charset="UTF-8">
* {
margin: 0;
padding: 0;
#container {
margin: 20px 50px;
#fruits li {
list-style: none;
width: 200px;
height: 50px;
font-size: 20px;
line-height: 50px;
background-color: cadetblue;
color: white;
text-align: center;
margin: 2px 0;
#fruits>li>a {
float: right;
text-decoration: none;
color: white;
position: relative;
right: 5px;
#fruits~input {
border: none;
outline: none;
font-size: 18px;
#fruits~input[type=text] {
border-bottom: 1px solid darkgray;
width: 200px;
height: 50px;
text-align: center;
#fruits~input[type=button] {
width: 80px;
height: 30px;
background-color: coral;
color: white;
vertical-align: bottom;
cursor: pointer;
<div id="container">
<ul id="fruits">
<li>苹果<a href="">×</a></li>
<li>香蕉<a href="">×</a></li>
<li>火龙果<a href="">×</a></li>
<li>西瓜<a href="">×</a></li>
<input type="text" name="fruit">
<input id="ok" type="button" value="确定">
<script src="js/jquery.min.js"></script>
// Why jQuery is recommended over hand-written native JavaScript:
// jQuery objects offer more properties and methods, so less code does more,
// and their methods are flexible and free of browser-compatibility problems.
// Once jQuery has loaded, a property named jQuery is bound on the window object.
// The same property is also available as $, which is both an object and a function.
// The four most common ways to call $ as a function:
// 1. Argument is a function: it is the callback to run once the page has finished loading.
// 2. Argument is a selector string: $ returns a jQuery object for the matched elements (essentially an array).
// 3. Argument is a tag string: $ creates that tag and returns the corresponding jQuery object.
// 4. Argument is a native JavaScript (DOM) object: $ turns it into a jQuery object.
// NOTE(review): function bodies and closing braces are missing from this excerpt.
// Usage 1
$(function() {
function removeItem(evt) {
// Usage 4
function addItem(evt) {
// Usage 2
var fruitName = $("#fruits+input").val().trim();
// Ignore input that is empty after trimming.
if (fruitName.length > 0) {
// Usage 3
var $li = $("<li>").text(fruitName);
// Usage 3
var $a = $("<a href=''>").text("×").on("click", removeItem);
// Usage 2
$("#fruits a").on("click", removeItem);
$("#ok").on("click", addItem);
<!DOCTYPE html>
<meta charset="UTF-8">
#data {
border-collapse: collapse;
#data td, #data th {
width: 120px;
height: 40px;
text-align: center;
border: 1px solid black;
#buttons {
margin: 10px 0;
<table id="data">
<div id="buttons">
<button id="pretty">美化表格</button>
<button id="clear">清除数据</button>
<button id="remove">删单元格</button>
<button id="hide">隐藏表格</button>
<script src="js/jquery.min.js"></script>
// Click handlers for the four buttons below the #data table.
// NOTE(review): closing braces are missing from this excerpt, and the call
// that should wrap the two style properties at the end is not visible.
$(function() {
// "pretty": white text on every row after the first, alternating row
// backgrounds, then a white background on the first row.
$("#pretty").on("click", function() {
$("#data tr:gt(0)").css("color", "white");
$("#data tr:odd").css("background-color", "darkgreen");
$("#data tr:even").css("background-color", "darkmagenta");
$("#data tr:eq(0)").css("background-color", "white");
// "clear": empty every <td> in the rows after the first.
$("#clear").on("click", function() {
$("#data tr:gt(0) td").html("");
// "remove": delete the last row, never the first one.
$("#remove").on("click", function() {
$("#data tr:gt(0):last").remove();
// "hide": fade the table out over 2 seconds, then run the callback.
$("#hide").on("click", function() {
$("#data").fadeOut(2000, function() {
"display": "block",
"visibility": "hidden"
<!DOCTYPE html>
<meta charset="UTF-8">
* {
margin: 0;
padding: 0;
#container {
width: 960px;
margin: 0 auto;
<button id="load">加载</button>
<div id="container"></div>
<script src="https://cdn.bootcss.com/jquery/3.3.1/jquery.min.js"></script>
// Clicking #load requests a JSON list of pictures and creates an <img> for
// each entry of json.newslist.
// NOTE(review): closing braces and the call that should wrap the request
// options ("url", "type", ...) are missing from this excerpt.
$(function() {
$("#load").on("click", function() {
// Percent-encoding round trip, logged for demonstration.
// NOTE(review): encodeURIComponent / decodeURIComponent take one argument;
// the "utf-8" argument is ignored.
console.log(encodeURIComponent("手机", "utf-8"));
console.log(decodeURIComponent("%E7%8B%97%E5%B1%8E", "utf-8"));
// Fetch data with an Ajax request and refresh only part of the page.
// jQuery wraps several Ajax request methods:
// - $.ajax(): flexible and powerful (strongly recommended)
// - $.getJSON(): simple and convenient
// Uniform Resource Locator:
// protocol://IP-address-or-domain:port/path/resource?query-string
// HTTP(S) requests come with several request methods.
// Under normal circumstances a browser can only issue GET or POST requests.
// The HTTP methods we are likely to use in projects are these five:
// - GET: fetch a resource from the server
// - POST: submit a resource to the server
// - DELETE: delete a resource on the server
// - PUT / PATCH: update a resource on the server
var url = "http://api.tianapi.com/meinv/";
"url": url,
"type": "get",
// Query parameters sent with the request.
// NOTE(review): the API key is hard-coded in page source - consider moving it
// out of client-side code.
"data": {
"key": "772a81a51ae5c780251b1f98ea431b84",
"num": 15
"dataType": "json",
"success": function(json) {
for (var i = 0; i < json.newslist.length; i += 1) {
var mm = json.newslist[i];
// NOTE(review): $img is assigned without var and therefore becomes a global;
// no visible statement inserts the image into the page.
$img = $("<img>").attr('src', mm.picUrl);
<!DOCTYPE html>
<meta charset="utf-8" />
.t99 {
border-collapse: collapse;
.t99 td {
padding: 0 10px;
border: 1px solid black;
<script src="js/common.js"></script>
/**
 * Attach an event handler to an element.
 *
 * Uses addEventListener when the element provides it and otherwise falls
 * back to attachEvent, which expects the event name prefixed with "on" and
 * has no capture phase.
 *
 * @param {HTMLElement} elem element to bind the event on
 * @param {String} en event name without the "on" prefix, e.g. "click"
 * @param {Function} fn callback function
 * @param {Boolean} capture whether to use event capturing (ignored by the
 *     attachEvent fallback)
 */
function bind(elem, en, fn, capture) {
    if (elem.addEventListener) {
        elem.addEventListener(en, fn, capture);
    } else {
        elem.attachEvent('on' + en, fn);
    }
}
/**
 * Detach an event handler from an element.
 *
 * Uses removeEventListener when the element provides it and otherwise falls
 * back to detachEvent, which expects the event name prefixed with "on".
 *
 * @param {HTMLElement} elem element to unbind the event from
 * @param {String} en event name without the "on" prefix, e.g. "click"
 * @param {Function} fn the callback that was registered
 * @param {Boolean} [capture] capture flag the listener was registered with;
 *     removeEventListener only removes a listener whose capture flag
 *     matches, so pass the same value that was given when binding
 */
function unbind(elem, en, fn, capture) {
    if (elem.removeEventListener) {
        elem.removeEventListener(en, fn, capture);
    } else {
        elem.detachEvent('on' + en, fn);
    }
}
/**
 * Normalise an event object so handlers can rely on `target` and
 * `preventDefault()` being present.
 *
 * When no event is passed, window.event is used. Missing members are filled
 * in from their legacy counterparts (srcElement / returnValue); members that
 * already exist are left untouched, because `target` is read-only on standard
 * Event objects and assigning to it throws in strict mode.
 *
 * @param {Event} evt event object (may be undefined)
 * @returns {Event} the same event object, normalised
 */
function prepare(evt) {
    evt = evt || window.event;
    if (!evt.target) {
        evt.target = evt.srcElement;
    }
    if (!evt.preventDefault) {
        evt.preventDefault = function() {
            this.returnValue = false;
        };
    }
    return evt;
}
/**
 * Prevent the default action of an event.
 *
 * Calls preventDefault() when the event provides it and otherwise sets
 * returnValue to false.
 *
 * @param {Event} evt event object
 */
function prevent(evt) {
    if (evt.preventDefault) {
        evt.preventDefault();
    } else {
        evt.returnValue = false;
    }
}
/**
 * Return a random integer in the half-open range [min, max).
 *
 * Math.floor is used instead of parseInt: parseInt converts its argument to
 * a string first, so tiny values such as 5e-7 parse as 5, and it truncates
 * toward zero, which lets negative ranges return max itself.
 *
 * @param {Number} min lower bound (inclusive)
 * @param {Number} max upper bound (exclusive)
 * @returns {Number} integer n with min <= n < max
 */
function randomInt(min, max) {
    return Math.floor(Math.random() * (max - min) + min);
}
/**
 * Return a random colour as a CSS "rgb(r,g,b)" string.
 *
 * Each channel is drawn independently with randomInt(0, 256).
 *
 * @returns {String} colour string such as "rgb(12,200,73)"
 */
function randomColor() {
    var red = randomInt(0, 256);
    var green = randomInt(0, 256);
    var blue = randomInt(0, 256);
    return "rgb(" + red + "," + green + "," + blue + ")";
}
/**
 * Write a 9x9 multiplication table into the document.
 *
 * Row i (1..9) holds the cells "i*1=..." through "i*i=...". The markup is
 * assembled into one string and written with a single document.write call so
 * that the table, its rows and its cells are always properly closed.
 */
function createTable() {
    var html = "<table class='t99'>";
    for (var i = 1; i <= 9; i += 1) {
        html += "<tr>";
        for (var j = 1; j <= i; j += 1) {
            html += "<td>" + i + "*" + j + "=" + i * j + "</td>";
        }
        html += "</tr>";
    }
    html += "</table>";
    document.write(html);
}
...@@ -8,11 +8,8 @@ ...@@ -8,11 +8,8 @@
import scrapy import scrapy
class DoubanItem(scrapy.Item): class MovieItem(scrapy.Item):
name = scrapy.Field() title = scrapy.Field()
year = scrapy.Field()
score = scrapy.Field() score = scrapy.Field()
director = scrapy.Field() motto = scrapy.Field()
classification = scrapy.Field()
actor = scrapy.Field()
...@@ -78,7 +78,7 @@ class DoubanDownloaderMiddleware(object): ...@@ -78,7 +78,7 @@ class DoubanDownloaderMiddleware(object):
# - or return a Request object # - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of # - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called # installed downloader middleware will be called
return None request.meta['proxy'] = ''
def process_response(self, request, response, spider): def process_response(self, request, response, spider):
# Called with the response returned from the downloader. # Called with the response returned from the downloader.
...@@ -4,40 +4,17 @@ ...@@ -4,40 +4,17 @@
# #
# Don't forget to add your pipeline to the ITEM_PIPELINES setting # Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import pymongo
from scrapy.exceptions import DropItem
from scrapy.conf import settings
from scrapy import log
class DoubanPipeline(object): class DoubanPipeline(object):
def __init__(self): # def __init__(self, server, port):
connection = pymongo.MongoClient(settings['MONGODB_SERVER'], settings['MONGODB_PORT']) # pass
db = connection[settings['MONGODB_DB']]
self.collection = db[settings['MONGODB_COLLECTION']] # @classmethod
# def from_crawler(cls, crawler):
# return cls(crawler.settings['MONGO_SERVER'],
# crawler.settings['MONGO_PORT'])
def process_item(self, item, spider): def process_item(self, item, spider):
#Remove invalid data
valid = True
for data in item:
if not data:
valid = False
raise DropItem("Missing %s of blogpost from %s" %(data, item['url']))
if valid:
#Insert data into database
log.msg("Item wrote to MongoDB database %s/%s" %
(settings['MONGODB_DB'], settings['MONGODB_COLLECTION']),
level=log.DEBUG, spider=spider)
return item return item
...@@ -11,35 +11,33 @@ ...@@ -11,35 +11,33 @@
BOT_NAME = 'douban' BOT_NAME = 'douban'
MONGO_PORT = 27017
SPIDER_MODULES = ['douban.spiders'] SPIDER_MODULES = ['douban.spiders']
NEWSPIDER_MODULE = 'douban.spiders' NEWSPIDER_MODULE = 'douban.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent # Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.54 Safari/536.5' USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/65.0.3325.181 Safari/537.36'
# Obey robots.txt rules # Obey robots.txt rules
# Configure maximum concurrent requests performed by Scrapy (default: 16) # Configure maximum concurrent requests performed by Scrapy (default: 16)
# Configure a delay for requests for the same website (default: 0) # Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs # See also autothrottle settings and docs
# The download delay setting will honor only one of: # The download delay setting will honor only one of:
# Disable cookies (enabled by default) # Disable cookies (enabled by default)
MONGODB_DB = 'douban'
# Disable Telnet Console (enabled by default) # Disable Telnet Console (enabled by default)
...@@ -58,9 +56,9 @@ MONGODB_COLLECTION = 'movie' ...@@ -58,9 +56,9 @@ MONGODB_COLLECTION = 'movie'
# Enable or disable downloader middlewares # Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# 'douban.middlewares.DoubanDownloaderMiddleware': 543, 'douban.middlewares.DoubanDownloaderMiddleware': 543,
#} }
# Enable or disable extensions # Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html # See https://doc.scrapy.org/en/latest/topics/extensions.html
...@@ -71,11 +69,9 @@ MONGODB_COLLECTION = 'movie' ...@@ -71,11 +69,9 @@ MONGODB_COLLECTION = 'movie'
# Configure item pipelines # Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
'douban.pipelines.DoubanPipeline': 400, 'douban.pipelines.DoubanPipeline': 300,
} }
# Enable and configure the AutoThrottle extension (disabled by default) # Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import scrapy import scrapy
from scrapy.selector import Selector
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from douban.items import DoubanItem from douban.items import MovieItem
class MovieSpider(CrawlSpider): class MovieSpider(scrapy.Spider):
name = 'movie' name = 'movie'
allowed_domains = ['movie.douban.com'] allowed_domains = ['movie.douban.com']
start_urls = ['https://movie.douban.com/top250'] start_urls = ['https://movie.douban.com/top250']
rules = (
Rule(LinkExtractor(allow=(r'https://movie.douban.com/subject/\d+')), callback='parse_item'),
def parse_item(self, response):
sel = Selector(response)
item = DoubanItem()
item['classification']= sel.xpath('//span[@property="v:genre"]/text()').extract()
item['actor']= sel.xpath('//*[@id="info"]/span[3]/a[1]/text()').extract()
#i['domain_id'] = response.xpath('//input[@id="sid"]/@value').extract()
#i['name'] = response.xpath('//div[@id="name"]').extract()
#i['description'] = response.xpath('//div[@id="description"]').extract()
return item
def parse(self, response):
    """Extract the movies listed on one page and follow the pagination links.

    Yields one MovieItem (title, score, motto) per list entry found under the
    "content" element, then one scrapy.Request per link whose href matches
    "?start=...", with this method as the callback.
    """
    li_list = response.xpath('//*[@id="content"]/div/div[1]/ol/li')
    for li in li_list:
        item = MovieItem()
        item['title'] = li.xpath('div/div[2]/div[1]/a/span[1]/text()').extract_first()
        item['score'] = li.xpath('div/div[2]/div[2]/div/span[2]/text()').extract_first()
        item['motto'] = li.xpath('div/div[2]/div[2]/p[2]/span/text()').extract_first()
        yield item
    # Raw string: "\?" is an invalid escape sequence in a normal string literal.
    href_list = response.css('a[href]::attr("href")').re(r'\?start=.*')
    for href in href_list:
        # hrefs are relative ("?start=25&filter="), so resolve them first.
        url = response.urljoin(href)
        yield scrapy.Request(url=url, callback=self.parse)
