From 0203d062aacb7e92b7d34ebecf64d0f671f8bea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=A2=A6=E6=83=B3=E6=A9=A1=E7=9A=AE=E6=93=A6?= Date: Thu, 5 Jan 2023 20:08:03 +0800 Subject: [PATCH] =?UTF-8?q?=E6=A1=88=E4=BE=8B=20IP=20=E9=99=90=E5=88=B6?= =?UTF-8?q?=E5=8F=8D=E7=88=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 + app/__init__.py | 17 +++ app/__pycache__/__init__.cpython-36.pyc | Bin 634 -> 991 bytes app/__pycache__/routes.cpython-36.pyc | Bin 1284 -> 1284 bytes app/school/__pycache__/index.cpython-36.pyc | Bin 3618 -> 4122 bytes app/school/index.py | 27 ++++- app/templates/csdn/blogstar.html | 23 ++-- app/templates/csdn/newstar.html | 24 +++-- app/templates/index.html | 24 ++++- app/templates/school/ajax_list3.html | 112 ++++++++++++++++++++ app/templates/timeline.html | 16 +++ 11 files changed, 230 insertions(+), 18 deletions(-) create mode 100644 app/templates/school/ajax_list3.html diff --git a/README.md b/README.md index 79afb62..8648f8e 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,17 @@ 15. [我是怎么用一个特殊 Cookie ,限制住别人的爬虫的](https://blog.csdn.net/hihell/article/details/128474849) 16. [你很勇哦,这么点数据就敢用异步加载?](https://blog.csdn.net/hihell/article/details/128474866?spm=1001.2014.3001.5501) 17. [老板让我手动控制网页渲染速度,说这能反爬虫?我信了。](https://blog.csdn.net/hihell/article/details/128474887?spm=1001.2014.3001.5501) +18. [离职原因:让 BOSS 学习“滚动加载”这一名词](https://dream.blog.csdn.net/article/details/128474916) +19. [网站响应数据加一个简单的密,就能挡住80%的爬虫,你信吗?](https://dream.blog.csdn.net/article/details/128474924) +20. [一秒一个Token甩到前台,吓死在座的各位爬虫工程师](https://dream.blog.csdn.net/article/details/128474930) +21. [反爬工程师都会用的手段,IP限制反爬 - 爬虫训练场](https://dream.blog.csdn.net/article/details/128550653) ## 小知识点补充博客 1. [【小知识点】爬虫训练场项目,Python Flask 模板更新,每次都要重新服务](https://blog.csdn.net/hihell/article/details/128399376) 2. [【小知识点】Python Flask 部署,生成环境的爬虫训练场项目](https://blog.csdn.net/hihell/article/details/128422613) 3. [【小知识点】给PythonWeb项目添加百度统计,爬虫训练场](https://blog.csdn.net/hihell/article/details/128448271) +4. [【小知识点】为爬虫训练场项目添加 Bootstrap5 时间轴](https://dream.blog.csdn.net/article/details/128543088) ## 站点数据储备博客 diff --git a/app/__init__.py b/app/__init__.py index 57593e8..c61c97c 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -3,10 +3,27 @@ from flask_sqlalchemy import SQLAlchemy from .config import BaseConfig # 导入配置文件 +# Flask 限流器 +from flask_limiter import Limiter +from flask_limiter.util import get_remote_address,get_ipaddr + + + + app = Flask(__name__) app.config.from_object(BaseConfig) # 启用配置 +def get_real_ip(): + if request.headers.getlist("X-Forwarded-For"): + return request.headers.getlist("X-Forwarded-For")[0] + return request.remote_addr + +limiter = Limiter(app, key_func=get_real_ip) + +# limiter = Limiter(app, key_func=get_ipaddr) + + db = SQLAlchemy() db.init_app(app) # 初始化数据库 diff --git a/app/__pycache__/__init__.cpython-36.pyc b/app/__pycache__/__init__.cpython-36.pyc index a2b9167188d30a002e4ec227876abc1cd5605f82..3b4701fb66bd4e1c0d04d41408fc1d9ed7d55e88 100644 GIT binary patch literal 991 zcmY+C%Wl&^6ozN)ICh;$nx@0y zco%P3b;A~k6=&=;Ew+5-^URrl&iUmb+wS+;bVSdkVH~N zQpzc{>?kL7xr^+|mTILQ_v(31wNsz_HMdoeuJN@r)20|I_ zQ_YZuln3W97o%Agqa$rpDZyB8#*l%I6e4|gG~D6d$l9uH^SocPFc&In{MGm;lBylo z&T7x*bekO2Sgh^2eDL4g?yO9d_HjW6c6%U17NsCWv}adGP`D@^2;R;G>p-IwDnBQ zpwLNHR@78<#KRj6?!<<-?5>*+(@>}G_WN0TTnmV+JGu{%>X9I33$ zjEoGa%*_l@oT;2yT&e6JehNbhYYuxZcN906%?4!iMDYOG%qi?Syde;p4@&bxX#prL z7{UNFTPRf+Y%+)?0%wUf%Yv)}35vl5#lco`1T$!IP8MS{l42{$FD*$e)?~cJo}QYQ zT9lXrWHM^`X-Z5kVKOkh#StH$mzbLxAAgHEv7jJ|JFO@`H$FcpD>bj66&{OdLSW R#>mACyF8IV`ujh8;FuJ19z^f2GD>J}w_hGqQ%Gg$7STJK{f z%O8+>Kbv9&)^|Yb1AgCDk@fFWHo%GpWS`b;$=JXQGFvi(k{JS~#LAKxlFS)k&az?2 zlq53(%qZKdNUAKUF`&lTm}JgK<{U8R**VFamCOZTUSL(p3`=GLm>1cEWJV-&ai99u z)+l=kG?UCh4LDn2^bT>R|G-qNIcl1_7IuBHNMFx8G|h%VD@c6)ZA33C6Ob~(2M~$~ z1%y6?euN=}lK4e+Mg~Ew8y#=cccYChUV>#xgRcp%2vb|7MR8lZs*mIMdGSd5SeILE zQO+zZR)9srN#wGikvK}K%w9@W9(}-3)q^pR{xX-tAWj-ltFv-Te3W^oE)T?`(u{OF zRlP8DGJFL0O(39OsR@1sZ7&I;Hm@Rk^4zp|q(|@lMzSKN^ik@F8~WOwJS-2}wu^D2 z8Aah-&}ROF*=EuT9m*#`<=&)u(alvSrJ++~Mf{W-Dket<=)8~ugXREw)bme-lUa#Q6%K)i{+tO^u zO``YxwygT&VTod)Og|Tsh3b1Z@N@I9E@}-}%Li)-Z}&R9F_svkyCfl7lqthxmnO;% zOeabbbqD6uu99d>-2ski4^@~~nKh1Gz$v7xv{&oZz9Jn-sRvCjo6qtZ4L^=u9(5rq zv~;X3)Lu|;A&WUb9sn+H0(=8+YyuEPqbBsALF<~=32NI$Z7%j3QJX7&&0Q@~z;Cz2kNFRXhCRlRnarrO;$+n^b$Z%DRD%haIe2C=iAv7DpXTZHCl z$A)x^*d2n&17p#=VDf_L1g1c{1Y-%N8<-xtM=+g&=>?`pOM)o~=2#z4{j^U|U4j|_ zW-lEOOt)YLf!Rm*3#NyXOHyTsFOt(QUXsJ|P#(6zatN?di5UncLKdMDp}>t)rD%a^ zYfX2_bHc@CR)Aqb{AUs?@e8R1V)FZ`Ikg|x>-=5nrYc6?rq$fj`7-N2yL!t%YH52IeCb zK$0z@5q1!)B;zi-*B$;qE%LSWAkq1gbpHfKf~fzKfsMi1i~Ljia${2=x~i-KFP1_W zK^R34;o89Zo8S(ir$x054qK{aO=jaTPUI%zH?rOQxjMd8n6KJCU+Dfz@_$v!Xs!f- z!@R4^i)z)M5)*a^#1y7IUB%!u$nA+}E4*mV|HV=wxp#n>dwuku6fm*Jqlm|fG8I)jT-H8-jy>82Bh*Sv<1`^(;vyqI!YTIV6Jo8^I%D2gy0Uwc>=hCGTeg)q+- za--{-<22m5=Qs(8n7GZ@tBJzGmDm&aM6|i+J<$hj0-dp%q8bcywCLIXRwUYNJ)|pv lH&0+igtuOUs|SC%Kn!BY**0xV&*iPOrCAxvgs0QeegN0`BQXE~ diff --git a/app/school/index.py b/app/school/index.py index dae3b19..37431aa 100644 --- a/app/school/index.py +++ b/app/school/index.py @@ -7,6 +7,10 @@ from flask import Blueprint, jsonify, request from flask import render_template from ..model import School # 导入上级模块 +# 从 app 中导入 limiter 对象 +from app import limiter + + s = Blueprint('school', __name__, url_prefix='/ss') @@ -132,8 +136,6 @@ def encry_api(): """ 间隔10秒生成一Cookie - - """ @@ -165,3 +167,24 @@ def token_list_school(): pagination = pagination_object(page) return jsonify(pagination) + + +""" +限制 IP 访问 +""" +@s.route('ajax_list3') +def ajax_list3(): + page = 1 # 初始化第一页数据 + + pagination = pagination_object(page) + return render_template('school/ajax_list3.html', pagination=pagination) + + +@s.route('api3') +@limiter.limit("3/second") +def school_api3(): + page = int(request.args.get("page", 1)) + + pagination = pagination_object(page) + + return jsonify(pagination) diff --git a/app/templates/csdn/blogstar.html b/app/templates/csdn/blogstar.html index 051a43b..aeb68b7 100644 --- a/app/templates/csdn/blogstar.html +++ b/app/templates/csdn/blogstar.html @@ -1,13 +1,22 @@ {% extends "base.html" %} {% block content %} +
- + +
diff --git a/app/templates/csdn/newstar.html b/app/templates/csdn/newstar.html index 16b80ad..fdbd01e 100644 --- a/app/templates/csdn/newstar.html +++ b/app/templates/csdn/newstar.html @@ -1,15 +1,25 @@ {% extends "base.html" %} {% block content %} +
-
+

CSDN 2022 博客之星总排名 👉 绿色背景是总分前 200(晋级区)👈

-

数据同步时间:2023-01-03 12:00

-

来都来了,不去给橡皮擦打个5分么? | 数据同步时间:2023-01-04 21:00

+

来都来了,不去给橡皮擦打个5分么?
https://bbs.csdn.net/topics/611387187

@@ -18,16 +27,16 @@ 仅看新星

-
- + @@ -52,9 +62,7 @@ {% endif %} - + {%endfor%} diff --git a/app/templates/index.html b/app/templates/index.html index 284f8ee..37d6848 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -234,7 +234,29 @@

+ + +
+
+
+

IP 限制爬虫

+
最新更新 +
+
+
+

本案例限制单IP每秒仅能访问3次API,学习时,需要用到代理IP池,或者间隔时间采集。

+

难度:⭐⭐

+

+ 案例: + 学校清单 +

+
+
diff --git a/app/templates/school/ajax_list3.html b/app/templates/school/ajax_list3.html new file mode 100644 index 0000000..b394925 --- /dev/null +++ b/app/templates/school/ajax_list3.html @@ -0,0 +1,112 @@ +{% extends "base.html" %} +{% block script %} + + +{% endblock script %} + + +{% block content %} +
+ {% for school in pagination.data_list %} +
+
+
+
+ + + +
+
+
{{school.name}}
+

+ {% for fea in school.feature.split(',') %} + {{fea}} + {% endfor %} +

+

所在省市:{{school.province}} -- {{school.city}}

+
+
+
+
+ {% endfor %} + +
+
+
+
+ 合计 {{pagination.total}} 条数据 + +
+
+
+ +{% endblock %} diff --git a/app/templates/timeline.html b/app/templates/timeline.html index d5161d3..2af033a 100644 --- a/app/templates/timeline.html +++ b/app/templates/timeline.html @@ -16,6 +16,22 @@ 正在更新中 +
+
+ +
+
+
+

爬虫训练场 V0.0.16 发布

+
+
+

更新 反爬案例 --- IP 限制次数!

+
+ +
+
-- GitLab

CSDN 2022 博客新星总排名 👉 绿色背景是总分前 100(晋级区)👈

-

数据同步时间:2023-12-30 9:00

-

来都来了,不去给橡皮擦打个5分么? | https://bbs.csdn.net/topics/611387187 +

数据同步时间:2023-01-04 21:00

+

来都来了,不去给橡皮擦打个5分么?
https://bbs.csdn.net/topics/611387187

@@ -25,7 +35,7 @@
昵称 赛道 注册时间目前得分总分
- {{u.regtime}} - {{u.regtime}} {{u.totalScore}}