diff --git a/README.md b/README.md index 79afb62798875ac94efd4cd097d5d7b940936933..8648f8eb6f817582dab0284b6897574ba0195274 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,17 @@ 15. [我是怎么用一个特殊 Cookie ,限制住别人的爬虫的](https://blog.csdn.net/hihell/article/details/128474849) 16. [你很勇哦,这么点数据就敢用异步加载?](https://blog.csdn.net/hihell/article/details/128474866?spm=1001.2014.3001.5501) 17. [老板让我手动控制网页渲染速度,说这能反爬虫?我信了。](https://blog.csdn.net/hihell/article/details/128474887?spm=1001.2014.3001.5501) +18. [离职原因:让 BOSS 学习“滚动加载”这一名词](https://dream.blog.csdn.net/article/details/128474916) +19. [网站响应数据加一个简单的密,就能挡住80%的爬虫,你信吗?](https://dream.blog.csdn.net/article/details/128474924) +20. [一秒一个Token甩到前台,吓死在座的各位爬虫工程师](https://dream.blog.csdn.net/article/details/128474930) +21. [反爬工程师都会用的手段,IP限制反爬 - 爬虫训练场](https://dream.blog.csdn.net/article/details/128550653) ## 小知识点补充博客 1. [【小知识点】爬虫训练场项目,Python Flask 模板更新,每次都要重新服务](https://blog.csdn.net/hihell/article/details/128399376) 2. [【小知识点】Python Flask 部署,生成环境的爬虫训练场项目](https://blog.csdn.net/hihell/article/details/128422613) 3. [【小知识点】给PythonWeb项目添加百度统计,爬虫训练场](https://blog.csdn.net/hihell/article/details/128448271) +4. 
[【小知识点】为爬虫训练场项目添加 Bootstrap5 时间轴](https://dream.blog.csdn.net/article/details/128543088) ## 站点数据储备博客 diff --git a/app/__init__.py b/app/__init__.py index 57593e8f64bb5eabe1032e8658f5443b0b89f5fb..c61c97c79a6d181faef953c1949a8f98f95321c6 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -3,10 +3,27 @@ from flask_sqlalchemy import SQLAlchemy from .config import BaseConfig # 导入配置文件
+# Flask rate limiter
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address,get_ipaddr
+from flask import request  # get_real_ip reads the active request
+
+
+
 app = Flask(__name__)
 app.config.from_object(BaseConfig) # 启用配置
+def get_real_ip():
+    """Rate-limit key: first X-Forwarded-For hop (client-settable header), else the peer address."""
+    forwarded = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
+    return forwarded or request.remote_addr
+
+limiter = Limiter(app=app, key_func=get_real_ip)
+
+# limiter = Limiter(app, key_func=get_ipaddr)
+
+
 db = SQLAlchemy()
 db.init_app(app) # 初始化数据库
diff --git a/app/__pycache__/__init__.cpython-36.pyc b/app/__pycache__/__init__.cpython-36.pyc index a2b9167188d30a002e4ec227876abc1cd5605f82..3b4701fb66bd4e1c0d04d41408fc1d9ed7d55e88 100644 Binary files a/app/__pycache__/__init__.cpython-36.pyc and b/app/__pycache__/__init__.cpython-36.pyc differ diff --git a/app/__pycache__/routes.cpython-36.pyc b/app/__pycache__/routes.cpython-36.pyc index 8ebc11349d95c7b8e903e35cb8ee8d45217a0b47..d929aa3f9a136a1c4be27308948e21c163e7e519 100644 Binary files a/app/__pycache__/routes.cpython-36.pyc and b/app/__pycache__/routes.cpython-36.pyc differ diff --git a/app/school/__pycache__/index.cpython-36.pyc b/app/school/__pycache__/index.cpython-36.pyc index 43fd5843e2b7732745f34353759c29cb077b17ac..758e3c888601a875b87acc123be67e3f7efd9796 100644 Binary files a/app/school/__pycache__/index.cpython-36.pyc and b/app/school/__pycache__/index.cpython-36.pyc differ diff --git a/app/school/index.py b/app/school/index.py index dae3b19d683d69f6685c5aa910e4b0689c47a63e..37431aa92cd488419810eb4035d9b29e77b45d14 100644 --- a/app/school/index.py +++ b/app/school/index.py @@ -7,6 +7,10 @@ from
flask import Blueprint, jsonify, request from flask import render_template from ..model import School # 导入上级模块
+# Import the shared limiter object from the app package
+from .. import limiter
+
+
 s = Blueprint('school', __name__, url_prefix='/ss')
@@ -132,8 +136,6 @@ def encry_api(): """ 间隔10秒生成一Cookie - - """
@@ -165,3 +167,24 @@ def token_list_school(): pagination = pagination_object(page) return jsonify(pagination)
+
+
+"""
+限制 IP 访问
+"""
+@s.route('ajax_list3')
+def ajax_list3():
+    """Render the list page for the IP rate-limit demo, seeded with page 1."""
+    page = 1  # the page shell always starts from the first page
+    pagination = pagination_object(page)
+    return render_template('school/ajax_list3.html', pagination=pagination)
+
+
+@s.route('api3')
+@limiter.limit("3/second")
+def school_api3():
+    """Return the requested page as JSON; the limiter allows 3 requests per second."""
+    # type=int falls back to the default (1) instead of raising on ?page=abc
+    page = request.args.get("page", 1, type=int)
+    pagination = pagination_object(page)
+    return jsonify(pagination)
diff --git a/app/templates/csdn/blogstar.html b/app/templates/csdn/blogstar.html index 051a43bd4c28de879f0eda8cf5aef8161d5bb605..aeb68b7c60a6b6f2d9dca2521f65c930ea11124f 100644 --- a/app/templates/csdn/blogstar.html +++ b/app/templates/csdn/blogstar.html @@ -1,13 +1,22 @@ {% extends "base.html" %} {% block content %} +
- + +
diff --git a/app/templates/csdn/newstar.html b/app/templates/csdn/newstar.html index 16b80ad54246dde9067e2683101e4d6acf4628cf..fdbd01e9fdb901850e378685d130708a51a00c46 100644 --- a/app/templates/csdn/newstar.html +++ b/app/templates/csdn/newstar.html @@ -1,15 +1,25 @@ {% extends "base.html" %} {% block content %} +
-
+

CSDN 2022 博客之星总排名 👉 绿色背景是总分前 200(晋级区)👈

-

数据同步时间:2023-01-03 12:00

-

来都来了,不去给橡皮擦打个5分么? | 数据同步时间:2023-01-04 21:00

+

来都来了,不去给橡皮擦打个5分么?
https://bbs.csdn.net/topics/611387187

@@ -18,16 +27,16 @@ 仅看新星

-
- + @@ -52,9 +62,7 @@ {% endif %} - + {%endfor%} diff --git a/app/templates/index.html b/app/templates/index.html index 284f8ee75cf2f79bcb936545496a7b255e09bc6c..37d68482c0f75ecc67bb80338b80218cef4a0058 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -234,7 +234,29 @@

+ + +
+
+
+

IP 限制爬虫

+
最新更新 +
+
+
+

本案例限制单IP每秒仅能访问3次API,学习时,需要用到代理IP池,或者间隔时间采集。

+

难度:⭐⭐

+

+ 案例: + 学校清单 +

+
+
diff --git a/app/templates/school/ajax_list3.html b/app/templates/school/ajax_list3.html new file mode 100644 index 0000000000000000000000000000000000000000..b394925a03c0229ff139fed9337bc55b6dfe3dc8 --- /dev/null +++ b/app/templates/school/ajax_list3.html @@ -0,0 +1,112 @@ +{% extends "base.html" %} +{% block script %} + + +{% endblock script %} + + +{% block content %} +
+ {% for school in pagination.data_list %} +
+
+
+
+ + + +
+
+
{{school.name}}
+

+ {% for fea in school.feature.split(',') %} + {{fea}} + {% endfor %} +

+

所在省市:{{school.province}} -- {{school.city}}

+
+
+
+
+ {% endfor %} + +
+
+
+
+ 合计 {{pagination.total}} 条数据 + +
+
+
+ +{% endblock %} diff --git a/app/templates/timeline.html b/app/templates/timeline.html index d5161d3cf5804a8323afc96315c6a9ed45d91277..2af033a835c5fcac8aaeac3a4d8b50be99150435 100644 --- a/app/templates/timeline.html +++ b/app/templates/timeline.html @@ -16,6 +16,22 @@ 正在更新中 +
+
+ +
+
+
+

爬虫训练场 V0.0.16 发布

+
+
+

更新 反爬案例 --- IP 限制次数!

+
+ +
+

CSDN 2022 博客新星总排名 👉 绿色背景是总分前 100(晋级区)👈

-

数据同步时间:2023-12-30 9:00

-

来都来了,不去给橡皮擦打个5分么? | https://bbs.csdn.net/topics/611387187 +

数据同步时间:2023-01-04 21:00

+

来都来了,不去给橡皮擦打个5分么?
https://bbs.csdn.net/topics/611387187

@@ -25,7 +35,7 @@
昵称 赛道 注册时间目前得分总分
- {{u.regtime}} - {{u.regtime}} {{u.totalScore}}