diff --git a/app/__init__.py b/app/__init__.py index 7be31a23e0aa8369ff65fb7d47febccd47e5afd4..108b38d610d364ee39abbacd71231ace665fe6de 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -15,9 +15,10 @@ db.init_app(app) # 初始化数据库 from app import routes from app import general from .school.index import * +from .file.index import * app.register_blueprint(s) - +app.register_blueprint(f) diff --git a/app/__pycache__/__init__.cpython-36.pyc b/app/__pycache__/__init__.cpython-36.pyc index 7bac9ea0347df7c00172caf663da45e00fc93452..9b1c8f3226e4b978fb6d46ca2806755b63750b29 100644 Binary files a/app/__pycache__/__init__.cpython-36.pyc and b/app/__pycache__/__init__.cpython-36.pyc differ diff --git a/app/file/__init__.py b/app/file/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/file/__pycache__/__init__.cpython-36.pyc b/app/file/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b87e16ed2b4dc7c0eb0029485c6bf7bd6a7d59a Binary files /dev/null and b/app/file/__pycache__/__init__.cpython-36.pyc differ diff --git a/app/file/__pycache__/index.cpython-36.pyc b/app/file/__pycache__/index.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64a26ca62a8258ba611fe686f344483389e8bc2d Binary files /dev/null and b/app/file/__pycache__/index.cpython-36.pyc differ diff --git a/app/file/index.py b/app/file/index.py new file mode 100644 index 0000000000000000000000000000000000000000..c457db05414274c15f3a98045a5f7874e2f52f4b --- /dev/null +++ b/app/file/index.py @@ -0,0 +1,20 @@ +from flask import Blueprint, request +from flask import render_template + +f = Blueprint('file', __name__, url_prefix='/file') + + +@f.route('/c') +def common_file(): + return render_template('file/c.html') + + +@f.route('/mp4') +def mp4_file(): + mp4_url = "http://clips.vorwaerts-gmbh.de/big_buck_bunny.mp4" + return render_template('file/mp4.html', file=mp4_url) + + +@f.route('/m3u8') +def m3u8_file(): + return render_template('file/m3u8.html') diff --git a/app/templates/base.html b/app/templates/base.html index 01d7015233d9ccc285cf2b8f8f7b641778aed54b..29a7483107188ea5c03f40e491b6a8962efeef91 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -1,8 +1,17 @@ -{%include 'common/header.html' %} +{% block head %} + + {% include 'common/header.html'%} + {% block link %}{% endblock link %} + {% block script %}{% endblock script %} + {% include 'common/tj.html'%} + +{% endblock %} -{%block content%}{%endblock content%} +{% include 'common/nav.html'%} +{% block content %} {% endblock content %} +{% include 'common/footer.html' %} \ No newline at end of file diff --git a/app/templates/common/header.html b/app/templates/common/header.html index 57393a9af2c7a00b6f8c55a1382e242cf8c07dcd..04e580ff7df4e2be802f26f8541307d4977b951a 100644 --- a/app/templates/common/header.html +++ b/app/templates/common/header.html @@ -1,13 +1,9 @@ - - - - {% if title%} - {{title}}|爬虫训练场,让天下没有失效的爬虫 - {% else %} - 爬虫训练场,让天下没有失效的爬虫 - {%endif%} - - - {% include 'common/tj.html'%} - - \ No newline at end of file + + +{% if title%} +{{title}}|爬虫训练场,让天下没有失效的爬虫 +{% else %} +爬虫训练场,让天下没有失效的爬虫 +{%endif%} + + \ No newline at end of file diff --git a/app/templates/file/c.html b/app/templates/file/c.html new file mode 100644 index 0000000000000000000000000000000000000000..93827080f5e9b863c0f4ee456daed40aa0e09309 --- /dev/null +++ b/app/templates/file/c.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{%block link%} + +{%endblock link%} +{% block content %} + +
+

爬虫采集文件练习

+
+ WORD 文档:点击下载文件 + Excel 文档:点击下载文件 + + PPT 文档:点击下载文件 + + PDF 文档:点击下载文件 +
+
+ +{% endblock %} \ No newline at end of file diff --git a/app/templates/file/m3u8.html b/app/templates/file/m3u8.html new file mode 100644 index 0000000000000000000000000000000000000000..615c650c924ca43f70da5baa341258d812b0e983 --- /dev/null +++ b/app/templates/file/m3u8.html @@ -0,0 +1,20 @@ +{% extends "base.html" %} +{%block script%} + +{%endblock script%} +{% block content %} + +
+ +
+ + +{% endblock %} \ No newline at end of file diff --git a/app/templates/file/mp4.html b/app/templates/file/mp4.html new file mode 100644 index 0000000000000000000000000000000000000000..398062f2b8d019d13594cf16c77ec0a462f8dfb3 --- /dev/null +++ b/app/templates/file/mp4.html @@ -0,0 +1,13 @@ +{% extends "base.html" %} + +{% block content %} +
+
+
+
+ +
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html index 49d6d7f7be1daa1e91edaee8d2fe7b83c838d30e..a35a0805eb826692212ff0e7e12fbfa0726c86ad 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,6 +1,5 @@ {% extends "base.html" %} {% block content %} -{% include 'common/nav.html'%}

普通爬虫

@@ -29,10 +28,12 @@
-
+

分页爬虫

-
最新更新
+
最新更新 +

本案例是由 2760 条数据组成的分页爬虫,数据为国内学校清单,主要用于练习分页数据采集。

@@ -43,29 +44,30 @@

-
+
-

单页爬虫

+

二进制文件采集

+
最新更新 +
-

目标数据呈现在单一页面中,使用最简单的爬虫库可以直接采集,一般用正则表达式即可完成数据提取。

+

本案例用于大家学习文件和视频文件内容采集,重点掌握 M3U8 格式视频下载,掌握二进制内容保存。

难度:⭐

案例: - 新闻页 - 图片清单 - 表格 + 普通 + MP4文件 + M3U8文件

@@ -221,5 +223,4 @@
-{% include 'common/footer.html'%} {% endblock %}