This commit is contained in:
Emily 2018-05-24 10:31:47 +02:00
commit a4f91d5acf
7 changed files with 77 additions and 6 deletions

View File

@ -16,5 +16,8 @@
"delay": 1, "delay": 1,
"max_attempts": 3, "max_attempts": 3,
"increase_delay_on_fail": true "increase_delay_on_fail": true
},
"downloader": {
"download_folder": "/home/wayback/files"
} }
} }

47
downloader.py Normal file
View File

@ -0,0 +1,47 @@
#Used to download all the files into local folders after scrape data has been saved
import urllib.request, json
import MySQLdb
import MySQLdb.cursors
import os
import atexit
# Static settings: this script reads the "sql" and "downloader" sections.
with open("config.json", "r") as config_file:
    config = json.loads(config_file.read())

# Progress state: the "downloader" section holds "last" and "failed".
with open("memory.json", "r") as memory_file:
    memory = json.loads(memory_file.read())

# The connection options come straight from config["sql"]; the cursor class
# is forced to DictCursor so rows can be read by column name.
sql = MySQLdb.connect(
    **config["sql"],
    cursorclass=MySQLdb.cursors.DictCursor
)
cur = sql.cursor()
# atexit.register returns the function it is given, so using it as a
# decorator both registers on_close and keeps the name bound to it.
@atexit.register
def on_close():
    """Dump the current `memory` dict to memory.json and announce shutdown."""
    with open("memory.json", "w") as state_file:
        json.dump(memory, state_file)
    print("Closing...")
# Fetch the download metadata for every row of the updates table.
cur.execute("SELECT file_version,filename,file_hash,url_full FROM updates")
data = cur.fetchall()
# Remove already downloaded files (checked from memory.json)
# NOTE(review): skipping by offset presumes the query returns rows in the same
# order on every run; the query has no ORDER BY -- confirm this holds.
data = data[memory["downloader"]["last"]:]
for row in data:
    try:
        print("Downloading {} with id {}".format(row["filename"], row["file_version"]))
        # Each file is stored as <download_folder>/<filename>/f_<file_hash>.
        target_dir = os.path.join(
            config["downloader"]["download_folder"],
            row["filename"]
        )
        # urlretrieve opens the destination for writing and does not create
        # missing parent directories, so make sure the folder exists first.
        os.makedirs(target_dir, exist_ok=True)
        urllib.request.urlretrieve(
            row["url_full"],
            os.path.join(target_dir, "f_" + row["file_hash"])
        )
        print("Done.")
    except Exception as e:
        # Deliberately broad: one failing file must not abort the whole run.
        # The id is remembered in memory["downloader"]["failed"] instead.
        memory["downloader"]["failed"].append(row["file_version"])
        print("Error downloading file {}: {}".format(row["file_version"], e))
    # Count the row as processed whether or not the download succeeded.
    memory["downloader"]["last"] += 1

2
handlers/update.py Normal file
View File

@ -0,0 +1,2 @@
def handle(request):
    """Return the placeholder response for an update request.

    `request` is accepted but not inspected; the result is always the
    string "Unimplemented".
    """
    placeholder = "Unimplemented"
    return placeholder

View File

@ -1 +1,10 @@
{"scrape": {"last": 0, "failed": []}} {
"scrape": {
"last": 0,
"failed": []
},
"downloader": {
"last": 0,
"failed": []
}
}

View File

@ -0,0 +1 @@
Unimplemented

9
web.py
View File

@ -2,6 +2,7 @@ import json
import MySQLdb import MySQLdb
import MySQLdb.cursors import MySQLdb.cursors
from flask import Flask, make_response, request, render_template, jsonify from flask import Flask, make_response, request, render_template, jsonify
from handlers import update
app = Flask(__name__) app = Flask(__name__)
@ -21,5 +22,13 @@ def download_index():
def api_index(): def api_index():
return render_template("api.html") return render_template("api.html")
@app.route("/api/getUpdate", methods=["GET", "POST"])
def api_update():
    """Answer GET and POST on /api/getUpdate via handlers.update.handle."""
    # Pass Flask's request object through and return the handler's result.
    response = update.handle(request)
    return response
@app.route("/api")
def api_index():
return render_template("api.html")
if __name__ == "__main__": if __name__ == "__main__":
app.run(**config["web"]) app.run(**config["web"])