diff --git a/deploy.sh b/deploy.sh index 73c66dba8b25f6ce30c2c7644f8cbe887944f10c..2fce52c4c86d736a596d49a911aea4dcb1efd4d6 100755 --- a/deploy.sh +++ b/deploy.sh @@ -27,6 +27,8 @@ else echo "Il faut mettre une salle qui existe. Sinon ca marche pas bien." exit fi + # IMPORTANT : Lancer la commande python3.8 -m pip install -r ~/bachelor-scrapper-058/projet/requirements.txt UNE + # FOIS sur un seul ordinateur de l'HEPIA. # Itération sur la salle courante... for host in $current do diff --git a/install.sh b/install.sh index 6f6c804e1bf540d1448fa4c5729e4553e7393ca4..2d24ff9d718daf0a1fe1e1ba2fdd9a5f255fe8f2 100644 --- a/install.sh +++ b/install.sh @@ -8,4 +8,4 @@ rm -rf /tmp/bachelor-scrapper-058 cp -r ~/bachelor-scrapper-058 /tmp # REPLACE BY /usr/local/scratch/theo.pirkl/ ONCE OK cd /tmp/bachelor-scrapper-058/projet || exit # Failsafe si pas hepia ou erreur # Attention : pas de venv ! -screen -d -m bash -c "python3.8 src/start.py --remote" # ADD python3.8 -m pip install -r requirements.txt; FOR DEPLOY \ No newline at end of file +screen -d -m bash -c "python3.8 src/start.py --remote" \ No newline at end of file diff --git a/projet/playbook-example.yml b/projet/playbook-example.yml deleted file mode 100644 index cab4ec23c1a3ebce504fce1433e4f24771d5e625..0000000000000000000000000000000000000000 --- a/projet/playbook-example.yml +++ /dev/null @@ -1,43 +0,0 @@ -clients: # The default management port for a client is 18965 - - "129.194.187.141" - - "129.194.187.142" - - "129.194.187.143" - - "129.194.187.144" - - "129.194.187.145" - - "129.194.187.146" - - "129.194.187.147" - - "129.194.187.148" - - "129.194.187.149" - - "129.194.187.150" - - "129.194.187.151" - - "129.194.187.152" - - "129.194.187.153" - - "129.194.187.154" - - "129.194.187.155" - - "129.194.187.156" - - "129.194.187.157" - - "129.194.187.158" - - "129.194.187.127" - - "129.194.187.128" - - "129.194.187.129" - - "129.194.187.130" - -variables: - - merchandiseId: "2709.0090" - -chapters: - Swiss Impex: - 
year: {start: 1989, end: 2020} # 1989 - 2019 - month: {start: 0, end: 12} - merchandiseId: $merchandiseId$ - then: # And then... - XLSX Parser: - file: "!swissImpexOutputfileName" - from: "A6" - to: "H197" - instructions: - replace: - - "*" - - "" - export: csv - cleanup: yes \ No newline at end of file diff --git a/projet/resources/measures/faogeneve-1m1c.csv b/projet/resources/measures/faogeneve-1m1c.csv new file mode 100644 index 0000000000000000000000000000000000000000..73542333ea0aba7f308d68709f222f30bf536bf9 --- /dev/null +++ b/projet/resources/measures/faogeneve-1m1c.csv @@ -0,0 +1,556 @@ +0,0 +1,0 +2,0 +3,0 +4,5 +5,6 +6,10 +7,10 +8,10 +9,10 +10,13 +11,14 +12,16 +13,20 +14,20 +15,20 +16,20 +17,21 +18,25 +19,28 +20,30 +21,30 +22,30 +23,30 +24,30 +25,32 +26,35 +27,39 +28,40 +29,40 +30,40 +31,41 +32,42 +33,45 +34,49 +35,50 +36,50 +37,50 +38,50 +39,60 +40,60 +41,60 +42,60 +43,61 +44,67 +45,70 +46,70 +47,70 +48,71 +49,78 +50,80 +51,80 +52,80 +53,80 +54,87 +55,90 +56,90 +57,90 +58,91 +59,97 +60,100 +61,100 +62,100 +63,100 +64,103 +65,109 +66,110 +67,110 +68,110 +69,110 +70,110 +71,111 +72,113 +73,120 +74,120 +75,120 +76,120 +77,120 +78,122 +79,130 +80,130 +81,130 +82,130 +83,131 +84,132 +85,138 +86,140 +87,140 +88,140 +89,141 +90,141 +91,149 +92,150 +93,150 +94,150 +95,151 +96,151 +97,157 +98,160 +99,160 +100,160 +101,160 +102,160 +103,163 +104,163 +105,165 +106,170 +107,170 +108,170 +109,170 +110,172 +111,180 +112,180 +113,180 +114,180 +115,181 +116,181 +117,190 +118,190 +119,190 +120,190 +121,191 +122,199 +123,200 +124,200 +125,200 +126,201 +127,203 +128,210 +129,210 +130,210 +131,210 +132,211 +133,220 +134,220 +135,220 +136,220 +137,221 +138,225 +139,230 +140,230 +141,230 +142,230 +143,231 +144,238 +145,240 +146,240 +147,240 +148,240 +149,241 +150,250 +151,250 +152,250 +153,250 +154,251 +155,252 +156,257 +157,260 +158,260 +159,260 +160,261 +161,270 +162,270 +163,270 +164,270 +165,270 +166,271 +167,280 +168,280 +169,280 +170,280 +171,281 +172,287 +173,290 
+174,290 +175,290 +176,292 +177,292 +178,299 +179,300 +180,300 +181,300 +182,301 +183,310 +184,310 +185,310 +186,311 +187,311 +188,319 +189,320 +190,320 +191,320 +192,320 +193,321 +194,330 +195,330 +196,330 +197,331 +198,339 +199,340 +200,340 +201,340 +202,341 +203,350 +204,350 +205,350 +206,350 +207,351 +208,360 +209,360 +210,360 +211,360 +212,362 +213,370 +214,370 +215,370 +216,371 +217,374 +218,380 +219,380 +220,380 +221,380 +222,385 +223,390 +224,390 +225,390 +226,390 +227,391 +228,400 +229,400 +230,400 +231,400 +232,404 +233,410 +234,410 +235,410 +236,410 +237,411 +238,420 +239,420 +240,420 +241,421 +242,425 +243,430 +244,430 +245,430 +246,431 +247,433 +248,440 +249,440 +250,440 +251,440 +252,441 +253,443 +254,450 +255,450 +256,450 +257,452 +258,454 +259,460 +260,460 +261,460 +262,460 +263,463 +264,470 +265,470 +266,470 +267,470 +268,472 +269,480 +270,480 +271,480 +272,480 +273,483 +274,490 +275,490 +276,490 +277,490 +278,491 +279,498 +280,500 +281,500 +282,500 +283,500 +284,502 +285,503 +286,507 +287,509 +288,510 +289,510 +290,510 +291,511 +292,519 +293,520 +294,520 +295,520 +296,520 +297,522 +298,530 +299,530 +300,530 +301,531 +302,531 +303,539 +304,539 +305,540 +306,540 +307,540 +308,541 +309,550 +310,550 +311,550 +312,550 +313,551 +314,560 +315,560 +316,560 +317,560 +318,568 +319,570 +320,570 +321,570 +322,571 +323,580 +324,580 +325,580 +326,580 +327,581 +328,582 +329,590 +330,590 +331,590 +332,591 +333,600 +334,600 +335,600 +336,600 +337,601 +338,610 +339,610 +340,610 +341,612 +342,613 +343,614 +344,616 +345,618 +346,620 +347,620 +348,620 +349,622 +350,630 +351,630 +352,630 +353,631 +354,631 +355,631 +356,640 +357,640 +358,640 +359,640 +360,649 +361,650 +362,650 +363,650 +364,660 +365,660 +366,660 +367,661 +368,661 +369,666 +370,670 +371,670 +372,670 +373,671 +374,678 +375,680 +376,680 +377,680 +378,681 +379,685 +380,690 +381,690 +382,690 +383,691 +384,700 +385,700 +386,700 +387,701 +388,710 +389,710 +390,710 +391,710 +392,712 +393,720 +394,720 +395,720 
+396,720 +397,721 +398,729 +399,730 +400,730 +401,730 +402,731 +403,734 +404,740 +405,740 +406,740 +407,741 +408,749 +409,750 +410,750 +411,750 +412,751 +413,760 +414,760 +415,760 +416,760 +417,761 +418,770 +419,770 +420,770 +421,770 +422,779 +423,780 +424,780 +425,780 +426,781 +427,785 +428,790 +429,790 +430,790 +431,790 +432,790 +433,791 +434,794 +435,799 +436,800 +437,800 +438,800 +439,801 +440,808 +441,810 +442,810 +443,810 +444,812 +445,820 +446,820 +447,820 +448,820 +449,821 +450,827 +451,830 +452,830 +453,830 +454,830 +455,831 +456,833 +457,840 +458,840 +459,840 +460,840 +461,841 +462,850 +463,850 +464,850 +465,851 +466,857 +467,860 +468,860 +469,860 +470,861 +471,862 +472,870 +473,870 +474,870 +475,870 +476,871 +477,880 +478,880 +479,880 +480,880 +481,886 +482,890 +483,890 +484,890 +485,891 +486,892 +487,900 +488,900 +489,900 +490,900 +491,901 +492,910 +493,910 +494,910 +495,910 +496,911 +497,911 +498,913 +499,920 +500,920 +501,920 +502,921 +503,930 +504,930 +505,930 +506,930 +507,937 +508,940 +509,940 +510,940 +511,940 +512,941 +513,0 +514,0 +515,0 +516,0 +517,950 +518,950 +519,950 +520,950 +521,950 +522,951 +523,960 +524,960 +525,960 +526,960 +527,970 +528,970 +529,970 +530,970 +531,971 +532,980 +533,980 +534,980 +535,980 +536,984 +537,990 +538,990 +539,990 +540,990 +541,999 +542,1000 +543,1000 +544,1000 +545,1000 +546,1000 +547,1000 +548,1000 +549,1000 +550,1000 +551,1000 +552,1000 +553,1000 +554,1000 +555,1000 diff --git a/projet/resources/measures/faogeneve-1m6c.csv b/projet/resources/measures/faogeneve-1m6c.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b1ad0a5f173936c8cc6976cb23f4c4c22182d50 --- /dev/null +++ b/projet/resources/measures/faogeneve-1m6c.csv @@ -0,0 +1,280 @@ +0,0 +1,0 +2,0 +3,0 +4,0 +5,2 +6,6 +7,9 +8,16 +9,21 +10,27 +11,27 +12,30 +13,33 +14,48 +15,58 +16,60 +17,60 +18,60 +19,60 +20,61 +21,62 +22,64 +23,65 +24,67 +25,71 +26,76 +27,81 +28,97 +29,105 +30,108 +31,120 +32,120 +33,120 +34,120 +35,121 +36,121 
+37,125 +38,128 +39,128 +40,129 +41,129 +42,130 +43,130 +44,141 +45,156 +46,163 +47,165 +48,167 +49,175 +50,175 +51,180 +52,180 +53,180 +54,181 +55,190 +56,191 +57,191 +58,195 +59,196 +60,197 +61,197 +62,198 +63,198 +64,198 +65,213 +66,230 +67,233 +68,239 +69,246 +70,249 +71,252 +72,252 +73,254 +74,259 +75,263 +76,264 +77,264 +78,264 +79,264 +80,286 +81,289 +82,291 +83,300 +84,310 +85,312 +86,317 +87,321 +88,323 +89,323 +90,327 +91,328 +92,332 +93,332 +94,334 +95,334 +96,335 +97,337 +98,349 +99,361 +100,377 +101,380 +102,380 +103,380 +104,380 +105,380 +106,381 +107,381 +108,381 +109,381 +110,382 +111,383 +112,392 +113,410 +114,423 +115,423 +116,424 +117,426 +118,434 +119,440 +120,440 +121,441 +122,449 +123,452 +124,452 +125,453 +126,454 +127,462 +128,464 +129,464 +130,464 +131,464 +132,464 +133,464 +134,465 +135,481 +136,481 +137,484 +138,488 +139,498 +140,508 +141,510 +142,510 +143,510 +144,511 +145,517 +146,517 +147,518 +148,522 +149,547 +150,559 +151,569 +152,570 +153,570 +154,570 +155,570 +156,578 +157,585 +158,588 +159,593 +160,612 +161,618 +162,630 +163,631 +164,640 +165,640 +166,640 +167,643 +168,645 +169,647 +170,648 +171,648 +172,653 +173,662 +174,682 +175,692 +176,699 +177,700 +178,702 +179,705 +180,710 +181,714 +182,724 +183,725 +184,726 +185,726 +186,727 +187,727 +188,745 +189,751 +190,760 +191,770 +192,779 +193,780 +194,780 +195,780 +196,782 +197,782 +198,785 +199,792 +200,796 +201,797 +202,800 +203,805 +204,809 +205,820 +206,826 +207,830 +208,840 +209,840 +210,840 +211,840 +212,840 +213,842 +214,842 +215,842 +216,842 +217,842 +218,842 +219,842 +220,842 +221,860 +222,863 +223,867 +224,871 +225,888 +226,899 +227,900 +228,901 +229,904 +230,910 +231,910 +232,911 +233,911 +234,911 +235,911 +236,911 +237,917 +238,919 +239,926 +240,940 +241,940 +242,940 +243,940 +244,942 +245,942 +246,944 +247,944 +248,947 +249,948 +250,949 +251,950 +252,958 +253,960 +254,960 +255,960 +256,960 +257,961 +258,961 +259,968 +260,970 +261,980 +262,980 +263,980 +264,980 +265,980 
+266,982 +267,984 +268,985 +269,994 +270,998 +271,1000 +272,1000 +273,1000 +274,1000 +275,1000 +276,1000 +277,1000 +278,1000 +279,1000 diff --git a/projet/resources/measures/tosvg.py b/projet/resources/measures/tosvg.py new file mode 100644 index 0000000000000000000000000000000000000000..85715a7203d582dfb3387f639223d7cdffa9f2d5 --- /dev/null +++ b/projet/resources/measures/tosvg.py @@ -0,0 +1,29 @@ +import csv +import os +import sys + +import matplotlib.pyplot as plt +import numpy as np + +if __name__ == '__main__': + results = [] + + plt.xlabel("Temps [s]") + plt.ylabel("Documents téléchargés [0-1000]") + plt.gcf().subplots_adjust(bottom=0.15) + + with open(sys.argv[1]) as csvfile: + reader = csv.reader(csvfile) # change contents to floats + for row in reader: # each row is a list + results.append(row) + results = np.array(results).astype("int") + + x = results[:, 0] + y = results[:, 1] + + plt.xticks(np.arange(0, max(x), 10), rotation=70) + plt.yticks(np.arange(0, max(y) + 50, 50)) + + plt.plot(x, y) + plt.title("Performances du module FAOGeneve - NScrap") + plt.savefig(os.path.splitext(sys.argv[1])[0] + ".svg") diff --git a/projet/resources/playbooks/all-swiss-impex.yml b/projet/resources/playbooks/all-swiss-impex.yml new file mode 100644 index 0000000000000000000000000000000000000000..65deb5e79d8b826fb980a91ddb466c92cd611389 --- /dev/null +++ b/projet/resources/playbooks/all-swiss-impex.yml @@ -0,0 +1,54 @@ +clients: # The default management port for a client is 18965 + - "129.194.187.141" + - "129.194.187.142" + - "129.194.187.143" + - "129.194.187.144" + - "129.194.187.145" + - "129.194.187.146" + - "129.194.187.147" + - "129.194.187.148" + - "129.194.187.149" + - "129.194.187.150" + - "129.194.187.151" + - "129.194.187.152" + - "129.194.187.153" + - "129.194.187.154" + - "129.194.187.155" + - "129.194.187.156" + - "129.194.187.157" + - "129.194.187.158" + - "129.194.187.127" + - "129.194.187.128" + - "129.194.187.129" + - "129.194.187.130" + +chapters: + 
File Parser: + file: ./resources/EUMerchIDs.txt + server: yes # Will be run on server + instructions: + read: yes + then: + + Regex Parser: + input: "!fileParserOutput" + regex: "[0-9]{4}.[0-9]{4}" + server: yes # Will not be sent to client + then: # Sent to client from here only ! + + Swiss Impex: + year: {start: 2019, end: 2020} # 1989 - 2019 + month: {start: 0, end: 12} + merchandiseId: "!regexParserOutput" + then: + + XLSX Parser: + file: "!swissImpexOutputfileName" + from: "A6" + to: "H197" + instructions: + - replace: + - "*" + - "" + - export: csv + - cleanup: yes \ No newline at end of file diff --git a/projet/resources/playbooks/fao-geneve.yml b/projet/resources/playbooks/fao-geneve.yml new file mode 100644 index 0000000000000000000000000000000000000000..da74a4b1ca6e0e24ca3c819e0a1a5d31825d9de1 --- /dev/null +++ b/projet/resources/playbooks/fao-geneve.yml @@ -0,0 +1,6 @@ +clients: # The default management port for a client is 18965 + - "127.0.0.1" + +chapters: + FAO Geneve: + pages: {start: 0, end: 100} # From 0 to 100 included \ No newline at end of file diff --git a/projet/resources/playbooks/swiss-impex-petroleum.yml b/projet/resources/playbooks/swiss-impex-petroleum.yml new file mode 100644 index 0000000000000000000000000000000000000000..fcbdd66b8e9be80baa39df767d71fc56a2fed444 --- /dev/null +++ b/projet/resources/playbooks/swiss-impex-petroleum.yml @@ -0,0 +1,22 @@ +clients: # The default management port for a client is 18965 + - "127.0.0.1" + +variables: + - merchandiseId: "2709.0090" + +chapters: + Swiss Impex: + year: {start: 1989, end: 2020} # 1989 - 2019 + month: {start: 0, end: 12} + merchandiseId: $merchandiseId$ + then: # And then... 
+ XLSX Parser: + file: "!swissImpexOutputfileName" + from: "A6" + to: "H197" + instructions: + replace: + - "*" + - "" + export: csv + cleanup: yes diff --git a/projet/src/client/schnell/workers.py b/projet/src/client/schnell/workers.py index 38b41c5a0957327bc71125cefc8694721f7d6105..07a2aa0130ef3855130301f44f48e987c63646bf 100644 --- a/projet/src/client/schnell/workers.py +++ b/projet/src/client/schnell/workers.py @@ -3,9 +3,8 @@ import time from os import getenv as env from loguru import logger -from sqlalchemy.orm import Session -from common.mvc.Models import FailedTask, process +from common.mvc.Models import process from common.networking.intercom import Intercom @@ -35,7 +34,6 @@ def work(item, context: dict = None): chrono = time.time() logger.debug("Ready. Running item...") arguments = {k: evaluateWithContext(v, context) for k, v in item['arguments'].items()} - logger.debug("Evaluated arguments to {}".format(arguments)) # TODO Remove resultFromProcess = process(item, arguments) logger.info("Item processed.") context = {**context, **resultFromProcess} @@ -56,31 +54,18 @@ def work(item, context: dict = None): intercom = Intercom("127.0.0.1", env("CLIENT_PORT"), True) intercom.talk({"type": "failed", "task_id": item["id"], "error": str(e)}) intercom.listen() + work(item, context) except Exception as e: logger.critical("Unhandled error in worker code !") logger.error(e) + intercom = Intercom("127.0.0.1", env("CLIENT_PORT"), True) + intercom.talk({"type": "failed", "task_id": item["id"], "error": str(e)}) + intercom.listen() finally: intercom.socket.close() logger.success("Closing down task.") -# Handles exceptions -def __handleException__(item, session: Session, e: AssertionError): - logger.warning("Task failed, retrying if possible...") - if item.tries <= 2: - item.tries += 1 - session.add(item) - session.commit() - return True - else: - logger.critical("Failed to do job after 3 times...") - ft = FailedTask(item.module_id, 3, str(e)) - session.add(ft) - 
session.delete(item) - session.commit() - return False - - class Schnell: nWorkers: int = 0 pool: multiprocessing.Pool = None @@ -90,7 +75,7 @@ class Schnell: def start(self): logger.info("Starting pool with {0} workers...".format(self.nWorkers)) - self.pool = multiprocessing.get_context("spawn").Pool(processes=self.nWorkers, maxtasksperchild=1) + self.pool = multiprocessing.get_context("spawn").Pool(processes=self.nWorkers) # Stops the schnell pool. def stop(self): diff --git a/projet/src/client/spiders/scrapy/Spiders/pipelines.py b/projet/src/client/spiders/scrapy/Spiders/pipelines.py index 97a19b7cd4339ecda5932758dcd12f9255fb3a8e..39531fccf5b531e8f76fa34038951b8c5c24a3cc 100644 --- a/projet/src/client/spiders/scrapy/Spiders/pipelines.py +++ b/projet/src/client/spiders/scrapy/Spiders/pipelines.py @@ -12,4 +12,4 @@ from scrapy.pipelines.files import FilesPipeline class FAOGeneveFilePipeline(FilesPipeline): def file_path(self, request, response=None, info=None): - return 'files/' + os.path.basename(urlparse(request.url).path + ".pdf") + return os.path.basename(urlparse(request.url).path + ".pdf") diff --git a/projet/src/client/spiders/scrapy/Spiders/settings.py b/projet/src/client/spiders/scrapy/Spiders/settings.py index 50ef61ebf80cdbfd1b84c21224f0cea1148fafe5..2142f6baf0bde97cbbe185b9e06cbbb958645921 100644 --- a/projet/src/client/spiders/scrapy/Spiders/settings.py +++ b/projet/src/client/spiders/scrapy/Spiders/settings.py @@ -1,4 +1,7 @@ +from dotenv import load_dotenv, find_dotenv from os import getenv as env + +load_dotenv(find_dotenv()) # -*- coding: utf-8 -*- # Scrapy settings for Spiders project diff --git a/projet/src/client/spiders/scrapy/Spiders/spiders/faogeneve_spider.py b/projet/src/client/spiders/scrapy/Spiders/spiders/faogeneve_spider.py index 184655e2a20f577447a09d63bc4de927580c094e..53e1621849aa5574089970fb0f73a0e82a6807c0 100644 --- a/projet/src/client/spiders/scrapy/Spiders/spiders/faogeneve_spider.py +++ 
b/projet/src/client/spiders/scrapy/Spiders/spiders/faogeneve_spider.py @@ -2,36 +2,40 @@ import scrapy class FAOGeneveSpider(scrapy.Spider): - name = "FAOGeneve" - start_urls = ["https://fao.ge.ch/"] - max_fetch_files = 200 - counter = 0 + name: str = "FAOGeneve" + start_urls: list = ["https://fao.ge.ch/"] + counter: int = 0 + pages: list = [] + currentPage: int = 0 + + def __init__(self, pages: str, **kwargs): + super().__init__(**kwargs) # python3 + self.pages = pages.split(",") def parse(self, response): if "/captcha" in response.url: noCaptcha = response.url.replace("/captcha", "/login") yield scrapy.Request(noCaptcha, callback=self.parse) else: - dateSelector = './/div[contains(@class, "entete")]/div[@class="col-date"]/div[@class="fao_date"]/' - daySelector = dateSelector + 'div[@class="jour"]/text()' - monthSelector = dateSelector + 'div[@class="mois"]/text()' - yearSelector = dateSelector + 'div[@class="annee"]/text()' + if "?page=" + str(self.currentPage) in response.url or "?page=" not in response.url and 0 in self.pages: + dateSelector = './/div[contains(@class, "entete")]/div[@class="col-date"]/div[@class="fao_date"]/' + daySelector = dateSelector + 'div[@class="jour"]/text()' + monthSelector = dateSelector + 'div[@class="mois"]/text()' + yearSelector = dateSelector + 'div[@class="annee"]/text()' - for article in response.xpath('//*[contains(@class, "fao-avis")]'): - yield { - 'concern': article.xpath('.//div[contains(@class, "entete")]/div[' - '@class="col-content"]/h1/span[@class="fao-sousrubrique"]/a/text()').extract_first(), - 'date': article.xpath(daySelector).extract_first() + "/" - + article.xpath(monthSelector).extract_first() + "/" - + article.xpath(yearSelector).extract_first(), - 'file_urls': [self.start_urls[0] + article.xpath(".//div[3]/div[2]/p/a/@href").extract_first()] + for article in response.xpath('//*[contains(@class, "fao-avis")]'): + yield { + 'concern': article.xpath('.//div[contains(@class, "entete")]/div[' + 
'@class="col-content"]/h1/span[@class="fao-sousrubrique"]/a/text()') + .extract_first(), + 'date': article.xpath(daySelector).extract_first() + "/" + + article.xpath(monthSelector).extract_first() + "/" + + article.xpath(yearSelector).extract_first(), + 'file_urls': [self.start_urls[0] + article.xpath(".//div[3]/div[2]/p/a/@href").extract_first()] - } - self.counter += 1 + } + self.counter += 1 - nextButtonGroup = response.xpath( - '//ul[contains(@class, "pagination_nombre")]/li[contains(@class, "active")]/following-sibling::li/a/@href') \ - .extract_first() - if nextButtonGroup is not None and self.counter < self.max_fetch_files: - next_page = response.urljoin(nextButtonGroup) - yield scrapy.Request(next_page, callback=self.parse) + if len(self.pages) > 0: + self.currentPage = self.pages.pop(0) + yield scrapy.Request(self.start_urls[0] + "?page=" + self.currentPage) diff --git a/projet/src/client/spiders/seleniumspiders/Sleepers.py b/projet/src/client/spiders/seleniumspiders/Sleepers.py index 7ca0fb7963614198a97bb9885d0e5e3d9da13dae..22a6c26aa6e598afac88a46e63a7ee79f102f189 100644 --- a/projet/src/client/spiders/seleniumspiders/Sleepers.py +++ b/projet/src/client/spiders/seleniumspiders/Sleepers.py @@ -8,7 +8,7 @@ from loguru import logger class Sleepers: def waitForXPathToAppear(self, driver, xPath, timeout=5): try: - logger.info("Waiting for XPATH variable", xPath + "...") + logger.info("Waiting for XPATH variable {}...".format(xPath)) isReady = driver.find_element_by_xpath(xPath) WebDriverWait(driver, timeout).until(EC.visibility_of(isReady)) WebDriverWait(driver, timeout).until(EC.invisibility_of_element(isReady)) diff --git a/projet/src/client/spiders/seleniumspiders/SwissImpex/Parser.py b/projet/src/client/spiders/seleniumspiders/SwissImpex/Parser.py index ca8a79ecf46e1670cca15898f117e67054e69f0d..4a0e89b3486d2224db8ac54d97f1e415d09e4e89 100644 --- a/projet/src/client/spiders/seleniumspiders/SwissImpex/Parser.py +++ 
b/projet/src/client/spiders/seleniumspiders/SwissImpex/Parser.py @@ -10,11 +10,14 @@ from time import sleep from loguru import logger from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains +from selenium.webdriver.common.by import By from selenium.webdriver.support.select import Select +from selenium.webdriver.support.wait import WebDriverWait from client.spiders.seleniumspiders.Sleepers import Sleepers from client.spiders.seleniumspiders.Utils import Utils from common.networking.radio import Radio +from selenium.webdriver.support import expected_conditions as EC RESET_PERIOD = '//input[@id="frmSelLog:qryPerSel:cmdBtnSlPerAllEntf"]' @@ -45,6 +48,7 @@ READY = '//div[@class="iceOutConStatInactv"]' IS_LOADING = '//div[@id="j_idt42:j_idt43"]' DONE_OPTIONS_BUTTON = '//a[@id="formErgMainErgebnis:j_idt142"]' +DONE_TOGGLE_OPTIONS_BUTTON = '//input[@id="frmEditQuery:j_idt42:cmdToggleOptions"]' DONE_ACTIONS_BUTTON = '//span[@id="frmEditQuery:j_idt42:ptsEditQuery:0:j_idt49"]' DONE_FILENAME = '//input[@id="frmEditQuery:j_idt42:ptsEditQuery:0:j_idt50:j_idt54"]' DONE_LOOK_BUTTON = '//span[@id="frmEditQuery:j_idt42:ptsEditQuery:0:j_idt109"]' @@ -114,6 +118,7 @@ class Parser: # Clicks on the next step button def nextStep(self): logger.info("Next step...") + WebDriverWait(self.driver, 3).until(EC.visibility_of_element_located((By.XPATH, NEXT_STEP))) nextStep = self.driver.find_element_by_xpath(NEXT_STEP) nextStep.click() @@ -156,6 +161,7 @@ class Parser: def __processMerchandise__(self): # We first need to iterate until we find the corresponding merchandise logger.info("Setting up form...") + WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.XPATH, MERCHANDISE_NUMBER_SELECTOR))) self.driver.find_element_by_xpath(MERCHANDISE_NUMBER_SELECTOR).click() self.w() logger.info("Locating search bar...") @@ -266,14 +272,21 @@ class Parser: Sleepers().waitForURL(self.driver, DONE_URL, 30) logger.info("Preparing results...") - if 
len(self.driver.find_elements_by_xpath(DONE_OPTIONS_BUTTON)) == 0: - logger.info("Apparently the export menu is already open, ignoring next statement") - else: - ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_OPTIONS_BUTTON)).pause(0.5)\ - .perform() + while len(self.driver.find_elements_by_xpath(DONE_OPTIONS_BUTTON)) == 0: + if len(self.driver.find_elements_by_xpath(DONE_TOGGLE_OPTIONS_BUTTON)) == 0: + logger.warning("The page is not loaded properly yet ! Waiting for button.") + WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.XPATH, DONE_OPTIONS_BUTTON))) + else: + logger.info("Apparently the export menu is already open, ignoring next statement") + + ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_OPTIONS_BUTTON)).pause(0.5)\ + .perform() logger.info("Setting export elements correctly...") + WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.XPATH, DONE_LOOK_BUTTON))) + ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_LOOK_BUTTON)).pause(0.5).perform() + WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.XPATH, DONE_SHOW_EMPTY_CELLS_BUTTON))) ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_SHOW_EMPTY_CELLS_BUTTON)) \ .pause(1.5).perform() @@ -285,9 +298,15 @@ class Parser: .pause(1.5).perform() logger.info("Getting ready to export...") + WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located((By.XPATH, DONE_ACTIONS_BUTTON))) ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_ACTIONS_BUTTON)).pause(0.5).perform() logger.info("Setting filename...") + while len(self.driver.find_elements_by_xpath(DONE_FILENAME)) == 0: + logger.warning("Stuborn web interface not responding, retrying") + ActionChains(self.driver).click(self.driver.find_element_by_xpath(DONE_ACTIONS_BUTTON)).pause( + 0.5).perform() + textbox = self.driver.find_element_by_xpath(DONE_FILENAME) textbox.clear() 
filename = "SwissImpex_{0}-{1}_{2}".format( @@ -316,5 +335,5 @@ class Parser: logger.error("Saved file state in " + name + ".") raise AssertionError(str(e)) finally: - self.driver.close() + self.driver.quit() logger.info("Done. Good night !") diff --git a/projet/src/common/mvc/Models.py b/projet/src/common/mvc/Models.py index 19b680e6950e70e85bc29cb079c661f8858d3f76..70e430c9503fb4c1ef1432eef0504afbd4ef5b5e 100644 --- a/projet/src/common/mvc/Models.py +++ b/projet/src/common/mvc/Models.py @@ -83,19 +83,31 @@ class Task(Base): } -def process(item : dict, arguments: dict) -> dict: +def process(item: dict, arguments: dict) -> dict: logger.debug("Seeking correct module...") - if item["module"]["name"] == "Swiss Impex": + mName = item["module"]["name"] + if mName == "Swiss Impex": return starters.startSwissImpex( arguments["merchandiseId"], arguments["year"], arguments["month"], int(env("HTTPHANDLER_WEBSERVER_PORT")) ) - elif item["module"]["name"] == "XLSX Parser": + + elif mName == "FAO Genève": + starters.startFAOGeneve(arguments["pages"]) + return {} + + elif mName == "XLSX Parser": return starters.startXSLXParser(arguments["file"], arguments["instructions"], arguments.get('index') or None, [arguments.get('from') or None, arguments.get('to') or None]) + elif mName == "File Parser": + return starters.startFileParser(arguments["file"], arguments["instructions"]) + + elif mName == "Regex Parser": + return starters.startRegexParser(arguments["input"], arguments["regex"]) + else: - starters.startFAOGeneve() - return {} + # TODO In fact, this will be done in the bachelor part. For now, we are only using static modules. 
+ logger.error("Unrecognized module.") # JIT Relationships diff --git a/projet/src/common/utils/FileParser.py b/projet/src/common/utils/FileParser.py new file mode 100644 index 0000000000000000000000000000000000000000..c9ea0c208f4b67aaada1eca0cc4879094f6511fb --- /dev/null +++ b/projet/src/common/utils/FileParser.py @@ -0,0 +1,15 @@ +class FileParser: + path: str = None + + # Initializes the file parser + def __init__(self, path: str): + self.path = path + self.data = {} + + def run(self, instructions: dict) -> dict: + for name, instruction in instructions.items(): + if type(name) == str and name == "read" and instruction: + file = open(self.path, 'r') + self.data = file.readlines() + + return {"fileParserOutput": self.data} diff --git a/projet/src/common/utils/PerfObserver.py b/projet/src/common/utils/PerfObserver.py new file mode 100644 index 0000000000000000000000000000000000000000..5b163758eb941a751e63e9ed7b9caa867be308fd --- /dev/null +++ b/projet/src/common/utils/PerfObserver.py @@ -0,0 +1,70 @@ +import time +from threading import Thread + +from loguru import logger +from sqlalchemy.orm import Session + +from common.mvc import getSession +from os import getenv as env + +# Allows for client performance observation +from common.mvc.Models import Task +from common.networking.intercom import Intercom + + +class PerformanceObserver: + db: Session = None # Central database + shouldStop: bool = False # Should stop monitoring + clients: list = [] # Clients list + timing: int = 1 # Timing between every measure + + def __init__(self, clients, timing): + self.db = getSession() + self.folder = env("FILE_LOCKER") + self.clients = clients + self.timing = timing + + def start(self, shouldMonitorTasksInDB, shouldMonitorFilesInFileLocker): + Thread(target=self.monitor, args=(shouldMonitorTasksInDB, shouldMonitorFilesInFileLocker)) \ + .start() + + # Monitors every N second the requested data + # Format : number of seconds passed, + def monitor(self, shouldMonitorTasksInDB, 
shouldMonitorFilesInFileLocker): + if not shouldMonitorTasksInDB and not shouldMonitorFilesInFileLocker: + logger.error("You have chosen not to monitor anything...") + exit(250) + + logger.info("Starting performance observer : DB monitoring = {}, File monitoring = {}" + .format(shouldMonitorTasksInDB, shouldMonitorFilesInFileLocker)) + + secondsPassed = 0 + with open('measures.csv', 'a') as fd: + while not self.shouldStop: + currentRun = [str(secondsPassed)] + + if shouldMonitorTasksInDB: + currentRun.append(str(self.db.query(Task).count())) + + if shouldMonitorFilesInFileLocker: + total = 0 + # Reaching all clients to get statrep files number + for client in self.clients: + intercom = Intercom(client, env("CLIENT_PORT"), True) + intercom.talk({"type": "statrep"}) + try: + statrep = intercom.listen(500) + except: + statrep = {"files": []} + total += len(statrep.get("files") or []) + currentRun.append(str(total)) + + fd.write(",".join(currentRun) + "\n") + fd.flush() + time.sleep(self.timing) + secondsPassed += 1 + + def stop(self): + self.shouldStop = True + self.db.close() + logger.info("Stopped observer with success.") diff --git a/projet/src/common/utils/RegexParser.py b/projet/src/common/utils/RegexParser.py new file mode 100644 index 0000000000000000000000000000000000000000..363b578fa60db9758d01df7dc13fbe106fbb3aa5 --- /dev/null +++ b/projet/src/common/utils/RegexParser.py @@ -0,0 +1,22 @@ +import re + +from loguru import logger + + +class RegexParser: + text: str = None + + def __init__(self, text): + self.text = text + + def run(self, regex: str): + data = None + if type(self.text) == str: + data = re.findall(regex, self.text) + elif type(self.text) == list: + data = [re.findall(regex, line) for line in self.text] + data = [k[0] for k in data if len(k) > 0] # Selective flatmap + else: + logger.error("Unsupported regex recognition format.") + + return {"regexOutput": data} diff --git a/projet/src/common/utils/XLSXParser.py 
b/projet/src/common/utils/XLSXParser.py index 44eaad16dbf7576ff21614acd830c2af4d38b40b..f12c8589302ee93ab42189e8f690ebc0ba40446a 100644 --- a/projet/src/common/utils/XLSXParser.py +++ b/projet/src/common/utils/XLSXParser.py @@ -1,5 +1,6 @@ import os.path import re +from collections import OrderedDict from os import path import numpy as np @@ -19,7 +20,7 @@ class XLSXParser: raise RuntimeError("No such file {}".format(path)) self.data = {} - def run(self, instructions: dict, index=None, assistedSpliceData=None) -> dict: + def run(self, instructions: OrderedDict, index=None, assistedSpliceData=None) -> dict: regex = "([A-Z]+)([0-9]+)" alpha0 = numeric0 = alpha1 = numeric1 = None if assistedSpliceData is not None: @@ -29,13 +30,15 @@ class XLSXParser: usecols="{}:{}".format(alpha0, alpha1) if assistedSpliceData is not None else None) logger.debug("Removing NaN values...") - self.workbench.fillna("") + self.workbench = self.workbench.fillna("") if numeric0 is not None and numeric1 is not None: logger.info("Splicing data vertically from {} to {}".format(numeric0, numeric1)) self.data = self.workbench.values[int(numeric0):int(numeric1)] - for action, arguments in instructions.items(): + for instruction in instructions: + action = list(instruction.keys())[0] + arguments = list(instruction.values())[0] if isinstance(arguments, list): if action == "take" and len(arguments) == 2: if not arguments[0].isnumeric() or arguments[1].isnumeric(): @@ -46,8 +49,8 @@ class XLSXParser: elif action == "replace" and len(arguments) == 2: logger.info("Replacing from '{}' to '{}'...".format(arguments[0], arguments[1])) - self.data = {"data": np.where(self.data == arguments[0], arguments[1], self.data)} - + self.data = np.where(self.data == arguments[0], arguments[1], self.data) + elif isinstance(arguments, str): if action == "export" and isinstance(arguments, str): dest = os.path.splitext(self.file)[0] + ".csv" @@ -56,7 +59,7 @@ class XLSXParser: self.data = {"dest": dest} elif 
isinstance(arguments, bool): - if action == "cleanup" and arguments: + if action == "cleanup" and type(arguments) == bool and arguments: os.remove(self.file) else: diff --git a/projet/src/common/utils/YAMLParser.py b/projet/src/common/utils/YAMLParser.py index 2cd87b6f34b542ba4d70eda42c3b9f0ff5642be6..5d0b59c2d24bb38ca580a0cbcc9fab8673d13b90 100644 --- a/projet/src/common/utils/YAMLParser.py +++ b/projet/src/common/utils/YAMLParser.py @@ -1,13 +1,15 @@ import itertools +from collections import OrderedDict from sqlalchemy.orm import Session from zmq import ZMQError +from client.schnell.workers import evaluateWithContext from common.networking.intercom import Intercom from common import mvc import yaml from loguru import logger -from common.mvc.Models import Module, Task +from common.mvc.Models import Module, Task, process from os import getenv as env @@ -17,6 +19,7 @@ class YAMLParser: def __init__(self, path): self.file = yaml.load(open(path), Loader=yaml.FullLoader) + # Converts clients inside the YAML space into real full blown IPs def evaluateClients(self, isRemote: bool, shouldPing: bool = True) -> list: if "clients" not in self.file and isRemote: logger.error("No clients defined in playbook despite remote mode enabled. Stopping.") @@ -44,6 +47,7 @@ class YAMLParser: return output + # Variables beginning and ending with a '$' can be def parseVariable(self, key: str): key = str(key) if key.startswith("$") and key.endswith("$"): @@ -57,6 +61,7 @@ class YAMLParser: else: return key + # Creates the jobs, sends them to their owner and puts them into the DB. 
def evaluateJobs(self, isDry): logger.info("Starting playbook...") @@ -69,17 +74,60 @@ class YAMLParser: # Generating commands for further processing actions = [] + context = {} + clientLock = False for chapter, args in chapters.items(): # Immediately checking for pass keyword if "pass" in args and args.get("pass"): - logger.info("Chapter " + chapter + "has pass keyword, skipping") + logger.info("Chapter {} has pass keyword, skipping".format(chapter)) continue - actions += self.__iterateOnChapter__(chapter, args, isDry) + elif "server" in args and args.get("server"): + if clientLock: + logger.error("Server tasks cannot be run anymore because some client tasks have been generated.") + exit(20) + else: + logger.warning("Chapter {} has server keyword, running server tasks now.".format(chapter)) + context, clientModule, clientModuleName = self.__iterateOnServerChapter__(chapter, args, {}) + logger.warning("Server tasks are now done.") + # Manually calling client tasks. Required since all of then tasks are one single chapter + actions += self.__iterateOnChapter__(clientModuleName, clientModule, context, isDry) + else: + clientLock = True + actions += self.__iterateOnChapter__(chapter, args, {}, isDry) return chapters, actions + # Runs immediate actions and returns the generated infos + def __iterateOnServerChapter__(self, chapter, args: dict, context: dict) -> (dict, dict): + # Trying to find module in DB + session = mvc.getSession() + + module: Module = session.query(Module).filter_by(name=chapter).first() + if module is None: + logger.error("This module does not exist within the database.") + exit(5) + + output = process({"module": module.to_dict()}, {k: evaluateWithContext(arg, context) for k, arg in args.items()}) + context = {**output, **context} + nextModuleName = list(args["then"].keys())[0] if "then" in args else None + + if "then" in args: + if "server" in args["then"][nextModuleName] and args["then"][nextModuleName]["server"]: + logger.info("Server item has 
then module, running.") + + lasts, nextModule, name = self.__iterateOnServerChapter__(nextModuleName, args["then"][nextModuleName], context) + return {**output, **lasts}, nextModule, name + + if "then" not in args: + logger.error("Invalid YAML Format : Missing client tasks.") + exit(60) + + if "server" not in args["then"][nextModuleName]: + logger.info("Last server item detected, stopping") + return output, args["then"][nextModuleName], nextModuleName + # Iterates on one chapter - def __iterateOnChapter__(self, chapter, args, isDry): + def __iterateOnChapter__(self, chapter, args, context, isDry): logger.info("==========================================================") logger.info("Searching for module named as chapter " + chapter + "...") @@ -92,8 +140,8 @@ class YAMLParser: logger.success("Found module in database.") logger.info("Parsing chapter " + chapter + "...") - expansions = {} # Contains all generated expansions items. The expansion is NOT done here ! - meta = {} # Contains all non expanded key with their values + expansions = OrderedDict() # Contains all generated expansions items. The expansion is NOT done here ! + meta = OrderedDict() # Contains all non expanded key with their values # Parsing arguments for argName, argOptions in args.items(): @@ -109,10 +157,12 @@ class YAMLParser: logger.info("Finished reading file, now generating objects for processing") - tasks = [dict(zip(list(expansions.keys()), task)) for task in list(itertools.product(*expansions.values()))] + tasks = [OrderedDict(zip(list(expansions.keys()), task)) for task in + list(itertools.product(*expansions.values()))] logger.info("Generated " + str(len(tasks)) + " tasks.") + # Following immediately the then items. Allows for proper task workflow guidance thenItems = None if "then" in meta: logger.info("Doing 'then' items now. 
First checking if then module is present in DB...") @@ -129,6 +179,7 @@ class YAMLParser: for item in range(len(tasks)): tasks[item][argName] = argValues + # Sending into DB logger.info("Now converting into tasks...") output = list() for k in range(len(tasks)): @@ -146,7 +197,7 @@ class YAMLParser: else: # Small hack to say no to SQLAlchemy... for k in range(len(output)): - output[k].next_task = thenItems[k].to_dict() + output[k].next_task = thenItems[k].to_dict() if thenItems is not None else None realOutput = [task.to_dict() for task in output] @@ -168,7 +219,7 @@ class YAMLParser: return [argOptions] # Handles "then" instructions - def __handleThenType__(self, argOptions: dict, isDry: bool, module: Module, context: int, session : Session) -> list: + def __handleThenType__(self, argOptions: OrderedDict, isDry: bool, module: Module, context: int, session : Session) -> list: if len(argOptions) > 1: logger.error("Multiple then items are not yet supported.") # Non supported (MySQL current format), INOP diff --git a/projet/src/common/utils/starters.py b/projet/src/common/utils/starters.py index b7265db5d4b7ec7cf437405f75b73f14637e346b..8954402bb5ccb2004730fc2d817f5a7cc967d14b 100644 --- a/projet/src/common/utils/starters.py +++ b/projet/src/common/utils/starters.py @@ -1,15 +1,20 @@ # Starts the FAO Geneve import os +from collections import OrderedDict from loguru import logger from client.spiders.seleniumspiders.SwissImpex.Parser import Parser +from common.utils.FileParser import FileParser +from common.utils.RegexParser import RegexParser from common.utils.XLSXParser import XLSXParser -def startFAOGeneve(): - logger.debug("Starting scrapy. Since this is a third party, logs will not be kept here.") - os.system("cd src/Spiders && scrapy crawl FAOGeneve") +def startFAOGeneve(pages): + logger.warning("Starting scrapy. 
Since this is a third party, logs will not be kept here.") + if isinstance(pages, list): + pages = ",".join(pages) + os.system("cd src/client/spiders/scrapy && scrapy crawl FAOGeneve -a pages=" + str(pages)) def startSwissImpex(merchID: str, year: int, month: int, webServerPort: int): @@ -18,7 +23,20 @@ def startSwissImpex(merchID: str, year: int, month: int, webServerPort: int): return parser.parse(webServerPort) -def startXSLXParser(file: str, instructions: dict, sheetIndex: str, assistedSpliceData: list): +def startXSLXParser(file: str, instructions: OrderedDict, sheetIndex: str, assistedSpliceData: list): logger.debug("Starting XSLX Parser...") parser = XLSXParser(file) return parser.run(instructions, sheetIndex, assistedSpliceData) + + +def startFileParser(file: str, instructions: OrderedDict): + logger.debug("Starting File parser...") + parser = FileParser(file) + return parser.run(instructions) + + +def startRegexParser(file: str, regex: str): + logger.debug("Starting RegExp parser...") + parser = RegexParser(file) + return parser.run(regex) + diff --git a/projet/src/server/mvc/WebServer.py b/projet/src/server/mvc/WebServer.py index 2dd76697461402ce7480071e74e81a9fd03e952c..fccef20e73e0ca9da4d507cab787c931866fffad 100644 --- a/projet/src/server/mvc/WebServer.py +++ b/projet/src/server/mvc/WebServer.py @@ -23,9 +23,13 @@ def setup(): {"merchId": "text", "month": "number", "year": "number", "webServerPort": "number"}) xslxParser = Module("XLSX Parser", False, {}) + fileParser = Module("File Parser", False, {"file": "string"}) + regexParser = Module("Regex Parser", False, {"file": "string"}) session.add(fao) session.add(swissimpex) session.add(xslxParser) + session.add(fileParser) + session.add(regexParser) session.commit() diff --git a/projet/src/start.py b/projet/src/start.py index f7266f826d7f30fe7f1061481e786b4332bd4f20..323df907562db530f20d49e3d93784780e3d9fbb 100644 --- a/projet/src/start.py +++ b/projet/src/start.py @@ -8,6 +8,7 @@ import requests from 
dotenv import load_dotenv, find_dotenv from loguru import logger +from common.utils.PerfObserver import PerformanceObserver from server.mvc.WebServer import WebServer from client.schnell import workers from common.mvc.Models import Task @@ -33,6 +34,12 @@ parser.add_argument("--dry", help="Does not actually send any task anywhere. Jus parser.add_argument("--remote", help="Allows remote jobs to be sent to this instance. Only used when working in " "client mode.", default=False, action='store_true') parser.add_argument("--playbook", help="The playbook to run. Only used when working in server mode.") +parser.add_argument("--perf-files", help="Monitor processed files in file locker for performance analysis", + default=False, action='store_true') +parser.add_argument("--perf-db", help="Monitor tasks in central database for performance analysis", + default=False, action='store_true') +parser.add_argument("--perf-timing", help="Timing between each performance analysis measurement. Default : 1", + default=1) args = parser.parse_args() @@ -42,6 +49,10 @@ cleanSlate = args.wipe_tasks isDry = args.dry allowRemote = args.remote playbook = args.playbook +shouldMonitorFiles = args.perf_files +shouldMonitorDB = args.perf_db +shouldMonitor = shouldMonitorFiles or shouldMonitorDB +monitorTiming = args.perf_timing # Executes a playbook. SERVER ONLY @@ -65,6 +76,10 @@ def runPlaybook(p: YAMLParser, clts: list): # Starting program if __name__ == '__main__': + if not isServer and shouldMonitor: + logger.error("You cannot monitor performance with a NScrap client.") + exit(300) + logger.debug("Connecting to DB...") session = mvc.getSession() @@ -74,7 +89,7 @@ if __name__ == '__main__': session.commit() logger.warning("All tasks have been deleted.") - logger.info("Scrapper 058 - Théo Pirkl") + logger.info("NScrap - Théo Pirkl") if not allowRemote: logger.warning("You have forbidden outside instances to connect to this instance. 
\nThat's fine, but you will " "need to start another instance at the same time on this machine with the other mode in order " @@ -102,8 +117,14 @@ if __name__ == '__main__': clientIntercom.talk({"type": "jobs", "jobs": jobs}) clientIntercom.listen() + # Launching monitoring if needed + po = PerformanceObserver(clients, monitorTiming) + if shouldMonitor: + po.start(shouldMonitorDB, shouldMonitorFiles) + input("Press enter to shut down program. This will shut down the management web server.\n" "It's a better practice to wait for the jobs to fully finish before stopping this server.") + po.stop() k = input("Press enter to shut down all workers. Type anything else to shut down only the server.") if k == "": Intercom.broadcastToClients(clients, {"type": "shutdown"}) diff --git a/rapport/.gitignore b/rapport/.gitignore index 0af3ffd603d7080a3b06f1e90df841bf097f5852..27252750b271a7bca1007c00467a9ccd3eca2b4d 100644 --- a/rapport/.gitignore +++ b/rapport/.gitignore @@ -8,3 +8,10 @@ *.html errors.txt rapport.tex +*.lof +*.lot +*.out +*.toc +*.apc +.vscode +.idea \ No newline at end of file diff --git a/rapport/.vscode/spellright.dict b/rapport/.vscode/spellright.dict index 2069a6786f580890c3e8a9105b2d0ba041d21fcc..8821c40a96a41dda21ddb34d40d9c9d05c549fe0 100644 --- a/rapport/.vscode/spellright.dict +++ b/rapport/.vscode/spellright.dict @@ -9,3 +9,4 @@ Schnell prérequis captcha threads +séquentiellement diff --git a/rapport/Makefile b/rapport/Makefile index 5c363e79cdb27fbec652def15ebeaea9100a673e..4b733dcc6d5fd34a24fc82457039c213c6cc12da 100644 --- a/rapport/Makefile +++ b/rapport/Makefile @@ -8,9 +8,6 @@ PDFOPTIONS += --template=./templates/default.latex PDFOPTIONS += --top-level-division=chapter PDFOPTIONS += --filter pandoc-citeproc -PDFOPTIONS_START = --template=./templates/extremely-blank.latex -PDFOPTIONS_START += --pdf-engine xelatex - MD=$(sort $(wildcard text/*.md)) PDF=$(patsubst %.md,%.pdf,$(MD)) TEX=$(patsubst %.md,%.tex,$(MD)) @@ -18,10 +15,12 @@ TEX=$(patsubst 
%.md,%.tex,$(MD)) all: rapport.pdf rapport.pdf: rapport.tex - pandoc -s $(OPTIONS) $(PDFOPTIONS_START) $^ -o $@ + xelatex $^ + xelatex $^ + xelatex $^ rapport.tex: config.yaml $(MD) pandoc -s $(OPTIONS) $(PDFOPTIONS) $^ -o $@ clean: - rm -rf rapport.pdf rapport.tex \ No newline at end of file + rm -f rapport* \ No newline at end of file diff --git a/rapport/config.yaml b/rapport/config.yaml index c083574b31d1ec6a260a4a2fc61bc712e3c317f0..f30af7cfbad3311a278edfc3c417ce0f9372cba0 100644 --- a/rapport/config.yaml +++ b/rapport/config.yaml @@ -1,7 +1,7 @@ --- author: - Théo Pirkl -title: NScrap - Un scrapper modulaire et simple +title: NScrap - Un scrapper modulaire smallTitle: NScrap institute: Haute École du Paysage, d'Ingénierie et d'Architecture de Genève name: Pirkl @@ -9,7 +9,7 @@ surname: Théo keywords: [Rapport, semestre, Théo, Pirkl, HEPIA] orientation: logicielle year: 2020 -sensei: Docteur Orestis Malaspinas +sensei: Dr. Orestis Malaspinas mandator: Aucun frontLogoSourceURL: https://unsplash.com/photos/nE2HV5AUXFo dedicasse: diff --git a/rapport/figs/client-infra.png b/rapport/figs/client-infra.png index d780896914aeb0fecab5c10beb5e0ca545e81b5d..65d51b07a03a7d7af780cfa3308966e0ba2fcd2f 100644 Binary files a/rapport/figs/client-infra.png and b/rapport/figs/client-infra.png differ diff --git a/rapport/figs/faogeneve-1m1c.svg b/rapport/figs/faogeneve-1m1c.svg new file mode 100644 index 0000000000000000000000000000000000000000..2b1de8a10450509e4508188fe241e9fae6909f04 --- /dev/null +++ b/rapport/figs/faogeneve-1m1c.svg @@ -0,0 +1,1968 @@ +<?xml version="1.0" encoding="utf-8" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Created with matplotlib (https://matplotlib.org/) --> +<svg height="345.6pt" version="1.1" viewBox="0 0 460.8 345.6" width="460.8pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> + <defs> + <style type="text/css"> 
+*{stroke-linecap:butt;stroke-linejoin:round;} + </style> + </defs> + <g id="figure_1"> + <g id="patch_1"> + <path d="M 0 345.6 +L 460.8 345.6 +L 460.8 0 +L 0 0 +z +" style="fill:#ffffff;"/> + </g> + <g id="axes_1"> + <g id="patch_2"> + <path d="M 57.6 293.76 +L 414.72 293.76 +L 414.72 41.472 +L 57.6 41.472 +z +" style="fill:#ffffff;"/> + </g> + <g id="matplotlib.axis_1"> + <g id="xtick_1"> + <g id="line2d_1"> + <defs> + <path d="M 0 0 +L 0 3.5 +" id="mb164b9667e" style="stroke:#000000;stroke-width:0.8;"/> + </defs> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="73.832727" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_1"> + <!-- 0 --> + <defs> + <path d="M 31.78125 66.40625 +Q 24.171875 66.40625 20.328125 58.90625 +Q 16.5 51.421875 16.5 36.375 +Q 16.5 21.390625 20.328125 13.890625 +Q 24.171875 6.390625 31.78125 6.390625 +Q 39.453125 6.390625 43.28125 13.890625 +Q 47.125 21.390625 47.125 36.375 +Q 47.125 51.421875 43.28125 58.90625 +Q 39.453125 66.40625 31.78125 66.40625 +z +M 31.78125 74.21875 +Q 44.046875 74.21875 50.515625 64.515625 +Q 56.984375 54.828125 56.984375 36.375 +Q 56.984375 17.96875 50.515625 8.265625 +Q 44.046875 -1.421875 31.78125 -1.421875 +Q 19.53125 -1.421875 13.0625 8.265625 +Q 6.59375 17.96875 6.59375 36.375 +Q 6.59375 54.828125 13.0625 64.515625 +Q 19.53125 74.21875 31.78125 74.21875 +z +" id="DejaVuSans-48"/> + </defs> + <g transform="translate(75.33764 309.337613)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_2"> + <g id="line2d_2"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="86.083842" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_2"> + <!-- 10 --> + <defs> + <path d="M 12.40625 8.296875 +L 28.515625 8.296875 +L 28.515625 63.921875 +L 10.984375 60.40625 +L 10.984375 69.390625 +L 28.421875 72.90625 +L 38.28125 72.90625 +L 38.28125 8.296875 +L 54.390625 8.296875 +L 54.390625 0 +L 12.40625 0 +z +" id="DejaVuSans-49"/> + 
</defs> + <g transform="translate(86.500703 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_3"> + <g id="line2d_3"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="98.334957" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_3"> + <!-- 20 --> + <defs> + <path d="M 19.1875 8.296875 +L 53.609375 8.296875 +L 53.609375 0 +L 7.328125 0 +L 7.328125 8.296875 +Q 12.9375 14.109375 22.625 23.890625 +Q 32.328125 33.6875 34.8125 36.53125 +Q 39.546875 41.84375 41.421875 45.53125 +Q 43.3125 49.21875 43.3125 52.78125 +Q 43.3125 58.59375 39.234375 62.25 +Q 35.15625 65.921875 28.609375 65.921875 +Q 23.96875 65.921875 18.8125 64.3125 +Q 13.671875 62.703125 7.8125 59.421875 +L 7.8125 69.390625 +Q 13.765625 71.78125 18.9375 73 +Q 24.125 74.21875 28.421875 74.21875 +Q 39.75 74.21875 46.484375 68.546875 +Q 53.21875 62.890625 53.21875 53.421875 +Q 53.21875 48.921875 51.53125 44.890625 +Q 49.859375 40.875 45.40625 35.40625 +Q 44.1875 33.984375 37.640625 27.21875 +Q 31.109375 20.453125 19.1875 8.296875 +z +" id="DejaVuSans-50"/> + </defs> + <g transform="translate(98.751818 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_4"> + <g id="line2d_4"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="110.586072" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_4"> + <!-- 30 --> + <defs> + <path d="M 40.578125 39.3125 +Q 47.65625 37.796875 51.625 33 +Q 55.609375 28.21875 55.609375 21.1875 +Q 55.609375 10.40625 48.1875 4.484375 +Q 40.765625 -1.421875 27.09375 -1.421875 +Q 22.515625 -1.421875 17.65625 -0.515625 +Q 12.796875 0.390625 7.625 2.203125 +L 7.625 11.71875 +Q 11.71875 9.328125 16.59375 8.109375 +Q 21.484375 6.890625 26.8125 6.890625 +Q 36.078125 6.890625 40.9375 10.546875 +Q 45.796875 14.203125 45.796875 21.1875 
+Q 45.796875 27.640625 41.28125 31.265625 +Q 36.765625 34.90625 28.71875 34.90625 +L 20.21875 34.90625 +L 20.21875 43.015625 +L 29.109375 43.015625 +Q 36.375 43.015625 40.234375 45.921875 +Q 44.09375 48.828125 44.09375 54.296875 +Q 44.09375 59.90625 40.109375 62.90625 +Q 36.140625 65.921875 28.71875 65.921875 +Q 24.65625 65.921875 20.015625 65.03125 +Q 15.375 64.15625 9.8125 62.3125 +L 9.8125 71.09375 +Q 15.4375 72.65625 20.34375 73.4375 +Q 25.25 74.21875 29.59375 74.21875 +Q 40.828125 74.21875 47.359375 69.109375 +Q 53.90625 64.015625 53.90625 55.328125 +Q 53.90625 49.265625 50.4375 45.09375 +Q 46.96875 40.921875 40.578125 39.3125 +z +" id="DejaVuSans-51"/> + </defs> + <g transform="translate(111.002933 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-51"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_5"> + <g id="line2d_5"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="122.837187" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_5"> + <!-- 40 --> + <defs> + <path d="M 37.796875 64.3125 +L 12.890625 25.390625 +L 37.796875 25.390625 +z +M 35.203125 72.90625 +L 47.609375 72.90625 +L 47.609375 25.390625 +L 58.015625 25.390625 +L 58.015625 17.1875 +L 47.609375 17.1875 +L 47.609375 0 +L 37.796875 0 +L 37.796875 17.1875 +L 4.890625 17.1875 +L 4.890625 26.703125 +z +" id="DejaVuSans-52"/> + </defs> + <g transform="translate(123.254048 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-52"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_6"> + <g id="line2d_6"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="135.088302" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_6"> + <!-- 50 --> + <defs> + <path d="M 10.796875 72.90625 +L 49.515625 72.90625 +L 49.515625 64.59375 +L 19.828125 64.59375 +L 19.828125 46.734375 +Q 21.96875 47.46875 24.109375 47.828125 +Q 26.265625 48.1875 28.421875 48.1875 +Q 
40.625 48.1875 47.75 41.5 +Q 54.890625 34.8125 54.890625 23.390625 +Q 54.890625 11.625 47.5625 5.09375 +Q 40.234375 -1.421875 26.90625 -1.421875 +Q 22.3125 -1.421875 17.546875 -0.640625 +Q 12.796875 0.140625 7.71875 1.703125 +L 7.71875 11.625 +Q 12.109375 9.234375 16.796875 8.0625 +Q 21.484375 6.890625 26.703125 6.890625 +Q 35.15625 6.890625 40.078125 11.328125 +Q 45.015625 15.765625 45.015625 23.390625 +Q 45.015625 31 40.078125 35.4375 +Q 35.15625 39.890625 26.703125 39.890625 +Q 22.75 39.890625 18.8125 39.015625 +Q 14.890625 38.140625 10.796875 36.28125 +z +" id="DejaVuSans-53"/> + </defs> + <g transform="translate(135.505163 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-53"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_7"> + <g id="line2d_7"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="147.339417" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_7"> + <!-- 60 --> + <defs> + <path d="M 33.015625 40.375 +Q 26.375 40.375 22.484375 35.828125 +Q 18.609375 31.296875 18.609375 23.390625 +Q 18.609375 15.53125 22.484375 10.953125 +Q 26.375 6.390625 33.015625 6.390625 +Q 39.65625 6.390625 43.53125 10.953125 +Q 47.40625 15.53125 47.40625 23.390625 +Q 47.40625 31.296875 43.53125 35.828125 +Q 39.65625 40.375 33.015625 40.375 +z +M 52.59375 71.296875 +L 52.59375 62.3125 +Q 48.875 64.0625 45.09375 64.984375 +Q 41.3125 65.921875 37.59375 65.921875 +Q 27.828125 65.921875 22.671875 59.328125 +Q 17.53125 52.734375 16.796875 39.40625 +Q 19.671875 43.65625 24.015625 45.921875 +Q 28.375 48.1875 33.59375 48.1875 +Q 44.578125 48.1875 50.953125 41.515625 +Q 57.328125 34.859375 57.328125 23.390625 +Q 57.328125 12.15625 50.6875 5.359375 +Q 44.046875 -1.421875 33.015625 -1.421875 +Q 20.359375 -1.421875 13.671875 8.265625 +Q 6.984375 17.96875 6.984375 36.375 +Q 6.984375 53.65625 15.1875 63.9375 +Q 23.390625 74.21875 37.203125 74.21875 +Q 40.921875 74.21875 44.703125 73.484375 +Q 48.484375 
72.75 52.59375 71.296875 +z +" id="DejaVuSans-54"/> + </defs> + <g transform="translate(147.756278 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-54"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_8"> + <g id="line2d_8"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="159.590532" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_8"> + <!-- 70 --> + <defs> + <path d="M 8.203125 72.90625 +L 55.078125 72.90625 +L 55.078125 68.703125 +L 28.609375 0 +L 18.3125 0 +L 43.21875 64.59375 +L 8.203125 64.59375 +z +" id="DejaVuSans-55"/> + </defs> + <g transform="translate(160.007393 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-55"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_9"> + <g id="line2d_9"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="171.841647" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_9"> + <!-- 80 --> + <defs> + <path d="M 31.78125 34.625 +Q 24.75 34.625 20.71875 30.859375 +Q 16.703125 27.09375 16.703125 20.515625 +Q 16.703125 13.921875 20.71875 10.15625 +Q 24.75 6.390625 31.78125 6.390625 +Q 38.8125 6.390625 42.859375 10.171875 +Q 46.921875 13.96875 46.921875 20.515625 +Q 46.921875 27.09375 42.890625 30.859375 +Q 38.875 34.625 31.78125 34.625 +z +M 21.921875 38.8125 +Q 15.578125 40.375 12.03125 44.71875 +Q 8.5 49.078125 8.5 55.328125 +Q 8.5 64.0625 14.71875 69.140625 +Q 20.953125 74.21875 31.78125 74.21875 +Q 42.671875 74.21875 48.875 69.140625 +Q 55.078125 64.0625 55.078125 55.328125 +Q 55.078125 49.078125 51.53125 44.71875 +Q 48 40.375 41.703125 38.8125 +Q 48.828125 37.15625 52.796875 32.3125 +Q 56.78125 27.484375 56.78125 20.515625 +Q 56.78125 9.90625 50.3125 4.234375 +Q 43.84375 -1.421875 31.78125 -1.421875 +Q 19.734375 -1.421875 13.25 4.234375 +Q 6.78125 9.90625 6.78125 20.515625 +Q 6.78125 27.484375 10.78125 32.3125 +Q 14.796875 37.15625 21.921875 38.8125 +z +M 
18.3125 54.390625 +Q 18.3125 48.734375 21.84375 45.5625 +Q 25.390625 42.390625 31.78125 42.390625 +Q 38.140625 42.390625 41.71875 45.5625 +Q 45.3125 48.734375 45.3125 54.390625 +Q 45.3125 60.0625 41.71875 63.234375 +Q 38.140625 66.40625 31.78125 66.40625 +Q 25.390625 66.40625 21.84375 63.234375 +Q 18.3125 60.0625 18.3125 54.390625 +z +" id="DejaVuSans-56"/> + </defs> + <g transform="translate(172.258508 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-56"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_10"> + <g id="line2d_10"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="184.092762" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_10"> + <!-- 90 --> + <defs> + <path d="M 10.984375 1.515625 +L 10.984375 10.5 +Q 14.703125 8.734375 18.5 7.8125 +Q 22.3125 6.890625 25.984375 6.890625 +Q 35.75 6.890625 40.890625 13.453125 +Q 46.046875 20.015625 46.78125 33.40625 +Q 43.953125 29.203125 39.59375 26.953125 +Q 35.25 24.703125 29.984375 24.703125 +Q 19.046875 24.703125 12.671875 31.3125 +Q 6.296875 37.9375 6.296875 49.421875 +Q 6.296875 60.640625 12.9375 67.421875 +Q 19.578125 74.21875 30.609375 74.21875 +Q 43.265625 74.21875 49.921875 64.515625 +Q 56.59375 54.828125 56.59375 36.375 +Q 56.59375 19.140625 48.40625 8.859375 +Q 40.234375 -1.421875 26.421875 -1.421875 +Q 22.703125 -1.421875 18.890625 -0.6875 +Q 15.09375 0.046875 10.984375 1.515625 +z +M 30.609375 32.421875 +Q 37.25 32.421875 41.125 36.953125 +Q 45.015625 41.5 45.015625 49.421875 +Q 45.015625 57.28125 41.125 61.84375 +Q 37.25 66.40625 30.609375 66.40625 +Q 23.96875 66.40625 20.09375 61.84375 +Q 16.21875 57.28125 16.21875 49.421875 +Q 16.21875 41.5 20.09375 36.953125 +Q 23.96875 32.421875 30.609375 32.421875 +z +" id="DejaVuSans-57"/> + </defs> + <g transform="translate(184.509623 315.316407)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-57"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + 
</g> + <g id="xtick_11"> + <g id="line2d_11"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="196.343877" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_11"> + <!-- 100 --> + <g transform="translate(195.672686 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_12"> + <g id="line2d_12"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="208.594991" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_12"> + <!-- 110 --> + <g transform="translate(207.923801 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-49"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_13"> + <g id="line2d_13"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="220.846106" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_13"> + <!-- 120 --> + <g transform="translate(220.174916 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-50"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_14"> + <g id="line2d_14"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="233.097221" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_14"> + <!-- 130 --> + <g transform="translate(232.426031 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-51"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_15"> + <g id="line2d_15"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="245.348336" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_15"> + <!-- 140 --> + <g transform="translate(244.677146 
321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-52"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_16"> + <g id="line2d_16"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="257.599451" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_16"> + <!-- 150 --> + <g transform="translate(256.928261 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_17"> + <g id="line2d_17"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="269.850566" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_17"> + <!-- 160 --> + <g transform="translate(269.179376 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-54"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_18"> + <g id="line2d_18"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="282.101681" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_18"> + <!-- 170 --> + <g transform="translate(281.430491 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-55"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_19"> + <g id="line2d_19"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="294.352796" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_19"> + <!-- 180 --> + <g transform="translate(293.681605 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-56"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_20"> + <g id="line2d_20"> + <g> + <use 
style="stroke:#000000;stroke-width:0.8;" x="306.603911" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_20"> + <!-- 190 --> + <g transform="translate(305.93272 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-57"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_21"> + <g id="line2d_21"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="318.855026" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_21"> + <!-- 200 --> + <g transform="translate(318.183835 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_22"> + <g id="line2d_22"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="331.106141" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_22"> + <!-- 210 --> + <g transform="translate(330.43495 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-49"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_23"> + <g id="line2d_23"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="343.357256" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_23"> + <!-- 220 --> + <g transform="translate(342.686065 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-50"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_24"> + <g id="line2d_24"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="355.60837" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_24"> + <!-- 230 --> + <g transform="translate(354.93718 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use 
x="63.623047" xlink:href="#DejaVuSans-51"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_25"> + <g id="line2d_25"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="367.859485" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_25"> + <!-- 240 --> + <g transform="translate(367.188295 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-52"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_26"> + <g id="line2d_26"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="380.1106" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_26"> + <!-- 250 --> + <g transform="translate(379.43941 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="xtick_27"> + <g id="line2d_27"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="392.361715" xlink:href="#mb164b9667e" y="293.76"/> + </g> + </g> + <g id="text_27"> + <!-- 260 --> + <g transform="translate(391.690525 321.295202)rotate(-70)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-54"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="text_28"> + <!-- Documents téléchargés [0-1000] --> + <defs> + <path d="M 19.671875 64.796875 +L 19.671875 8.109375 +L 31.59375 8.109375 +Q 46.6875 8.109375 53.6875 14.9375 +Q 60.6875 21.78125 60.6875 36.53125 +Q 60.6875 51.171875 53.6875 57.984375 +Q 46.6875 64.796875 31.59375 64.796875 +z +M 9.8125 72.90625 +L 30.078125 72.90625 +Q 51.265625 72.90625 61.171875 64.09375 +Q 71.09375 55.28125 71.09375 36.53125 +Q 71.09375 17.671875 61.125 8.828125 +Q 51.171875 0 30.078125 0 +L 9.8125 0 +z +" id="DejaVuSans-68"/> + <path d="M 30.609375 48.390625 +Q 23.390625 
48.390625 19.1875 42.75 +Q 14.984375 37.109375 14.984375 27.296875 +Q 14.984375 17.484375 19.15625 11.84375 +Q 23.34375 6.203125 30.609375 6.203125 +Q 37.796875 6.203125 41.984375 11.859375 +Q 46.1875 17.53125 46.1875 27.296875 +Q 46.1875 37.015625 41.984375 42.703125 +Q 37.796875 48.390625 30.609375 48.390625 +z +M 30.609375 56 +Q 42.328125 56 49.015625 48.375 +Q 55.71875 40.765625 55.71875 27.296875 +Q 55.71875 13.875 49.015625 6.21875 +Q 42.328125 -1.421875 30.609375 -1.421875 +Q 18.84375 -1.421875 12.171875 6.21875 +Q 5.515625 13.875 5.515625 27.296875 +Q 5.515625 40.765625 12.171875 48.375 +Q 18.84375 56 30.609375 56 +z +" id="DejaVuSans-111"/> + <path d="M 48.78125 52.59375 +L 48.78125 44.1875 +Q 44.96875 46.296875 41.140625 47.34375 +Q 37.3125 48.390625 33.40625 48.390625 +Q 24.65625 48.390625 19.8125 42.84375 +Q 14.984375 37.3125 14.984375 27.296875 +Q 14.984375 17.28125 19.8125 11.734375 +Q 24.65625 6.203125 33.40625 6.203125 +Q 37.3125 6.203125 41.140625 7.25 +Q 44.96875 8.296875 48.78125 10.40625 +L 48.78125 2.09375 +Q 45.015625 0.34375 40.984375 -0.53125 +Q 36.96875 -1.421875 32.421875 -1.421875 +Q 20.0625 -1.421875 12.78125 6.34375 +Q 5.515625 14.109375 5.515625 27.296875 +Q 5.515625 40.671875 12.859375 48.328125 +Q 20.21875 56 33.015625 56 +Q 37.15625 56 41.109375 55.140625 +Q 45.0625 54.296875 48.78125 52.59375 +z +" id="DejaVuSans-99"/> + <path d="M 8.5 21.578125 +L 8.5 54.6875 +L 17.484375 54.6875 +L 17.484375 21.921875 +Q 17.484375 14.15625 20.5 10.265625 +Q 23.53125 6.390625 29.59375 6.390625 +Q 36.859375 6.390625 41.078125 11.03125 +Q 45.3125 15.671875 45.3125 23.6875 +L 45.3125 54.6875 +L 54.296875 54.6875 +L 54.296875 0 +L 45.3125 0 +L 45.3125 8.40625 +Q 42.046875 3.421875 37.71875 1 +Q 33.40625 -1.421875 27.6875 -1.421875 +Q 18.265625 -1.421875 13.375 4.4375 +Q 8.5 10.296875 8.5 21.578125 +z +M 31.109375 56 +z +" id="DejaVuSans-117"/> + <path d="M 52 44.1875 +Q 55.375 50.25 60.0625 53.125 +Q 64.75 56 71.09375 56 +Q 79.640625 56 84.28125 
50.015625 +Q 88.921875 44.046875 88.921875 33.015625 +L 88.921875 0 +L 79.890625 0 +L 79.890625 32.71875 +Q 79.890625 40.578125 77.09375 44.375 +Q 74.3125 48.1875 68.609375 48.1875 +Q 61.625 48.1875 57.5625 43.546875 +Q 53.515625 38.921875 53.515625 30.90625 +L 53.515625 0 +L 44.484375 0 +L 44.484375 32.71875 +Q 44.484375 40.625 41.703125 44.40625 +Q 38.921875 48.1875 33.109375 48.1875 +Q 26.21875 48.1875 22.15625 43.53125 +Q 18.109375 38.875 18.109375 30.90625 +L 18.109375 0 +L 9.078125 0 +L 9.078125 54.6875 +L 18.109375 54.6875 +L 18.109375 46.1875 +Q 21.1875 51.21875 25.484375 53.609375 +Q 29.78125 56 35.6875 56 +Q 41.65625 56 45.828125 52.96875 +Q 50 49.953125 52 44.1875 +z +" id="DejaVuSans-109"/> + <path d="M 56.203125 29.59375 +L 56.203125 25.203125 +L 14.890625 25.203125 +Q 15.484375 15.921875 20.484375 11.0625 +Q 25.484375 6.203125 34.421875 6.203125 +Q 39.59375 6.203125 44.453125 7.46875 +Q 49.3125 8.734375 54.109375 11.28125 +L 54.109375 2.78125 +Q 49.265625 0.734375 44.1875 -0.34375 +Q 39.109375 -1.421875 33.890625 -1.421875 +Q 20.796875 -1.421875 13.15625 6.1875 +Q 5.515625 13.8125 5.515625 26.8125 +Q 5.515625 40.234375 12.765625 48.109375 +Q 20.015625 56 32.328125 56 +Q 43.359375 56 49.78125 48.890625 +Q 56.203125 41.796875 56.203125 29.59375 +z +M 47.21875 32.234375 +Q 47.125 39.59375 43.09375 43.984375 +Q 39.0625 48.390625 32.421875 48.390625 +Q 24.90625 48.390625 20.390625 44.140625 +Q 15.875 39.890625 15.1875 32.171875 +z +" id="DejaVuSans-101"/> + <path d="M 54.890625 33.015625 +L 54.890625 0 +L 45.90625 0 +L 45.90625 32.71875 +Q 45.90625 40.484375 42.875 44.328125 +Q 39.84375 48.1875 33.796875 48.1875 +Q 26.515625 48.1875 22.3125 43.546875 +Q 18.109375 38.921875 18.109375 30.90625 +L 18.109375 0 +L 9.078125 0 +L 9.078125 54.6875 +L 18.109375 54.6875 +L 18.109375 46.1875 +Q 21.34375 51.125 25.703125 53.5625 +Q 30.078125 56 35.796875 56 +Q 45.21875 56 50.046875 50.171875 +Q 54.890625 44.34375 54.890625 33.015625 +z +" id="DejaVuSans-110"/> + <path 
d="M 18.3125 70.21875 +L 18.3125 54.6875 +L 36.8125 54.6875 +L 36.8125 47.703125 +L 18.3125 47.703125 +L 18.3125 18.015625 +Q 18.3125 11.328125 20.140625 9.421875 +Q 21.96875 7.515625 27.59375 7.515625 +L 36.8125 7.515625 +L 36.8125 0 +L 27.59375 0 +Q 17.1875 0 13.234375 3.875 +Q 9.28125 7.765625 9.28125 18.015625 +L 9.28125 47.703125 +L 2.6875 47.703125 +L 2.6875 54.6875 +L 9.28125 54.6875 +L 9.28125 70.21875 +z +" id="DejaVuSans-116"/> + <path d="M 44.28125 53.078125 +L 44.28125 44.578125 +Q 40.484375 46.53125 36.375 47.5 +Q 32.28125 48.484375 27.875 48.484375 +Q 21.1875 48.484375 17.84375 46.4375 +Q 14.5 44.390625 14.5 40.28125 +Q 14.5 37.15625 16.890625 35.375 +Q 19.28125 33.59375 26.515625 31.984375 +L 29.59375 31.296875 +Q 39.15625 29.25 43.1875 25.515625 +Q 47.21875 21.78125 47.21875 15.09375 +Q 47.21875 7.46875 41.1875 3.015625 +Q 35.15625 -1.421875 24.609375 -1.421875 +Q 20.21875 -1.421875 15.453125 -0.5625 +Q 10.6875 0.296875 5.421875 2 +L 5.421875 11.28125 +Q 10.40625 8.6875 15.234375 7.390625 +Q 20.0625 6.109375 24.8125 6.109375 +Q 31.15625 6.109375 34.5625 8.28125 +Q 37.984375 10.453125 37.984375 14.40625 +Q 37.984375 18.0625 35.515625 20.015625 +Q 33.0625 21.96875 24.703125 23.78125 +L 21.578125 24.515625 +Q 13.234375 26.265625 9.515625 29.90625 +Q 5.8125 33.546875 5.8125 39.890625 +Q 5.8125 47.609375 11.28125 51.796875 +Q 16.75 56 26.8125 56 +Q 31.78125 56 36.171875 55.265625 +Q 40.578125 54.546875 44.28125 53.078125 +z +" id="DejaVuSans-115"/> + <path id="DejaVuSans-32"/> + <path d="M 56.203125 29.59375 +L 56.203125 25.203125 +L 14.890625 25.203125 +Q 15.484375 15.921875 20.484375 11.0625 +Q 25.484375 6.203125 34.421875 6.203125 +Q 39.59375 6.203125 44.453125 7.46875 +Q 49.3125 8.734375 54.109375 11.28125 +L 54.109375 2.78125 +Q 49.265625 0.734375 44.1875 -0.34375 +Q 39.109375 -1.421875 33.890625 -1.421875 +Q 20.796875 -1.421875 13.15625 6.1875 +Q 5.515625 13.8125 5.515625 26.8125 +Q 5.515625 40.234375 12.765625 48.109375 +Q 20.015625 56 32.328125 
56 +Q 43.359375 56 49.78125 48.890625 +Q 56.203125 41.796875 56.203125 29.59375 +z +M 47.21875 32.234375 +Q 47.125 39.59375 43.09375 43.984375 +Q 39.0625 48.390625 32.421875 48.390625 +Q 24.90625 48.390625 20.390625 44.140625 +Q 15.875 39.890625 15.1875 32.171875 +z +M 38.53125 79.984375 +L 48.25 79.984375 +L 32.34375 61.625 +L 24.859375 61.625 +z +" id="DejaVuSans-233"/> + <path d="M 9.421875 75.984375 +L 18.40625 75.984375 +L 18.40625 0 +L 9.421875 0 +z +" id="DejaVuSans-108"/> + <path d="M 54.890625 33.015625 +L 54.890625 0 +L 45.90625 0 +L 45.90625 32.71875 +Q 45.90625 40.484375 42.875 44.328125 +Q 39.84375 48.1875 33.796875 48.1875 +Q 26.515625 48.1875 22.3125 43.546875 +Q 18.109375 38.921875 18.109375 30.90625 +L 18.109375 0 +L 9.078125 0 +L 9.078125 75.984375 +L 18.109375 75.984375 +L 18.109375 46.1875 +Q 21.34375 51.125 25.703125 53.5625 +Q 30.078125 56 35.796875 56 +Q 45.21875 56 50.046875 50.171875 +Q 54.890625 44.34375 54.890625 33.015625 +z +" id="DejaVuSans-104"/> + <path d="M 34.28125 27.484375 +Q 23.390625 27.484375 19.1875 25 +Q 14.984375 22.515625 14.984375 16.5 +Q 14.984375 11.71875 18.140625 8.90625 +Q 21.296875 6.109375 26.703125 6.109375 +Q 34.1875 6.109375 38.703125 11.40625 +Q 43.21875 16.703125 43.21875 25.484375 +L 43.21875 27.484375 +z +M 52.203125 31.203125 +L 52.203125 0 +L 43.21875 0 +L 43.21875 8.296875 +Q 40.140625 3.328125 35.546875 0.953125 +Q 30.953125 -1.421875 24.3125 -1.421875 +Q 15.921875 -1.421875 10.953125 3.296875 +Q 6 8.015625 6 15.921875 +Q 6 25.140625 12.171875 29.828125 +Q 18.359375 34.515625 30.609375 34.515625 +L 43.21875 34.515625 +L 43.21875 35.40625 +Q 43.21875 41.609375 39.140625 45 +Q 35.0625 48.390625 27.6875 48.390625 +Q 23 48.390625 18.546875 47.265625 +Q 14.109375 46.140625 10.015625 43.890625 +L 10.015625 52.203125 +Q 14.9375 54.109375 19.578125 55.046875 +Q 24.21875 56 28.609375 56 +Q 40.484375 56 46.34375 49.84375 +Q 52.203125 43.703125 52.203125 31.203125 +z +" id="DejaVuSans-97"/> + <path d="M 41.109375 
46.296875 +Q 39.59375 47.171875 37.8125 47.578125 +Q 36.03125 48 33.890625 48 +Q 26.265625 48 22.1875 43.046875 +Q 18.109375 38.09375 18.109375 28.8125 +L 18.109375 0 +L 9.078125 0 +L 9.078125 54.6875 +L 18.109375 54.6875 +L 18.109375 46.1875 +Q 20.953125 51.171875 25.484375 53.578125 +Q 30.03125 56 36.53125 56 +Q 37.453125 56 38.578125 55.875 +Q 39.703125 55.765625 41.0625 55.515625 +z +" id="DejaVuSans-114"/> + <path d="M 45.40625 27.984375 +Q 45.40625 37.75 41.375 43.109375 +Q 37.359375 48.484375 30.078125 48.484375 +Q 22.859375 48.484375 18.828125 43.109375 +Q 14.796875 37.75 14.796875 27.984375 +Q 14.796875 18.265625 18.828125 12.890625 +Q 22.859375 7.515625 30.078125 7.515625 +Q 37.359375 7.515625 41.375 12.890625 +Q 45.40625 18.265625 45.40625 27.984375 +z +M 54.390625 6.78125 +Q 54.390625 -7.171875 48.1875 -13.984375 +Q 42 -20.796875 29.203125 -20.796875 +Q 24.46875 -20.796875 20.265625 -20.09375 +Q 16.0625 -19.390625 12.109375 -17.921875 +L 12.109375 -9.1875 +Q 16.0625 -11.328125 19.921875 -12.34375 +Q 23.78125 -13.375 27.78125 -13.375 +Q 36.625 -13.375 41.015625 -8.765625 +Q 45.40625 -4.15625 45.40625 5.171875 +L 45.40625 9.625 +Q 42.625 4.78125 38.28125 2.390625 +Q 33.9375 0 27.875 0 +Q 17.828125 0 11.671875 7.65625 +Q 5.515625 15.328125 5.515625 27.984375 +Q 5.515625 40.671875 11.671875 48.328125 +Q 17.828125 56 27.875 56 +Q 33.9375 56 38.28125 53.609375 +Q 42.625 51.21875 45.40625 46.390625 +L 45.40625 54.6875 +L 54.390625 54.6875 +z +" id="DejaVuSans-103"/> + <path d="M 8.59375 75.984375 +L 29.296875 75.984375 +L 29.296875 69 +L 17.578125 69 +L 17.578125 -6.203125 +L 29.296875 -6.203125 +L 29.296875 -13.1875 +L 8.59375 -13.1875 +z +" id="DejaVuSans-91"/> + <path d="M 4.890625 31.390625 +L 31.203125 31.390625 +L 31.203125 23.390625 +L 4.890625 23.390625 +z +" id="DejaVuSans-45"/> + <path d="M 30.421875 75.984375 +L 30.421875 -13.1875 +L 9.71875 -13.1875 +L 9.71875 -6.203125 +L 21.390625 -6.203125 +L 21.390625 69 +L 9.71875 69 +L 9.71875 75.984375 +z +" 
id="DejaVuSans-93"/> + </defs> + <g transform="translate(153.554531 334.004934)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-68"/> + <use x="77.001953" xlink:href="#DejaVuSans-111"/> + <use x="138.183594" xlink:href="#DejaVuSans-99"/> + <use x="193.164062" xlink:href="#DejaVuSans-117"/> + <use x="256.542969" xlink:href="#DejaVuSans-109"/> + <use x="353.955078" xlink:href="#DejaVuSans-101"/> + <use x="415.478516" xlink:href="#DejaVuSans-110"/> + <use x="478.857422" xlink:href="#DejaVuSans-116"/> + <use x="518.066406" xlink:href="#DejaVuSans-115"/> + <use x="570.166016" xlink:href="#DejaVuSans-32"/> + <use x="601.953125" xlink:href="#DejaVuSans-116"/> + <use x="641.162109" xlink:href="#DejaVuSans-233"/> + <use x="702.685547" xlink:href="#DejaVuSans-108"/> + <use x="730.46875" xlink:href="#DejaVuSans-233"/> + <use x="791.992188" xlink:href="#DejaVuSans-99"/> + <use x="846.972656" xlink:href="#DejaVuSans-104"/> + <use x="910.351562" xlink:href="#DejaVuSans-97"/> + <use x="971.630859" xlink:href="#DejaVuSans-114"/> + <use x="1010.994141" xlink:href="#DejaVuSans-103"/> + <use x="1074.470703" xlink:href="#DejaVuSans-233"/> + <use x="1135.994141" xlink:href="#DejaVuSans-115"/> + <use x="1188.09375" xlink:href="#DejaVuSans-32"/> + <use x="1219.880859" xlink:href="#DejaVuSans-91"/> + <use x="1258.894531" xlink:href="#DejaVuSans-48"/> + <use x="1322.517578" xlink:href="#DejaVuSans-45"/> + <use x="1358.601562" xlink:href="#DejaVuSans-49"/> + <use x="1422.224609" xlink:href="#DejaVuSans-48"/> + <use x="1485.847656" xlink:href="#DejaVuSans-48"/> + <use x="1549.470703" xlink:href="#DejaVuSans-48"/> + <use x="1613.09375" xlink:href="#DejaVuSans-93"/> + </g> + </g> + </g> + <g id="matplotlib.axis_2"> + <g id="ytick_1"> + <g id="line2d_28"> + <defs> + <path d="M 0 0 +L -3.5 0 +" id="mdd8a0640cb" style="stroke:#000000;stroke-width:0.8;"/> + </defs> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="282.292364"/> + </g> + </g> + <g 
id="text_29"> + <!-- 0 --> + <g transform="translate(44.2375 286.091582)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_2"> + <g id="line2d_29"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="270.824727"/> + </g> + </g> + <g id="text_30"> + <!-- 50 --> + <g transform="translate(37.875 274.623946)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-53"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_3"> + <g id="line2d_30"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="259.357091"/> + </g> + </g> + <g id="text_31"> + <!-- 100 --> + <g transform="translate(31.5125 263.15631)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_4"> + <g id="line2d_31"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="247.889455"/> + </g> + </g> + <g id="text_32"> + <!-- 150 --> + <g transform="translate(31.5125 251.688673)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_5"> + <g id="line2d_32"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="236.421818"/> + </g> + </g> + <g id="text_33"> + <!-- 200 --> + <g transform="translate(31.5125 240.221037)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_6"> + <g id="line2d_33"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="224.954182"/> + </g> + </g> + <g id="text_34"> + <!-- 250 --> + <g transform="translate(31.5125 
228.753401)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-50"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_7"> + <g id="line2d_34"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="213.486545"/> + </g> + </g> + <g id="text_35"> + <!-- 300 --> + <g transform="translate(31.5125 217.285764)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-51"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_8"> + <g id="line2d_35"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="202.018909"/> + </g> + </g> + <g id="text_36"> + <!-- 350 --> + <g transform="translate(31.5125 205.818128)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-51"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_9"> + <g id="line2d_36"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="190.551273"/> + </g> + </g> + <g id="text_37"> + <!-- 400 --> + <g transform="translate(31.5125 194.350491)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-52"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_10"> + <g id="line2d_37"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="179.083636"/> + </g> + </g> + <g id="text_38"> + <!-- 450 --> + <g transform="translate(31.5125 182.882855)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-52"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_11"> + <g id="line2d_38"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" 
y="167.616"/> + </g> + </g> + <g id="text_39"> + <!-- 500 --> + <g transform="translate(31.5125 171.415219)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-53"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_12"> + <g id="line2d_39"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="156.148364"/> + </g> + </g> + <g id="text_40"> + <!-- 550 --> + <g transform="translate(31.5125 159.947582)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-53"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_13"> + <g id="line2d_40"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="144.680727"/> + </g> + </g> + <g id="text_41"> + <!-- 600 --> + <g transform="translate(31.5125 148.479946)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-54"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_14"> + <g id="line2d_41"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="133.213091"/> + </g> + </g> + <g id="text_42"> + <!-- 650 --> + <g transform="translate(31.5125 137.01231)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-54"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_15"> + <g id="line2d_42"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="121.745455"/> + </g> + </g> + <g id="text_43"> + <!-- 700 --> + <g transform="translate(31.5125 125.544673)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-55"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_16"> + <g 
id="line2d_43"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="110.277818"/> + </g> + </g> + <g id="text_44"> + <!-- 750 --> + <g transform="translate(31.5125 114.077037)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-55"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_17"> + <g id="line2d_44"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="98.810182"/> + </g> + </g> + <g id="text_45"> + <!-- 800 --> + <g transform="translate(31.5125 102.609401)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-56"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_18"> + <g id="line2d_45"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="87.342545"/> + </g> + </g> + <g id="text_46"> + <!-- 850 --> + <g transform="translate(31.5125 91.141764)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-56"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_19"> + <g id="line2d_46"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="75.874909"/> + </g> + </g> + <g id="text_47"> + <!-- 900 --> + <g transform="translate(31.5125 79.674128)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-57"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_20"> + <g id="line2d_47"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="64.407273"/> + </g> + </g> + <g id="text_48"> + <!-- 950 --> + <g transform="translate(31.5125 68.206491)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-57"/> + <use x="63.623047" xlink:href="#DejaVuSans-53"/> + <use 
x="127.246094" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="ytick_21"> + <g id="line2d_48"> + <g> + <use style="stroke:#000000;stroke-width:0.8;" x="57.6" xlink:href="#mdd8a0640cb" y="52.939636"/> + </g> + </g> + <g id="text_49"> + <!-- 1000 --> + <g transform="translate(25.15 56.738855)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-49"/> + <use x="63.623047" xlink:href="#DejaVuSans-48"/> + <use x="127.246094" xlink:href="#DejaVuSans-48"/> + <use x="190.869141" xlink:href="#DejaVuSans-48"/> + </g> + </g> + </g> + <g id="text_50"> + <!-- Temps [s] --> + <defs> + <path d="M -0.296875 72.90625 +L 61.375 72.90625 +L 61.375 64.59375 +L 35.5 64.59375 +L 35.5 0 +L 25.59375 0 +L 25.59375 64.59375 +L -0.296875 64.59375 +z +" id="DejaVuSans-84"/> + <path d="M 18.109375 8.203125 +L 18.109375 -20.796875 +L 9.078125 -20.796875 +L 9.078125 54.6875 +L 18.109375 54.6875 +L 18.109375 46.390625 +Q 20.953125 51.265625 25.265625 53.625 +Q 29.59375 56 35.59375 56 +Q 45.5625 56 51.78125 48.09375 +Q 58.015625 40.1875 58.015625 27.296875 +Q 58.015625 14.40625 51.78125 6.484375 +Q 45.5625 -1.421875 35.59375 -1.421875 +Q 29.59375 -1.421875 25.265625 0.953125 +Q 20.953125 3.328125 18.109375 8.203125 +z +M 48.6875 27.296875 +Q 48.6875 37.203125 44.609375 42.84375 +Q 40.53125 48.484375 33.40625 48.484375 +Q 26.265625 48.484375 22.1875 42.84375 +Q 18.109375 37.203125 18.109375 27.296875 +Q 18.109375 17.390625 22.1875 11.75 +Q 26.265625 6.109375 33.40625 6.109375 +Q 40.53125 6.109375 44.609375 11.75 +Q 48.6875 17.390625 48.6875 27.296875 +z +" id="DejaVuSans-112"/> + </defs> + <g transform="translate(19.070312 191.641)rotate(-90)scale(0.1 -0.1)"> + <use xlink:href="#DejaVuSans-84"/> + <use x="44.083984" xlink:href="#DejaVuSans-101"/> + <use x="105.607422" xlink:href="#DejaVuSans-109"/> + <use x="203.019531" xlink:href="#DejaVuSans-112"/> + <use x="266.496094" xlink:href="#DejaVuSans-115"/> + <use x="318.595703" xlink:href="#DejaVuSans-32"/> + <use x="350.382812" 
xlink:href="#DejaVuSans-91"/> + <use x="389.396484" xlink:href="#DejaVuSans-115"/> + <use x="441.496094" xlink:href="#DejaVuSans-93"/> + </g> + </g> + </g> + <g id="line2d_49"> + <path clip-path="url(#pa3e036b96b)" d="M 73.832727 282.292364 +L 76.28295 282.292364 +L 77.508062 282.063011 +L 79.958285 278.852073 +L 81.183396 277.705309 +L 82.408508 276.329193 +L 83.633619 275.411782 +L 84.858731 275.411782 +L 87.308954 273.118255 +L 88.534065 273.118255 +L 89.759177 272.888902 +L 90.984288 270.824727 +L 92.2094 270.366022 +L 93.434511 268.5312 +L 94.659623 268.5312 +L 95.884734 267.155084 +L 97.109846 266.237673 +L 98.334957 266.00832 +L 100.78518 264.173498 +L 103.235403 262.797382 +L 104.460515 261.879971 +L 105.685626 260.503855 +L 106.910738 259.357091 +L 108.135849 258.43968 +L 109.360961 257.980975 +L 110.586072 256.146153 +L 111.811184 254.540684 +L 114.261407 252.247156 +L 115.486518 251.788451 +L 116.71163 250.412335 +L 117.936741 249.724276 +L 119.161852 249.265571 +L 120.386964 249.036218 +L 122.837187 246.283985 +L 124.062298 245.595927 +L 125.28741 243.761105 +L 126.512521 243.3024 +L 127.737633 243.3024 +L 128.962744 241.696931 +L 130.187856 241.008873 +L 131.412967 240.77952 +L 132.638079 240.320815 +L 133.86319 238.485993 +L 135.088302 237.797935 +L 136.313413 236.421818 +L 137.538525 235.504407 +L 138.763636 234.128291 +L 139.988748 234.128291 +L 141.213859 232.522822 +L 142.438971 231.834764 +L 143.664082 231.834764 +L 144.889194 231.146705 +L 146.114305 229.541236 +L 147.339417 229.082531 +L 148.564528 227.247709 +L 149.78964 226.789004 +L 151.014751 225.412887 +L 152.239863 224.954182 +L 154.690086 223.578065 +L 155.915197 222.431302 +L 157.140309 221.055185 +L 158.36542 220.367127 +L 159.590532 218.532305 +L 160.815643 217.844247 +L 162.040755 217.614895 +L 163.265866 216.926836 +L 164.490978 215.780073 +L 165.716089 215.321367 +L 166.941201 213.486545 +L 168.166312 213.257193 +L 169.391424 212.569135 +L 170.616535 210.963665 +L 171.841647 
210.50496 +L 173.066758 208.899491 +L 174.29187 208.670138 +L 175.516981 206.835316 +L 176.742093 206.376611 +L 177.967204 204.771142 +L 179.192316 204.312436 +L 180.417427 202.477615 +L 181.642539 201.789556 +L 182.86765 200.41344 +L 184.092762 199.725382 +L 185.317873 199.496029 +L 186.542985 197.661207 +L 187.768096 196.743796 +L 188.993208 195.138327 +L 190.218319 194.908975 +L 191.443431 192.8448 +L 192.668542 192.8448 +L 193.893654 191.927389 +L 195.118765 190.551273 +L 196.343877 190.092567 +L 197.568988 188.945804 +L 198.794099 188.257745 +L 200.019211 186.881629 +L 201.244322 186.193571 +L 202.469434 185.27616 +L 203.694545 183.900044 +L 206.144768 182.982633 +L 207.36988 182.065222 +L 208.594991 181.377164 +L 211.045214 180.918458 +L 212.270326 180.2304 +L 213.495437 180.001047 +L 214.720549 178.624931 +L 215.94566 178.166225 +L 217.170772 177.478167 +L 218.395883 176.560756 +L 219.620995 175.872698 +L 220.846106 174.955287 +L 222.071218 174.496582 +L 223.296329 173.579171 +L 224.521441 172.203055 +L 225.746552 171.973702 +L 226.971664 169.909527 +L 228.196775 169.450822 +L 229.421887 167.616 +L 230.646998 167.386647 +L 231.87211 165.322473 +L 233.097221 164.863767 +L 234.322333 163.028945 +L 235.547444 163.028945 +L 236.772556 162.57024 +L 237.997667 161.194124 +L 239.222779 160.735418 +L 240.44789 160.04736 +L 241.673002 158.441891 +L 242.898113 158.441891 +L 244.123225 157.52448 +L 245.348336 156.148364 +L 246.573448 155.919011 +L 247.798559 155.230953 +L 249.023671 154.084189 +L 250.248782 153.854836 +L 253.924117 151.102604 +L 255.149228 149.95584 +L 256.37434 149.267782 +L 257.599451 148.350371 +L 258.824563 147.203607 +L 260.049674 146.974255 +L 261.274786 146.286196 +L 262.499897 145.368785 +L 264.95012 143.992669 +L 266.175232 142.845905 +L 267.400343 141.011084 +L 268.625455 140.781731 +L 271.075678 138.488204 +L 272.300789 138.029498 +L 273.525901 136.653382 +L 274.751012 135.735971 +L 275.976123 135.047913 +L 277.201235 133.442444 +L 
278.426346 130.919564 +L 279.651458 130.460858 +L 280.876569 128.626036 +L 282.101681 128.167331 +L 283.326792 126.791215 +L 284.551904 125.873804 +L 285.777015 124.497687 +L 287.002127 124.038982 +L 288.227238 123.350924 +L 289.45235 122.20416 +L 290.677461 121.745455 +L 291.902573 120.828044 +L 293.127684 119.68128 +L 294.352796 119.451927 +L 295.577907 118.534516 +L 296.803019 118.075811 +L 298.02813 117.1584 +L 299.253242 116.470342 +L 300.478353 115.094225 +L 301.703465 113.488756 +L 302.928576 112.571345 +L 304.153688 110.277818 +L 305.378799 110.048465 +L 306.603911 107.984291 +L 307.829022 107.754938 +L 309.054134 106.149469 +L 310.279245 105.461411 +L 311.504357 104.314647 +L 312.729468 103.626589 +L 313.95458 102.250473 +L 317.629914 98.810182 +L 322.53036 98.810182 +L 323.755472 98.122124 +L 324.980583 97.204713 +L 327.430806 95.828596 +L 328.655918 95.828596 +L 331.106141 95.369891 +L 333.556364 95.369891 +L 334.781475 94.681833 +L 336.006587 94.45248 +L 337.231698 93.993775 +L 338.45681 93.305716 +L 340.907033 92.388305 +L 342.132144 91.470895 +L 343.357256 89.865425 +L 344.582367 89.40672 +L 345.807479 87.342545 +L 347.03259 87.113193 +L 348.257702 85.278371 +L 349.482813 84.590313 +L 350.707925 82.755491 +L 351.933036 82.526138 +L 353.158148 82.067433 +L 354.383259 80.461964 +L 355.60837 80.003258 +L 358.058593 78.168436 +L 359.283705 77.480378 +L 360.508816 77.251025 +L 361.733928 75.645556 +L 362.959039 73.810735 +L 364.184151 73.352029 +L 365.409262 71.975913 +L 366.634374 71.287855 +L 367.859485 70.829149 +L 369.084597 68.994327 +L 371.53482 68.076916 +L 372.759931 66.7008 +L 373.985043 65.095331 +L 375.210154 64.407273 +L 376.435266 63.031156 +L 377.660377 62.572451 +L 378.885489 61.65504 +L 380.1106 61.196335 +L 381.335712 59.820218 +L 382.560823 59.590865 +L 383.785935 59.13216 +L 385.011046 58.444102 +L 386.236158 57.526691 +L 387.461269 57.067985 +L 388.686381 55.233164 +L 389.911492 55.003811 +L 391.136604 52.939636 +L 398.487273 52.939636 
+L 398.487273 52.939636 +" style="fill:none;stroke:#1f77b4;stroke-linecap:square;stroke-width:1.5;"/> + </g> + <g id="patch_3"> + <path d="M 57.6 293.76 +L 57.6 41.472 +" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/> + </g> + <g id="patch_4"> + <path d="M 414.72 293.76 +L 414.72 41.472 +" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/> + </g> + <g id="patch_5"> + <path d="M 57.6 293.76 +L 414.72 293.76 +" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/> + </g> + <g id="patch_6"> + <path d="M 57.6 41.472 +L 414.72 41.472 +" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;"/> + </g> + <g id="text_51"> + <!-- Performances du module FAOGeneve - NScrap --> + <defs> + <path d="M 19.671875 64.796875 +L 19.671875 37.40625 +L 32.078125 37.40625 +Q 38.96875 37.40625 42.71875 40.96875 +Q 46.484375 44.53125 46.484375 51.125 +Q 46.484375 57.671875 42.71875 61.234375 +Q 38.96875 64.796875 32.078125 64.796875 +z +M 9.8125 72.90625 +L 32.078125 72.90625 +Q 44.34375 72.90625 50.609375 67.359375 +Q 56.890625 61.8125 56.890625 51.125 +Q 56.890625 40.328125 50.609375 34.8125 +Q 44.34375 29.296875 32.078125 29.296875 +L 19.671875 29.296875 +L 19.671875 0 +L 9.8125 0 +z +" id="DejaVuSans-80"/> + <path d="M 37.109375 75.984375 +L 37.109375 68.5 +L 28.515625 68.5 +Q 23.6875 68.5 21.796875 66.546875 +Q 19.921875 64.59375 19.921875 59.515625 +L 19.921875 54.6875 +L 34.71875 54.6875 +L 34.71875 47.703125 +L 19.921875 47.703125 +L 19.921875 0 +L 10.890625 0 +L 10.890625 47.703125 +L 2.296875 47.703125 +L 2.296875 54.6875 +L 10.890625 54.6875 +L 10.890625 58.5 +Q 10.890625 67.625 15.140625 71.796875 +Q 19.390625 75.984375 28.609375 75.984375 +z +" id="DejaVuSans-102"/> + <path d="M 45.40625 46.390625 +L 45.40625 75.984375 +L 54.390625 75.984375 +L 54.390625 0 +L 45.40625 0 +L 45.40625 8.203125 +Q 
42.578125 3.328125 38.25 0.953125 +Q 33.9375 -1.421875 27.875 -1.421875 +Q 17.96875 -1.421875 11.734375 6.484375 +Q 5.515625 14.40625 5.515625 27.296875 +Q 5.515625 40.1875 11.734375 48.09375 +Q 17.96875 56 27.875 56 +Q 33.9375 56 38.25 53.625 +Q 42.578125 51.265625 45.40625 46.390625 +z +M 14.796875 27.296875 +Q 14.796875 17.390625 18.875 11.75 +Q 22.953125 6.109375 30.078125 6.109375 +Q 37.203125 6.109375 41.296875 11.75 +Q 45.40625 17.390625 45.40625 27.296875 +Q 45.40625 37.203125 41.296875 42.84375 +Q 37.203125 48.484375 30.078125 48.484375 +Q 22.953125 48.484375 18.875 42.84375 +Q 14.796875 37.203125 14.796875 27.296875 +z +" id="DejaVuSans-100"/> + <path d="M 9.8125 72.90625 +L 51.703125 72.90625 +L 51.703125 64.59375 +L 19.671875 64.59375 +L 19.671875 43.109375 +L 48.578125 43.109375 +L 48.578125 34.8125 +L 19.671875 34.8125 +L 19.671875 0 +L 9.8125 0 +z +" id="DejaVuSans-70"/> + <path d="M 34.1875 63.1875 +L 20.796875 26.90625 +L 47.609375 26.90625 +z +M 28.609375 72.90625 +L 39.796875 72.90625 +L 67.578125 0 +L 57.328125 0 +L 50.6875 18.703125 +L 17.828125 18.703125 +L 11.1875 0 +L 0.78125 0 +z +" id="DejaVuSans-65"/> + <path d="M 39.40625 66.21875 +Q 28.65625 66.21875 22.328125 58.203125 +Q 16.015625 50.203125 16.015625 36.375 +Q 16.015625 22.609375 22.328125 14.59375 +Q 28.65625 6.59375 39.40625 6.59375 +Q 50.140625 6.59375 56.421875 14.59375 +Q 62.703125 22.609375 62.703125 36.375 +Q 62.703125 50.203125 56.421875 58.203125 +Q 50.140625 66.21875 39.40625 66.21875 +z +M 39.40625 74.21875 +Q 54.734375 74.21875 63.90625 63.9375 +Q 73.09375 53.65625 73.09375 36.375 +Q 73.09375 19.140625 63.90625 8.859375 +Q 54.734375 -1.421875 39.40625 -1.421875 +Q 24.03125 -1.421875 14.8125 8.828125 +Q 5.609375 19.09375 5.609375 36.375 +Q 5.609375 53.65625 14.8125 63.9375 +Q 24.03125 74.21875 39.40625 74.21875 +z +" id="DejaVuSans-79"/> + <path d="M 59.515625 10.40625 +L 59.515625 29.984375 +L 43.40625 29.984375 +L 43.40625 38.09375 +L 69.28125 38.09375 +L 69.28125 6.78125 
+Q 63.578125 2.734375 56.6875 0.65625 +Q 49.8125 -1.421875 42 -1.421875 +Q 24.90625 -1.421875 15.25 8.5625 +Q 5.609375 18.5625 5.609375 36.375 +Q 5.609375 54.25 15.25 64.234375 +Q 24.90625 74.21875 42 74.21875 +Q 49.125 74.21875 55.546875 72.453125 +Q 61.96875 70.703125 67.390625 67.28125 +L 67.390625 56.78125 +Q 61.921875 61.421875 55.765625 63.765625 +Q 49.609375 66.109375 42.828125 66.109375 +Q 29.4375 66.109375 22.71875 58.640625 +Q 16.015625 51.171875 16.015625 36.375 +Q 16.015625 21.625 22.71875 14.15625 +Q 29.4375 6.6875 42.828125 6.6875 +Q 48.046875 6.6875 52.140625 7.59375 +Q 56.25 8.5 59.515625 10.40625 +z +" id="DejaVuSans-71"/> + <path d="M 2.984375 54.6875 +L 12.5 54.6875 +L 29.59375 8.796875 +L 46.6875 54.6875 +L 56.203125 54.6875 +L 35.6875 0 +L 23.484375 0 +z +" id="DejaVuSans-118"/> + <path d="M 9.8125 72.90625 +L 23.09375 72.90625 +L 55.421875 11.921875 +L 55.421875 72.90625 +L 64.984375 72.90625 +L 64.984375 0 +L 51.703125 0 +L 19.390625 60.984375 +L 19.390625 0 +L 9.8125 0 +z +" id="DejaVuSans-78"/> + <path d="M 53.515625 70.515625 +L 53.515625 60.890625 +Q 47.90625 63.578125 42.921875 64.890625 +Q 37.9375 66.21875 33.296875 66.21875 +Q 25.25 66.21875 20.875 63.09375 +Q 16.5 59.96875 16.5 54.203125 +Q 16.5 49.359375 19.40625 46.890625 +Q 22.3125 44.4375 30.421875 42.921875 +L 36.375 41.703125 +Q 47.40625 39.59375 52.65625 34.296875 +Q 57.90625 29 57.90625 20.125 +Q 57.90625 9.515625 50.796875 4.046875 +Q 43.703125 -1.421875 29.984375 -1.421875 +Q 24.8125 -1.421875 18.96875 -0.25 +Q 13.140625 0.921875 6.890625 3.21875 +L 6.890625 13.375 +Q 12.890625 10.015625 18.65625 8.296875 +Q 24.421875 6.59375 29.984375 6.59375 +Q 38.421875 6.59375 43.015625 9.90625 +Q 47.609375 13.234375 47.609375 19.390625 +Q 47.609375 24.75 44.3125 27.78125 +Q 41.015625 30.8125 33.5 32.328125 +L 27.484375 33.5 +Q 16.453125 35.6875 11.515625 40.375 +Q 6.59375 45.0625 6.59375 53.421875 +Q 6.59375 63.09375 13.40625 68.65625 +Q 20.21875 74.21875 32.171875 74.21875 +Q 37.3125 
74.21875 42.625 73.28125 +Q 47.953125 72.359375 53.515625 70.515625 +z +" id="DejaVuSans-83"/> + </defs> + <g transform="translate(96.965625 35.472)scale(0.12 -0.12)"> + <use xlink:href="#DejaVuSans-80"/> + <use x="56.677734" xlink:href="#DejaVuSans-101"/> + <use x="118.201172" xlink:href="#DejaVuSans-114"/> + <use x="159.314453" xlink:href="#DejaVuSans-102"/> + <use x="194.519531" xlink:href="#DejaVuSans-111"/> + <use x="255.701172" xlink:href="#DejaVuSans-114"/> + <use x="295.064453" xlink:href="#DejaVuSans-109"/> + <use x="392.476562" xlink:href="#DejaVuSans-97"/> + <use x="453.755859" xlink:href="#DejaVuSans-110"/> + <use x="517.134766" xlink:href="#DejaVuSans-99"/> + <use x="572.115234" xlink:href="#DejaVuSans-101"/> + <use x="633.638672" xlink:href="#DejaVuSans-115"/> + <use x="685.738281" xlink:href="#DejaVuSans-32"/> + <use x="717.525391" xlink:href="#DejaVuSans-100"/> + <use x="781.001953" xlink:href="#DejaVuSans-117"/> + <use x="844.380859" xlink:href="#DejaVuSans-32"/> + <use x="876.167969" xlink:href="#DejaVuSans-109"/> + <use x="973.580078" xlink:href="#DejaVuSans-111"/> + <use x="1034.761719" xlink:href="#DejaVuSans-100"/> + <use x="1098.238281" xlink:href="#DejaVuSans-117"/> + <use x="1161.617188" xlink:href="#DejaVuSans-108"/> + <use x="1189.400391" xlink:href="#DejaVuSans-101"/> + <use x="1250.923828" xlink:href="#DejaVuSans-32"/> + <use x="1282.710938" xlink:href="#DejaVuSans-70"/> + <use x="1331.105469" xlink:href="#DejaVuSans-65"/> + <use x="1397.763672" xlink:href="#DejaVuSans-79"/> + <use x="1476.474609" xlink:href="#DejaVuSans-71"/> + <use x="1553.964844" xlink:href="#DejaVuSans-101"/> + <use x="1615.488281" xlink:href="#DejaVuSans-110"/> + <use x="1678.867188" xlink:href="#DejaVuSans-101"/> + <use x="1740.390625" xlink:href="#DejaVuSans-118"/> + <use x="1799.570312" xlink:href="#DejaVuSans-101"/> + <use x="1861.09375" xlink:href="#DejaVuSans-32"/> + <use x="1892.880859" xlink:href="#DejaVuSans-45"/> + <use x="1928.964844" 
xlink:href="#DejaVuSans-32"/> + <use x="1960.751953" xlink:href="#DejaVuSans-78"/> + <use x="2035.556641" xlink:href="#DejaVuSans-83"/> + <use x="2099.033203" xlink:href="#DejaVuSans-99"/> + <use x="2154.013672" xlink:href="#DejaVuSans-114"/> + <use x="2195.126953" xlink:href="#DejaVuSans-97"/> + <use x="2256.40625" xlink:href="#DejaVuSans-112"/> + </g> + </g> + </g> + </g> + <defs> + <clipPath id="pa3e036b96b"> + <rect height="252.288" width="357.12" x="57.6" y="41.472"/> + </clipPath> + </defs> +</svg> diff --git a/rapport/figs/js-infra.png b/rapport/figs/js-infra.png new file mode 100644 index 0000000000000000000000000000000000000000..8ef387d5e6c921b33da49950ac21fe5dd6999e2e Binary files /dev/null and b/rapport/figs/js-infra.png differ diff --git a/rapport/figs/playbook-complex.png b/rapport/figs/playbook-complex.png new file mode 100644 index 0000000000000000000000000000000000000000..a26a4e0a996db9bd539a7e87464ef8215a8c3cef Binary files /dev/null and b/rapport/figs/playbook-complex.png differ diff --git a/rapport/my.bib b/rapport/my.bib index 205b463cedc4e3c2889921d4cd9f4df023ba48fd..fdf9729dd49364f08fa80220688986ab3ac92e2c 100644 --- a/rapport/my.bib +++ b/rapport/my.bib @@ -65,6 +65,13 @@ In its fifth year, Data Never Sleeps shows exactly how much data is created ever file = {/Users/theo.pirkl/Zotero/storage/PZKBUDAV/EARN_MW_CUR.html} } +@online{noauthor_usage_nodate, + title = {Usage {{Statistics}} of {{JavaScript}} as {{Client}}-Side {{Programming Language}} on {{Websites}}, {{March}} 2020}, + url = {https://w3techs.com/technologies/details/cp-javascript}, + urldate = {2020-03-17}, + file = {/Users/theo.pirkl/Zotero/storage/UQPJXR2I/cp-javascript.html} +} + @online{noi_workers_nodate, title = {Do Workers Still Waste Time Searching for Information?}, author = {Noi, Daniela Di}, diff --git a/rapport/templates/default.latex b/rapport/templates/default.latex index 142d487a4f229060212eb385ec591944182d00b7..f0d6eb6a273ffb8c4c5ba8afb4470431d717f043 100644 --- 
a/rapport/templates/default.latex +++ b/rapport/templates/default.latex @@ -235,12 +235,13 @@ $endif$ \usepackage{titlesec} \usepackage[color=black,opacity=0.5,angle=0,scale=1,]{background} \usepackage[absolute]{textpos} +\usepackage{tocloft} % END OF CUSTOM PACKAGES % CUSTOM PACKAGES ROUTINES \titleformat{\chapter}{\centering\normalfont\LARGE\bfseries}{\thechapter. Chapitre \thechapter :}{10pt}{\LARGE} \titleformat{\section}{\large\normalfont\bfseries}{\thesection. }{10pt}{\large} -\titleformat{\subsection}{\normalfont\bfseries}{\thesubsection. }{10pt}{} +\titleformat{\subsection}{\normalfont\bfseries}{\hspace{.75cm}\thesubsection. }{10pt}{} \backgroundsetup{ contents={% \small{$name$, $surname$ - $smallTitle$ - $projectTitle$ - Mars $year$} @@ -262,6 +263,11 @@ $if(csl-refs)$ \everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces$endif$}% {\par} $endif$ +% Appendices +\newcommand{\listappendicesname}{Liste des annexes} +\newlistof{appendices}{apc}{\listappendicesname} +\newcommand{\appendices}[1]{\addcontentsline{apc}{appendices}{#1}\addcontentsline{toc}{section}{#1}} +\newcommand{\newappendix}[1]{\section*{#1}\appendices{#1}} % Raccourcis \newcommand{\img}[3]{% \img{path}{settings}{caption} \begin{figure} @@ -276,18 +282,13 @@ $endif$ \caption{#3} \end{figure} } -\newcommand{\wimg}[5]{ - \begin{wrapfigure}{#4}{#5} - \includegraphics[#2]{#1} - \caption{#3} - \end{wrapfigure} -} % END OF CUSTOM ROUTINES \begin{document} % Nom conformes des tables \renewcommand*\listfigurename{Liste des illustrations} +\renewcommand*\listtablename{Liste des tableaux} % Sets the page numbering style to roman %\pagestyle{headings} \setcounter{page}{1} diff --git a/rapport/text/0-preface.md b/rapport/text/0-preface.md index a3e049500322635dd2bea622d0376c9679611be7..8d07ab1f1d08dfca82cfd74bb4d8139c3642f2cc 100644 --- a/rapport/text/0-preface.md +++ b/rapport/text/0-preface.md @@ -5,7 +5,7 @@ de ce travail. 
Un grand merci à Monsieur Hoerdt, qui m'a pointé sur la technologie sur laquelle se repose ce projet. -J'aimerais aussi remercier Eduardo Basilico, qui a permis le déploiement de ce projet sur les machines de l'HEPIA. +J'aimerais aussi remercier Monsieur Basilico, qui a permis le déploiement de ce projet sur les machines de l'HEPIA. Enfin, je souhaite adresser mes remerciements à Monsieur El Kharroubi, qui m'a assisté dans le déploiement des différents outils nécessaires à ce projet de semestre. diff --git a/rapport/text/1-references.md b/rapport/text/1-references.md index 391b84d2d4969592e12c53eaeb80ad70f6f7fb62..3f57f7165661f1bc84891d8b35764895bc7ce4ae 100644 --- a/rapport/text/1-references.md +++ b/rapport/text/1-references.md @@ -1,4 +1,6 @@ -\printnoidxglossary[sort=word,title=Glossaire,nonumberlist] +\printnoidxglossary[sort=word,title=Liste des acronymes,nonumberlist] + +\pagebreak \listoffigures @@ -9,7 +11,7 @@ \multicolumn{1}{l}{URL03} & \multicolumn{1}{l}{\url{https://redmonk.com/sogrady/2020/02/28/language-rankings-1-20/}} \\ \end{tabular} -La figure 2.1 a été créée par mes soins et ne comporte donc pas de référence. +\pagebreak \listoftables @@ -17,8 +19,8 @@ La figure 2.1 a été créée par mes soins et ne comporte donc pas de référen \begin{tabular}{ p{3cm} p{9cm} } \end{tabular} -# Liste des annexes {-} +\pagebreak -1, 2, 3, 4.. +\listofappendices \pagebreak \ No newline at end of file diff --git a/rapport/text/3-etude.md b/rapport/text/3-etude.md index 41a5be7fb0f48c2f4285c70f075e3750bea81009..1f3022ecf466fe2f267a3fb850c603be4a40e7f1 100644 --- a/rapport/text/3-etude.md +++ b/rapport/text/3-etude.md @@ -2,7 +2,7 @@ Il est aujourd'hui difficile de récupérer des documents en masse[^1]. Sans connaissances en informatique poussées, il est véritablement complexe pour un utilisateur lambda d'arriver à ses fins dans ce domaine. 
-\wimg{figs/data-never-sleeps.png}{scale=0.2}{Ce que produit Internet chaque minute (en 2017)}{r}{200pt} +\cimg{figs/data-never-sleeps.png}{scale=0.2}{Ce que produit Internet chaque minute (en 2017)} Faciliter la récupération de données est particulièrement important; Forbes estimait en 2018 la création de 2'500'000 *terabytes* de données par jour [@marr_how_nodate]. Cette quantité de données est affolante et il est très facile de se noyer dedans et de perdre de l'information. Pour se donner une idée, l'entreprise Nandex affirme qu'un employé perd 2.5 heures par jour à chercher des documents [@noi_workers_nodate]. diff --git a/rapport/text/4-conception.md b/rapport/text/4-conception.md index 8b2ba0b792a1ae562a16ce688edfe878fa028b74..4fac0312d5f3e890ca5cf693ebc6f2a7988dce7f 100644 --- a/rapport/text/4-conception.md +++ b/rapport/text/4-conception.md @@ -12,6 +12,8 @@ En plus d'être flexible, le but final de NScrap est d'éviter au maximum à l'u Les modules ne sont, du point de vue de l'utilisateur, que des boîtes noires [^7]. L'utilisateur rentre un certain nombre de paramètre, et un fichier sort du module. Il n'a de cette façon pas besoin de se préoccuper de la partie programmation. +On n'attend pas de l'utilisateur qu'il sache programmer. Un fichier, nommé _playbook_, décrit chaque action à effectuer et dans quel ordre. Il suffit à l'utilisateur d'utiliser le manuel d'utilisation de NScrap pour savoir s'en servir. _[L'annexe un](#exemple-de-fichier-playbook)_ donne un exemple de _playbook_ sur lequel [nous reviendrons](#Application-pratique-complete). + [^7]: En vérité, il ne s'agit pas véritablement de boîtes noires comme l'entendrait par exemple Stallman [@williams_hackers_2002]. L'utilisateur serait tout à fait en mesure d'aller voir comment fonctionne le module et le modifier s'il le souhaitait. 
### Infrastructure @@ -23,29 +25,25 @@ Les machines travaillant sur une ou plusieurs tâches sont considérées comme l Une machine, quant à elle, est considérée comme le _serveur_ : c'est elle qui va envoyer les tâches aux clients, qui va décider si un client doit s'arrêter et qui peut récupérer les documents téléchargés de chaque client. Le serveur respecte la tolérance aux pannes de l'infrastructure; en cas de plantage, seul le serveur cessera de fonctionner car une fois les tâches envoyées, les clients ne dépendent plus du serveur. Enfin, un second serveur est nécessaire au fonctionnement optimal du service : une base de données permettant de stocker les tâches. Même si la base de données plante, les tâches, effectuées ou non, seront conservées. -\cimg{figs/infra.png}{scale=0.5}{Infrastructure du projet} +\cimg{figs/infra.png}{scale=0.45}{Infrastructure du projet} Le serveur est assez simpliste. Une fois démarré, il va lire un fichier qui permettra de créer les tâches à envoyer aux clients. Une fois ces dernières distribuées, il reste allumé dans le seul but de laisser un accès au panneau de contrôle. Il permet de récupérer les fichiers téléchargés par les clients. Les clients, en revanche, sont plus complexes. Ils sont à l'écoute en permanence de nouvelles commandes de la part du serveur, mais doivent en plus supporter toute l'infrastructure pour les différents processus (en l'occurence, ceux téléchargeant les documents). Chaque client possède _N_ processus, où _N_ est fixé à l'avance et un processus "principal", contenant deux threads : un en charge de la communication vers l'extérieur et un autre en charge de gérer les communications entre les processus travaillant sur une tâche. Cette configuration, bien que compliquée, permet de gérer n'importe quelle tâche de façon aisée. 
-\cimg{figs/client-infra.png}{scale=.30}{Infrastructure interne du client} +\cimg{figs/client-infra.png}{scale=.3}{Infrastructure interne du client} [^8]: _Schnell_ signifie _vite_ ou _rapidement_ en allemand. ### Langage de programmation Le langage choisi pour ce projet a été Python. Python est un langage particulièrement populaire, et ce pour deux raisons : sa facilité de lecture et sa capacité à effectuer des tâches complexes (ce malgré sa simplicité). - -\cimg{figs/redmonk-lang-q120.png}{scale=.30}{Classement des langages de programmation en 2020 - les plus populaires se situent en haut à droite} - Python est considéré comme le second langage recommandé à apprendre après le HTML [@alison_10_nodate]; c'est donc le premier langage de programmation polyvalent recommandé aux débutants. Il est tellement populaire qu'il est choisi quasiment systématiquement dans les projets de recherche dans l'(+IA_a). -En outre, des centaines de milliers de paquets Python existent déjà, rendant la tâche d'interconnecter NScrap à un système extrêmement facile dans bien des cas. Au vu de ces faits, Python est un candidat idéal à l'implémentation de ce projet. -### Design pattern +En outre, des centaines de milliers de paquets Python existent déjà, rendant la tâche d'interconnecter NScrap à un système extrêmement facile dans bien des cas. +Au vu de ces faits, Python est un candidat idéal à l'implémentation de ce projet. -Bli ### Modèles de données @@ -61,19 +59,84 @@ La communication est faite de deux façons : dans la très grande majorité des et inter-processus. ZeroMQ est particulièrement populaire, propose une implémentation en Python et est extrêmement bien documenté. Il possède différents modèles de communication dans lesquels nous ne rentrerons pas en détail; ces derniers permettent en somme d'émettre un message à un ou plusieurs destinataires. 
Quand la communication avec ZMQ n'est pas souhaitable ou possible, le protocole (+HTTP_a) est utilisé : le panneau de contrôle utilise par exemple exclusivement HTTP. -Les scrapers utilisent aussi (+HTTP_a)car il ne leur est pas possible d'utiliser ZMQ. Pour gérer certains évènements générés par les pages web qu'ils visitent, -il est nécessaire d'injecter du javascript dans la page web en question. Utiliser ZMQ dans cette situation aurait été bien plus compliqué qu'utiliser (+HTTP_a) (javascript permettant de faire des appels (+HTTP_a) très simplement), raison pour laquelle (+HTTP_a) a été utilisé à la place de ZMQ. +Les scrapers utilisent aussi (+HTTP_a) car il ne leur est pas possible d'utiliser ZMQ. Pour gérer certains évènements générés par les pages web qu'ils visitent, il est nécessaire d'injecter du javascript dans la page web en question. Ainsi, si la page web fonctionne en partie ou totalement avec Javascript, +il est possible d'intercepter les évènements signalant qu'une action s'est produite et agir en conséquence. +Utiliser ZMQ dans cette situation aurait été bien plus compliqué qu'utiliser (+HTTP_a). Qui plus est, Javascript permet de lancer des appels HTTP. Il suffit donc de créer un petit serveur web capable de réceptionner les appels HTTP lancés par Javascript. Ainsi, on est tout à fait capable de récupérer les évènements générés par la page web dans notre infrastructure. Un seul serveur web de "surveillance" est démarré par client afin d'éviter de surcharger la machine. + +\cimg{figs/js-infra.png}{scale=.4}{Gestion des évènements Javascript} + +L'avantage de gérer Javascript est énorme. On estime en 2020 que 95% des sites web se servent de ce langage [@noauthor_usage_nodate] . Dans certains cas, il n'est utilisé qu'à des fins esthétiques. +Dans d'autres, toute la navigation du site en dépend. Dans tous les cas, la capacité à pouvoir gérer ce langage est cruciale pour un scraper se voulant modulaire. 
+ +### Génération des tâches + +Chaque tâche est générée selon le _playbook_ fourni par l'utilisateur. Chaque module spécifié dans le playbook est analysé et génère un nombre de tâches soit pour le client, soit pour le serveur. C'est le serveur qui génère les tâches et les distribue aux clients. Chaque module spécifié dans le playbook peut inclure un nombre variable de paramètres _simples_ ou _complexes_. Les paramètres complexes permettent de rapidement générer une série de valeurs. Prenons un exemple : + +\cimg{figs/playbook-complex.png}{}{Exemple de paramètre complexe} + +Le module Swiss Impex va ici considérer le paramètre _year_ et voir qu'une valeur _start_ et _stop_ ont été définies. Il va alors automatiquement remplacer _year_ par toutes les valeurs possibles entre 1989 et 2020 (2020 non inclus). En voyant ensuite _month_, il fera de même. Une fois cela fait pour les tous les paramètres ayant défini _start_ et _stop_, le système générera toutes les combinaisons de valeurs possibles : Swiss Impex avec _month_ à 0, _year_ à 1989, Swiss Impex avec _month_ à 1, _year_ à 1989, etc. + +Cela permet de générer un très grand nombre de tâches en très peu de lignes. ## Réalisation -Blou +Différents sites web ont été analysés et traités par NScrap. Ces sites ont été choisis sur la base de besoins concrets. + +| Nom du site | Description | | | +|------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---|---| +| FAO Genève | La Feuille d'Avis Officielle (ou FAO) publie tous les jours des informations sur ces citoyens et entreprises en tout genre. 
Il peut être intéressant de s'y abonner si l'on souhaite recevoir des nouvelles sur certains mots-clés. Le site ne propose toutefois aucun système d'abonnement et ne semble pas disposé à en ajouter un. | | | +| Swiss Impex | Swiss-Impex propose toutes les statistiques d'import/export de la Suisse. Les informations sur ce site ont déjà permis de détecter des irrégularités et de dévoiler des scandales. Les informations de ce site sont difficiles à récupérer automatiquement, et elles sont en très grand nombre. L'automatisation de la récupération d'informations de ce site serait intéressante. | | | +| | | | | +Table: Etude de deux sites intéressants à traiter par NScrap + +Ces deux sites proposent des données étatiques permettant de donner des informations tout à fait intéressantes : des sites en font même la collecte pour les revendre[^9]. + +[^9]: [Lex Oculus](https://www.lexoculus.ch), [Easy Monitoring](https://www.easymonitoring.ch/fr/surveillance-fosc) + +\pagebreak + +## Catégorie un : Scrapers simples + +On définit par un _scraper simple_ un scraper n'ayant pas besoin d'intervenir dans le javascript pour traiter un site. Concrètement, cela signifie qu'il suffit de cliquer sur des éléments hypertexte (boutons, images, liens) pour arriver au-x document-s souhaité-s. +La catégorie un regroupe donc tout site ne dépendant pas de Javascript pour naviguer à travers les pages. +Plutôt que de réinventer la roue, NScrap utilise un framework, nommé Scrapy, capable de traiter les sites web rentrant dans la catégorie un. +Ce dernier permet le téléchargement très rapide[^10] de documents sur un site web. + +[^10]: Pour plus d'informations sur les performances de Scrapy, voir la section mesures + +### Application pratique : FAO Genève + +La FAO Genève est un excellent exemple de scraper simple. Javascript n'est utilisé qu'à des fins esthétiques. On peut donc utiliser Scrapy pour ce genre de site. 
+L'implémentation avec Scrapy est relativement simple : en une cinquantaine de lignes, on est capable de récupérer les documents du site. +Scrapy étant lui-même un framework, aucun module n'est nécessaire. On peut démarrer avec une configuration réduite un scraper rentrant dans la catégorie un, c'est-à-dire démarrer avec un seul client, au vu de la vitesse de traitement. +Les modules présentés ci-dessus sont tout à fait compatibles avec Scrapy, mais ils ne sont pas utilisés au vu de la seule efficacité de ce framework. + +## Catégorie deux : Scrapers complexes + +On définit par _scraper complexe_ un scraper ayant *obligatoirement* besoin de Javascript pour traiter un site. En vérité, les scrapers de cette catégorie en ont même besoin ne serait-ce que pour naviguer dessus. Dans cette configuration, tous les modules mentionnés plus tôt sont nécessaires au traitement de documents : la complexité étant plus importante, chaque document prend plus de temps à être récupéré. +Aucun framework de scraping n'est ici utilisé. Pour faire fonctionner un scraper de catégorie deux, on démarre un navigateur (dans notre cas, Firefox) qui va, comme son nom l'indique, naviguer sur le site qu'on veut traiter. On donne au navigateur les instructions à effectuer et elles sont lancées séquentiellement. + +### Application pratique : Swiss-Impex + +Swiss-Impex fonctionne sur une technologie Java-Javascript nommée IceFaces. IceFaces se base sur des appels (+AJAX_a) qui imposent le support de javascript. +Par conséquent, la catégorie deux de scrapers est idéale. Le module _SwissImpex Parser_ interagit avec le navigateur et est capable de se diriger jusqu'au document recherché. Ici, chaque action est envoyée au serveur Swiss-Impex. Il est donc nécessaire d'attendre que chaque action soit acquittée par le serveur avant d'effectuer le prochain clic. 
Le module de communication Javascript est donc ici utilisé pour garantir une réception des évènements javascript : ici, on s'intéresse aux acquittements du serveur Swiss-Impex. + +_[L'annexe un](#exemple-de-fichier-playbook)_ donne un exemple de _playbook_ dans lequel on peut voir Swiss Impex apparaître. Ce playbook propose d'effectuer les tâches suivantes : + +- Lire le fichier contenu dans `./resources/EUMerchIDs.txt` (ce fichier contient l'identificateur de chaque catégorie de marchandises répertoriée en Europe) +- Filtrer les identificateurs et garder ceux respectant une expression régulière[^11] +- Envoyer aux clients (dans le cas de l'annexe un, il y en a deux) un ensemble de tâches Swiss-Impex également réparti. +- Lors de la réception des tâches Swiss-Impex, le client va les lancer dans l'ordre de réception (ce qui va aboutir au téléchargement du document au format Excel) +- A la fin de chaque tâche Swiss Impex, le fichier XLSX est converti en CSV pour faciliter la lecture; Enfin, le fichier Excel est supprimé. -### Catégorie un : Scrapers simples +Chaque instruction "attend" la précédente : le travail est donc fait de façon séquentielle. Toutefois, chaque travailleur n'en attend aucun autre : autrement dit, chaque processus va séquentiellement effectuer les tâches spécifiées dans le _playbook_ de son côté. -LEs sc +[^11]: Une expression régulière, ou Regex (ou encore RegExp), est une chaîne de caractères décrivant un ensemble de chaînes possibles. -### Catégorie deux : Scrapers complexes +## Bilan -Bloump +L'infrastructure de NScrap n'est pas évidente. Elle possède plusieurs modules, relativement lourds pour le client et n'est pas aisée à comprendre. +Toutefois, du point de vue de l'utilisateur, c'est une solution facile (du moment que le module existe dans NScrap), car il n'a pas à programmer. 
+Le support de javascript est aussi un énorme plus car il permet de supprimer les barrières qu'on pourrait observer en tentant de scraper un site : on peut donc théoriquement grâce à NScrap scraper n'importe quel site. \pagebreak \ No newline at end of file diff --git a/rapport/text/5-solution.md b/rapport/text/5-solution.md index e891fe49e2b40e5bb3b9dd05a30e82f11e154a23..234a3b7f09ff0767b8cbb32b6ebbde53a4e7e52f 100644 --- a/rapport/text/5-solution.md +++ b/rapport/text/5-solution.md @@ -1,6 +1,82 @@ -# Solution suggérée +# Performances de NScrap -Au vu de ce que j'ai dit jusqu'ici, un exemple où mon application est utile, suivi de -l'étude de l'implémentation de la solution +Dans cette partie, nous étudierons plusieurs exécutions de NScrap afin de déterminer son comportement, ses performances et les résultats produits. +Nous commencerons par analyser un lancement de la FAO Genève en configuration réduite, puis sur plusieurs coeurs, puis sur deux machines avec plusieurs coeurs. +Nous continuerons par analyser un lancement réduit sur Swiss-Impex, c'est-à-dire un lancement sur une seule catégorie de marchandises; nous terminerons ce chapitre par l'analyse d'un lancement total sur Swiss-Impex. Nous partirons du principe qu'il y aura toujours une base de données, un serveur et au moins un client fonctionnel en tout temps. + +Le but de cette section est de répondre à deux questions : + +* Est-ce que l'automatisation de la récupération des documents est plus rapide que le téléchargement des documents un par un ? +* Est-ce que la distribution des tâches de téléchargement accélère le traitement des tâches ? + +## Analyse FAO Genève + +Nous allons nous intéresser au lancement de la récupération de 1000 documents sur la FAO Genève. 
La configuration de NScrap sera faite de trois façons différentes : + +* Une machine, un travailleur +* Une machine, six travailleurs +* Deux machines, six travailleurs + +On utilise un _playbook_ définissant seulement de lancer le module FAO Genève. + +### Théorie + +Les documents sont au nombre de 1'000. Si on part du principe que 50 documents sont sur une page, et que 50 documents sont récupérés en une minute[^12], on peut poser : + +$\frac{N_{documents} * T_{50 documents}}{50} = \frac{1000 * 60}{50} = 1200$ + +Soit 20 minutes théoriques dans le cas où nous n'avons qu'une machine et un travailleur. Dans le cas où nous avons $n$ travailleurs, il faudra diviser la durée par $n$; dans le cas où nous avons $n$ travailleurs et $k$ machines (où chaque machine dans $k$ a exactement $n$ travailleurs), il faudra diviser la durée théorique par $k*n$. + +[^12]: Vitesse supposée de téléchargement d'un humain ne faisant aucune faute et ne se fatiguant pas + +### Mesures + +Après envoi de la commande au-x client-s, on observe après chaque seconde le nombre de documents téléchargés au sein du dossier de chaque client. + +### Résultats + +Commençons par les résultats d'une machine ayant un travailleur. + +\cimg{figs/faogeneve-1m1c.svg}{scale=0.5}{Performances du module FAO Genève sur 1000 documents} + +Comme nous pouvons le constater, nous avons largement dépassé la théorie. En effet, nous arrivons à une moyenne pratique de trois documents par seconde (soit 180 documents par minute), bien plus que les 50 documents par minute escomptés. A la place des vingt minutes théoriques, nous sommes arrivés à un traitement de moins de cinq minutes (pour être exact, 4 minutes et 25 secondes). +Il est appréciable de constater que la vitesse de traitement de documents n'est pas par "crans", mais qu'elle reste plus ou moins constante durant toute la durée du téléchargement. +A partir des résultats présentés ci-dessus, nous pouvons répondre aux deux questions posées au début de ce chapitre. 
+ +* Oui, l'automatisation de la récupération des documents permet d'aller plus vite qu'un humain : aucun humain ne peut prétendre télécharger trois documents à la seconde pendant une période de temps prolongée; +* La distribution des tâches de téléchargement accélère probablement le traitement des tâches, mais ici, nous n'avons qu'une seule machine (et un seul travailleur) qui a traité les documents. En se basant seulement sur les données récoltées, nous ne pouvons pas répondre à cette question. + +## Analyse Swiss-Impex réduite + +Nous allons maintenant analyser le module Swiss-Impex. Ce module est plus intéressant à étudier que la FAO Genève : en effet, Swiss-Impex est un scraper de catégorie deux, plus complet et plus complexe que ceux de première catégorie. La récupération des données prendra bien plus de temps que pour la FAO Genève. +Dans le cas d'une analyse Swiss-Impex réduite, on ne s'intéresse qu'à une seule catégorie de marchandises. Dans ce cas, on s'intéressera à la catégorie *2709.0090*, ce qui correspond aux données pétrolières suisses. + +On utilise le _playbook_ présenté dans le fichier `projet/resources/playbooks/swiss-impex-petroleum.yml` + +### Théorie + +Il y a un rapport d'import/export par mois présent sur Swiss-Impex depuis 1989 jusqu'à 2019 (les informations de 2020 étant encore relativement inexactes). +On peut donc calculer le nombre de documents à $12 * (2019 - 1989) = 360$. + +On part du principe que chaque document prend 3 minutes. Avec une seule machine ayant six travailleurs, cela prendrait donc 3 heures. Pour réduire au plus ce temps, nous allons utiliser vingt machines. Chacune de ces machines aura six travailleurs. Nous aurons donc un total de 120 travailleurs capables d'effectuer des tâches simultanément. +Nous passons donc de trois heures à neuf minutes. 
+ +### Mesures +Après envoi de la commande aux clients, on observe toutes les dix secondes[^intervalle-mesure] : + +* Le nombre de documents téléchargés (le total calculé sur toutes les machines) +* Le nombre de tâches restantes au sein de la base de données + +[^intervalle-mesure]: Observer à un intervalle inférieur à dix secondes saturerait inutilement le protocole de communication : les mesures arriveraient tardivement et seraient décalées par rapport à la réalité. + +### Résultats + +TODO LES RESULTATS + +## Analyse Swiss-Impex totale + +Nous allons finalement lancer un téléchargement complet de Swiss-Impex, sur toutes les données possibles à télécharger. + +## Bilan \pagebreak \ No newline at end of file diff --git a/rapport/text/8-annexes.md b/rapport/text/8-annexes.md index b54bf6ca4944199391883584cf3deb838b3ace8f..d342786693db7444546fe7590fdece562b1e08d5 100644 --- a/rapport/text/8-annexes.md +++ b/rapport/text/8-annexes.md @@ -1,5 +1,41 @@ # Annexes {-} -Tout est dans le titre. +\newappendix{Exemple de fichier playbook} +```yaml +clients: # The default management port for a client is 18965 + - "129.194.187.141" + - "129.194.187.142" + +chapters: + File Parser: + file: ./resources/EUMerchIDs.txt + server: yes # Will be run on server + instructions: + read: yes + then: + + Regex Parser: + input: "!fileParserOutput" + regex: "[0-9]{4}.[0-9]{4}" + server: yes # Will not be sent to client + then: # Sent to client from here only ! 
+ + Swiss Impex: + year: {start: 1989, end: 2020} # 1989 - 2019 + month: {start: 0, end: 12} + merchandiseId: "!regexParserOutput" + then: + + XLSX Parser: + file: "!swissImpexOutputfileName" + from: "A6" + to: "H197" + instructions: + - replace: + - "*" + - "" + - export: csv + - cleanup: yes +``` \pagebreak \ No newline at end of file diff --git a/rapport/text/ZZ-glossaire.tex b/rapport/text/ZZ-glossaire.tex index 02ab44a7a0fc600e14f7053185eb23f893f19299..7cfdc0f1d09f3beb4c29e1aa9451bea38bd3f317 100644 --- a/rapport/text/ZZ-glossaire.tex +++ b/rapport/text/ZZ-glossaire.tex @@ -15,4 +15,5 @@ \newacronym{RSS_a}{RSS}{Really Simple Syndication} \newacronym{IA_a}{IA}{Intelligence Artificielle} \newacronym{CSV_a}{CSV}{Comma Separated Value} -\newacronym{HTTP_a}{HTTP}{HyperText Transfer Protocol} \ No newline at end of file +\newacronym{HTTP_a}{HTTP}{HyperText Transfer Protocol} +\newacronym{AJAX_a}{AJAX}{Asynchronous JavaScript and XML} \ No newline at end of file