From 497926b0252a4779b6f849ced63bb09623c7b557 Mon Sep 17 00:00:00 2001
From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch>
Date: Fri, 14 Mar 2025 05:02:24 +0100
Subject: [PATCH] Done: LLM price calculation script finished

---
 .../Tokenizer/{doc => data}/token_count.json  |   0
 models/LLM/Tokenizer/token_count.py           |   4 +-
 models/LLM/prices/calc_llm_prices.py          | 106 +++++-
 models/LLM/prices/data/llm_prices.json        | 338 ++++++++++++++++++
 4 files changed, 439 insertions(+), 9 deletions(-)
 rename models/LLM/Tokenizer/{doc => data}/token_count.json (100%)
 create mode 100644 models/LLM/prices/data/llm_prices.json

diff --git a/models/LLM/Tokenizer/doc/token_count.json b/models/LLM/Tokenizer/data/token_count.json
similarity index 100%
rename from models/LLM/Tokenizer/doc/token_count.json
rename to models/LLM/Tokenizer/data/token_count.json
diff --git a/models/LLM/Tokenizer/token_count.py b/models/LLM/Tokenizer/token_count.py
index 93c7a4d12..75dd76197 100644
--- a/models/LLM/Tokenizer/token_count.py
+++ b/models/LLM/Tokenizer/token_count.py
@@ -11,7 +11,7 @@ from testModel.utils import get_article_data
 from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
 
 DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../dataSources/PubMed/data"))
-DOC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./doc"))
+RESULT_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
 
 INTERVALS = [
     "day",
@@ -208,5 +208,5 @@ for tokenizer_name in TOKENIZERS:
                     "mean": statistics.mean(counts[tokenizer_name][category][interval][i])
                 }
 
-with open(f"{DOC_DIR}/token_count.json", "w") as json_file:
+with open(f"{RESULT_DATA_DIR}/token_count.json", "w") as json_file:
     json.dump(counts, json_file, indent=4)
diff --git a/models/LLM/prices/calc_llm_prices.py b/models/LLM/prices/calc_llm_prices.py
index 51c9ff52e..4fd46e549 100644
--- a/models/LLM/prices/calc_llm_prices.py
+++ b/models/LLM/prices/calc_llm_prices.py
@@ -1,13 +1,105 @@
+from transformers import AutoTokenizer
+import json
+import sys
+import os
+import statistics
+from datetime import datetime, timedelta
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))
+
+from testModel.utils import get_article_data
+from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../Tokenizer/data"))  # input: token_count.json is read from here
+RESULT_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))  # output: llm_prices.json is written here
 
 # Prices for 1M tokens
 PRICES = {
     'Mistral Large': { 'input': 2, 'output': 6},
     'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'GPT-4o': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
+    'GPT-4o': { 'input': 2.5, 'output': 10},
+    'GPT-4o mini': { 'input': 0.15, 'output': 0.6},
+    'DeepSeek-V3': { 'input': 0.27, 'output': 1.1},
+    'Gemini 2.0 Flash': { 'input': 0.1, 'output': 0.4},
+    'Gemini 2.0 Flash-Lite': { 'input': 0.075, 'output': 0.3},
+    'Claude 3.7 Sonnet': { 'input': 3, 'output': 15},
+    'Claude 3.5 Haiku': { 'input': 0.8, 'output': 4},
+    'Claude 3 Opus': { 'input': 15, 'output': 75},
+    'Command A': { 'input': 2.5, 'output': 10},
+    'Command R+': { 'input': 2.5, 'output': 10},
+}  # each rate is divided by 1000000 before being multiplied by a token count
+
+INTERVALS = [  # interval keys looked up under every category of token_count.json
+    "day",
+    "week",
+    "month"
+]
+
+CATEGORIES = [  # category keys looked up under every tokenizer of token_count.json
+    "NO KEYWORDS",
+    "KEYWORDS",
+    "SUBHEADINGS",
+    "SITE PROPOSITION",
+    "PROPOSITION"
+]
+
+TOKENIZERS = [  # top-level keys of token_count.json; their counts are averaged
+    #"openai-community/gpt-4",
+    #"meta-llama/Llama-2-7b-hf",
+    "bert-base-uncased",
+    "roberta-base",
+    "facebook/bart-large"
+]
+
+# Load the token statistics written by Tokenizer/token_count.py.
+file_path = f"{DATA_DIR}/token_count.json"
+with open(file_path, "r", encoding="utf-8") as file:
+    data = json.load(file)
+
+# Average the token counts over every tokenizer in TOKENIZERS: accumulate the
+# sums first, then divide by the number of tokenizers.
+mean_data = {"ALL": 0}
+for category in CATEGORIES:
+    mean_data[category] = {}
+    for interval in INTERVALS:
+        mean_data[category][interval] = {"input": 0, "output": 0}
+
+for tokenizer_name in TOKENIZERS:
+    mean_data["ALL"] += data[tokenizer_name]["ALL"]
+    for category in CATEGORIES:
+        for interval in INTERVALS:
+            for i in ["input", "output"]:
+                mean_data[category][interval][i] += data[tokenizer_name][category][interval][i]["mean"]
+
+# Divide by the number of tokenizers. The previous divisor, len(tokenizer_name),
+# was the character count of the last tokenizer's name, which skewed every
+# per-category average.
+tokenizer_count = len(TOKENIZERS)
+mean_data["ALL"] /= tokenizer_count
+for category in CATEGORIES:
+    for interval in INTERVALS:
+        for i in ["input", "output"]:
+            mean_data[category][interval][i] /= tokenizer_count
+
+
+# Estimated cost per LLM: the averaged token counts priced at the PRICES rates
+# (given per 1M tokens), for the "ALL" entry and for every category/interval.
+res = {}
+
+for llm, prices in PRICES.items():
+    in_rate = prices["input"] / 1000000
+    out_rate = prices["output"] / 1000000
+
+    # "ALL" holds a single token count, so it is charged at both rates.
+    all_tokens = mean_data["ALL"]
+    costs = {"ALL": in_rate * all_tokens + out_rate * all_tokens}
+    for category in CATEGORIES:
+        costs[category] = {}
+        for interval in INTERVALS:
+            tokens = mean_data[category][interval]
+            costs[category][interval] = in_rate * tokens["input"] + out_rate * tokens["output"]
+
+    res[llm] = costs
 
-}
\ No newline at end of file
+with open(f"{RESULT_DATA_DIR}/llm_prices.json", mode="w") as out_file:  # persist the per-LLM cost table
+    json.dump(res, fp=out_file, indent=4)
\ No newline at end of file
diff --git a/models/LLM/prices/data/llm_prices.json b/models/LLM/prices/data/llm_prices.json
new file mode 100644
index 000000000..375cc8a4d
--- /dev/null
+++ b/models/LLM/prices/data/llm_prices.json
@@ -0,0 +1,338 @@
+{
+    "Mistral Large": {
+        "ALL": 495.855152,
+        "NO KEYWORDS": {
+            "day": 0.032945036912280706,
+            "week": 0.23020600326904217,
+            "month": 1.0017072034139403
+        },
+        "KEYWORDS": {
+            "day": 0.001468795602339181,
+            "week": 0.010263323308270676,
+            "month": 0.04465932574679943
+        },
+        "SUBHEADINGS": {
+            "day": 0.001468795602339181,
+            "week": 0.010263323308270676,
+            "month": 0.04465932574679943
+        },
+        "SITE PROPOSITION": {
+            "day": 0.0019225152748538013,
+            "week": 0.013433724746649231,
+            "month": 0.058454856330014224
+        },
+        "PROPOSITION": {
+            "day": 0.002632865216374269,
+            "week": 0.018397350114416476,
+            "month": 0.08005333428165007
+        }
+    },
+    "Mistral Small": {
+        "ALL": 24.792757599999998,
+        "NO KEYWORDS": {
+            "day": 0.0016472518456140351,
+            "week": 0.011510300163452107,
+            "month": 0.050085360170697014
+        },
+        "KEYWORDS": {
+            "day": 7.343978011695905e-05,
+            "week": 0.0005131661654135338,
+            "month": 0.0022329662873399716
+        },
+        "SUBHEADINGS": {
+            "day": 7.343978011695905e-05,
+            "week": 0.0005131661654135338,
+            "month": 0.0022329662873399716
+        },
+        "SITE PROPOSITION": {
+            "day": 9.612576374269007e-05,
+            "week": 0.0006716862373324615,
+            "month": 0.002922742816500711
+        },
+        "PROPOSITION": {
+            "day": 0.00013164326081871344,
+            "week": 0.0009198675057208238,
+            "month": 0.004002666714082503
+        }
+    },
+    "GPT-4o": {
+        "ALL": 774.773675,
+        "NO KEYWORDS": {
+            "day": 0.04765905052631579,
+            "week": 0.3330213157894737,
+            "month": 1.4490927524893316
+        },
+        "KEYWORDS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SUBHEADINGS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SITE PROPOSITION": {
+            "day": 0.002775600233918129,
+            "week": 0.019394722131415496,
+            "month": 0.08439325035561879
+        },
+        "PROPOSITION": {
+            "day": 0.003803116608187135,
+            "week": 0.02657457257273619,
+            "month": 0.1156353022759602
+        }
+    },
+    "GPT-4o mini": {
+        "ALL": 46.486420499999994,
+        "NO KEYWORDS": {
+            "day": 0.0028595430315789473,
+            "week": 0.019981278947368418,
+            "month": 0.08694556514935989
+        },
+        "KEYWORDS": {
+            "day": 0.00012741440701754387,
+            "week": 0.0008903180614579927,
+            "month": 0.003874086699857752
+        },
+        "SUBHEADINGS": {
+            "day": 0.00012741440701754387,
+            "week": 0.0008903180614579927,
+            "month": 0.003874086699857752
+        },
+        "SITE PROPOSITION": {
+            "day": 0.00016653601403508771,
+            "week": 0.0011636833278849297,
+            "month": 0.005063595021337126
+        },
+        "PROPOSITION": {
+            "day": 0.00022818699649122803,
+            "week": 0.001594474354364171,
+            "month": 0.006938118136557611
+        }
+    },
+    "DeepSeek-V3": {
+        "ALL": 84.91519478000001,
+        "NO KEYWORDS": {
+            "day": 0.005198999491929825,
+            "week": 0.036328412598888526,
+            "month": 0.15807768725462307
+        },
+        "KEYWORDS": {
+            "day": 0.00023164656421052632,
+            "week": 0.001618648352402746,
+            "month": 0.007043307695590327
+        },
+        "SUBHEADINGS": {
+            "day": 0.00023164656421052632,
+            "week": 0.001618648352402746,
+            "month": 0.007043307695590327
+        },
+        "SITE PROPOSITION": {
+            "day": 0.00030274447438596495,
+            "week": 0.002115450519777705,
+            "month": 0.009205068477951637
+        },
+        "PROPOSITION": {
+            "day": 0.00041483287438596496,
+            "week": 0.002898676917293233,
+            "month": 0.01261316172119488
+        }
+    },
+    "Gemini 2.0 Flash": {
+        "ALL": 30.990947000000002,
+        "NO KEYWORDS": {
+            "day": 0.0019063620210526318,
+            "week": 0.013320852631578948,
+            "month": 0.05796371009957327
+        },
+        "KEYWORDS": {
+            "day": 8.49429380116959e-05,
+            "week": 0.0005935453743053285,
+            "month": 0.002582724466571835
+        },
+        "SUBHEADINGS": {
+            "day": 8.49429380116959e-05,
+            "week": 0.0005935453743053285,
+            "month": 0.002582724466571835
+        },
+        "SITE PROPOSITION": {
+            "day": 0.00011102400935672515,
+            "week": 0.0007757888852566199,
+            "month": 0.003375730014224751
+        },
+        "PROPOSITION": {
+            "day": 0.0001521246643274854,
+            "week": 0.0010629829029094476,
+            "month": 0.0046254120910384075
+        }
+    },
+    "Gemini 2.0 Flash-Lite": {
+        "ALL": 23.243210249999997,
+        "NO KEYWORDS": {
+            "day": 0.0014297715157894737,
+            "week": 0.009990639473684209,
+            "month": 0.043472782574679944
+        },
+        "KEYWORDS": {
+            "day": 6.370720350877193e-05,
+            "week": 0.00044515903072899637,
+            "month": 0.001937043349928876
+        },
+        "SUBHEADINGS": {
+            "day": 6.370720350877193e-05,
+            "week": 0.00044515903072899637,
+            "month": 0.001937043349928876
+        },
+        "SITE PROPOSITION": {
+            "day": 8.326800701754386e-05,
+            "week": 0.0005818416639424649,
+            "month": 0.002531797510668563
+        },
+        "PROPOSITION": {
+            "day": 0.00011409349824561402,
+            "week": 0.0007972371771820855,
+            "month": 0.0034690590682788054
+        }
+    },
+    "Claude 3.7 Sonnet": {
+        "ALL": 1115.674092,
+        "NO KEYWORDS": {
+            "day": 0.06496416589473684,
+            "week": 0.4539421529911736,
+            "month": 1.975261800853485
+        },
+        "KEYWORDS": {
+            "day": 0.002893382877192982,
+            "week": 0.0202177374959137,
+            "month": 0.08797447937411096
+        },
+        "SUBHEADINGS": {
+            "day": 0.002893382877192982,
+            "week": 0.0202177374959137,
+            "month": 0.08797447937411096
+        },
+        "SITE PROPOSITION": {
+            "day": 0.0037776676491228073,
+            "week": 0.02639674599542334,
+            "month": 0.11486151635846373
+        },
+        "PROPOSITION": {
+            "day": 0.005178182035087719,
+            "week": 0.03618294900294214,
+            "month": 0.15744472403982931
+        }
+    },
+    "Claude 3.5 Haiku": {
+        "ALL": 297.51309119999996,
+        "NO KEYWORDS": {
+            "day": 0.017323777571929825,
+            "week": 0.12105124079764629,
+            "month": 0.526736480227596
+        },
+        "KEYWORDS": {
+            "day": 0.0007715687672514619,
+            "week": 0.005391396665576986,
+            "month": 0.023459861166429587
+        },
+        "SUBHEADINGS": {
+            "day": 0.0007715687672514619,
+            "week": 0.005391396665576986,
+            "month": 0.023459861166429587
+        },
+        "SITE PROPOSITION": {
+            "day": 0.0010073780397660819,
+            "week": 0.007039132265446224,
+            "month": 0.030629737695590326
+        },
+        "PROPOSITION": {
+            "day": 0.0013808485426900583,
+            "week": 0.00964878640078457,
+            "month": 0.04198525974395448
+        }
+    },
+    "Claude 3 Opus": {
+        "ALL": 5578.370459999999,
+        "NO KEYWORDS": {
+            "day": 0.3248208294736842,
+            "week": 2.2697107649558674,
+            "month": 9.876309004267425
+        },
+        "KEYWORDS": {
+            "day": 0.01446691438596491,
+            "week": 0.10108868747956848,
+            "month": 0.4398723968705548
+        },
+        "SUBHEADINGS": {
+            "day": 0.01446691438596491,
+            "week": 0.10108868747956848,
+            "month": 0.4398723968705548
+        },
+        "SITE PROPOSITION": {
+            "day": 0.018888338245614034,
+            "week": 0.1319837299771167,
+            "month": 0.5743075817923186
+        },
+        "PROPOSITION": {
+            "day": 0.025890910175438597,
+            "week": 0.18091474501471067,
+            "month": 0.7872236201991465
+        }
+    },
+    "Command A": {
+        "ALL": 774.773675,
+        "NO KEYWORDS": {
+            "day": 0.04765905052631579,
+            "week": 0.3330213157894737,
+            "month": 1.4490927524893316
+        },
+        "KEYWORDS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SUBHEADINGS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SITE PROPOSITION": {
+            "day": 0.002775600233918129,
+            "week": 0.019394722131415496,
+            "month": 0.08439325035561879
+        },
+        "PROPOSITION": {
+            "day": 0.003803116608187135,
+            "week": 0.02657457257273619,
+            "month": 0.1156353022759602
+        }
+    },
+    "Command R+": {
+        "ALL": 774.773675,
+        "NO KEYWORDS": {
+            "day": 0.04765905052631579,
+            "week": 0.3330213157894737,
+            "month": 1.4490927524893316
+        },
+        "KEYWORDS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SUBHEADINGS": {
+            "day": 0.0021235734502923977,
+            "week": 0.014838634357633215,
+            "month": 0.06456811166429588
+        },
+        "SITE PROPOSITION": {
+            "day": 0.002775600233918129,
+            "week": 0.019394722131415496,
+            "month": 0.08439325035561879
+        },
+        "PROPOSITION": {
+            "day": 0.003803116608187135,
+            "week": 0.02657457257273619,
+            "month": 0.1156353022759602
+        }
+    }
+}
\ No newline at end of file
-- 
GitLab