From 497926b0252a4779b6f849ced63bb09623c7b557 Mon Sep 17 00:00:00 2001 From: Ivan Pavlovich <ivan.pavlovic@hes-so.ch> Date: Fri, 14 Mar 2025 05:02:24 +0100 Subject: [PATCH] Done: llm price calculation script finished --- .../Tokenizer/{doc => data}/token_count.json | 0 models/LLM/Tokenizer/token_count.py | 4 +- models/LLM/prices/calc_llm_prices.py | 106 +++++- models/LLM/prices/data/llm_prices.json | 338 ++++++++++++++++++ 4 files changed, 439 insertions(+), 9 deletions(-) rename models/LLM/Tokenizer/{doc => data}/token_count.json (100%) create mode 100644 models/LLM/prices/data/llm_prices.json diff --git a/models/LLM/Tokenizer/doc/token_count.json b/models/LLM/Tokenizer/data/token_count.json similarity index 100% rename from models/LLM/Tokenizer/doc/token_count.json rename to models/LLM/Tokenizer/data/token_count.json diff --git a/models/LLM/Tokenizer/token_count.py b/models/LLM/Tokenizer/token_count.py index 93c7a4d12..75dd76197 100644 --- a/models/LLM/Tokenizer/token_count.py +++ b/models/LLM/Tokenizer/token_count.py @@ -11,7 +11,7 @@ from testModel.utils import get_article_data from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../dataSources/PubMed/data")) -DOC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./doc")) +RESULT_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data")) INTERVALS = [ "day", @@ -208,5 +208,5 @@ for tokenizer_name in TOKENIZERS: "mean": statistics.mean(counts[tokenizer_name][category][interval][i]) } -with open(f"{DOC_DIR}/token_count.json", "w") as json_file: +with open(f"{RESULT_DATA_DIR}/token_count.json", "w") as json_file: json.dump(counts, json_file, indent=4) diff --git a/models/LLM/prices/calc_llm_prices.py b/models/LLM/prices/calc_llm_prices.py index 51c9ff52e..4fd46e549 100644 --- a/models/LLM/prices/calc_llm_prices.py 
+++ b/models/LLM/prices/calc_llm_prices.py
@@ -1,13 +1,105 @@
+from transformers import AutoTokenizer
+import json
+import sys
+import os
+import statistics
+from datetime import datetime, timedelta
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))
+
+from testModel.utils import get_article_data
+from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../Tokenizer/data"))
+RESULT_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
 
 # Prices for 1M tokens
 PRICES = {
     'Mistral Large': { 'input': 2, 'output': 6},
     'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'GPT-4o': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
-    'Mistral Small': { 'input': 0.1, 'output': 0.3},
+    'GPT-4o': { 'input': 2.5, 'output': 10},
+    'GPT-4o mini': { 'input': 0.15, 'output': 0.6},
+    'DeepSeek-V3': { 'input': 0.27, 'output': 1.1},
+    'Gemini 2.0 Flash': { 'input': 0.1, 'output': 0.4},
+    'Gemini 2.0 Flash-Lite': { 'input': 0.075, 'output': 0.3},
+    'Claude 3.7 Sonnet': { 'input': 3, 'output': 15},
+    'Claude 3.5 Haiku': { 'input': 0.8, 'output': 4},
+    'Claude 3 Opus': { 'input': 15, 'output': 75},
+    'Command A': { 'input': 2.5, 'output': 10},
+    'Command R+': { 'input': 2.5, 'output': 10},
+}
+
+INTERVALS = [
+    "day",
+    "week",
+    "month"
+]
+
+CATEGORIES = [
+    "NO KEYWORDS",
+    "KEYWORDS",
+    "SUBHEADINGS",
+    "SITE PROPOSITION",
+    "PROPOSITION"
+]
+
+TOKENIZERS = [
+    #"openai-community/gpt-4",
+    #"meta-llama/Llama-2-7b-hf",
+    "bert-base-uncased",
+    "roberta-base",
+    "facebook/bart-large"
+]
+
+file_path = f"{DATA_DIR}/token_count.json"
+with open(file_path, "r", encoding="utf-8") as file:
+    data = json.load(file)
+
+mean_data = {
+    "ALL": 0
+}
+
+for category in CATEGORIES:
+    mean_data[category] = {}
+    for interval in INTERVALS:
+        mean_data[category][interval] = {
+            "input": 0,
+            "output": 0
+        }
+
+
+for tokenizer_name in TOKENIZERS:
+    mean_data["ALL"] += data[tokenizer_name]["ALL"]
+    for category in CATEGORIES:
+        for interval in INTERVALS:
+            for i in ["input", "output"]:
+                mean_data[category][interval][i] += data[tokenizer_name][category][interval][i]["mean"]
+
+mean_data["ALL"] /= len(TOKENIZERS)
+for category in CATEGORIES:
+    for interval in INTERVALS:
+        for i in ["input", "output"]:
+            mean_data[category][interval][i] /= len(TOKENIZERS)  # average over the tokenizers, not over the characters of the last tokenizer's name
+
+
+res = {}
+
+for llm in PRICES.keys():
+
+    res[llm] = {
+        "ALL": 0
+    }
+
+    for category in CATEGORIES:
+        res[llm][category] = {}
+        for interval in INTERVALS:
+            res[llm][category][interval] = 0
+
+for llm, prices in PRICES.items():
+    res[llm]["ALL"] = prices["input"] / 1000000 * mean_data["ALL"] + prices["output"] / 1000000 * mean_data["ALL"]  # NOTE(review): the same "ALL" token count is billed as both input and output - confirm this is intended
+    for category in CATEGORIES:
+        for interval in INTERVALS:
+            res[llm][category][interval] = prices["input"] / 1000000 * mean_data[category][interval]["input"] + prices["output"] / 1000000 * mean_data[category][interval]["output"]
 
-}
\ No newline at end of file
+with open(f"{RESULT_DATA_DIR}/llm_prices.json", "w") as json_file:
+    json.dump(res, json_file, indent=4)
\ No newline at end of file
diff --git a/models/LLM/prices/data/llm_prices.json b/models/LLM/prices/data/llm_prices.json
new file mode 100644
index 000000000..375cc8a4d
--- /dev/null
+++ b/models/LLM/prices/data/llm_prices.json
@@ -0,0 +1,338 @@
+{
+    "Mistral Large": {
+        "ALL": 495.855152,
+        "NO KEYWORDS": {
+            "day": 0.032945036912280706,
+            "week": 0.23020600326904217,
+            "month": 1.0017072034139403
+        },
+        "KEYWORDS": {
+            "day": 0.001468795602339181,
+            "week": 0.010263323308270676,
+            "month": 0.04465932574679943
+        },
+        "SUBHEADINGS": {
+            "day": 0.001468795602339181,
+            "week": 0.010263323308270676,
+
"month": 0.04465932574679943 + }, + "SITE PROPOSITION": { + "day": 0.0019225152748538013, + "week": 0.013433724746649231, + "month": 0.058454856330014224 + }, + "PROPOSITION": { + "day": 0.002632865216374269, + "week": 0.018397350114416476, + "month": 0.08005333428165007 + } + }, + "Mistral Small": { + "ALL": 24.792757599999998, + "NO KEYWORDS": { + "day": 0.0016472518456140351, + "week": 0.011510300163452107, + "month": 0.050085360170697014 + }, + "KEYWORDS": { + "day": 7.343978011695905e-05, + "week": 0.0005131661654135338, + "month": 0.0022329662873399716 + }, + "SUBHEADINGS": { + "day": 7.343978011695905e-05, + "week": 0.0005131661654135338, + "month": 0.0022329662873399716 + }, + "SITE PROPOSITION": { + "day": 9.612576374269007e-05, + "week": 0.0006716862373324615, + "month": 0.002922742816500711 + }, + "PROPOSITION": { + "day": 0.00013164326081871344, + "week": 0.0009198675057208238, + "month": 0.004002666714082503 + } + }, + "GPT-4o": { + "ALL": 774.773675, + "NO KEYWORDS": { + "day": 0.04765905052631579, + "week": 0.3330213157894737, + "month": 1.4490927524893316 + }, + "KEYWORDS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SUBHEADINGS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SITE PROPOSITION": { + "day": 0.002775600233918129, + "week": 0.019394722131415496, + "month": 0.08439325035561879 + }, + "PROPOSITION": { + "day": 0.003803116608187135, + "week": 0.02657457257273619, + "month": 0.1156353022759602 + } + }, + "GPT-4o mini": { + "ALL": 46.486420499999994, + "NO KEYWORDS": { + "day": 0.0028595430315789473, + "week": 0.019981278947368418, + "month": 0.08694556514935989 + }, + "KEYWORDS": { + "day": 0.00012741440701754387, + "week": 0.0008903180614579927, + "month": 0.003874086699857752 + }, + "SUBHEADINGS": { + "day": 0.00012741440701754387, + "week": 0.0008903180614579927, + "month": 0.003874086699857752 + }, + "SITE 
PROPOSITION": { + "day": 0.00016653601403508771, + "week": 0.0011636833278849297, + "month": 0.005063595021337126 + }, + "PROPOSITION": { + "day": 0.00022818699649122803, + "week": 0.001594474354364171, + "month": 0.006938118136557611 + } + }, + "DeepSeek-V3": { + "ALL": 84.91519478000001, + "NO KEYWORDS": { + "day": 0.005198999491929825, + "week": 0.036328412598888526, + "month": 0.15807768725462307 + }, + "KEYWORDS": { + "day": 0.00023164656421052632, + "week": 0.001618648352402746, + "month": 0.007043307695590327 + }, + "SUBHEADINGS": { + "day": 0.00023164656421052632, + "week": 0.001618648352402746, + "month": 0.007043307695590327 + }, + "SITE PROPOSITION": { + "day": 0.00030274447438596495, + "week": 0.002115450519777705, + "month": 0.009205068477951637 + }, + "PROPOSITION": { + "day": 0.00041483287438596496, + "week": 0.002898676917293233, + "month": 0.01261316172119488 + } + }, + "Gemini 2.0 Flash": { + "ALL": 30.990947000000002, + "NO KEYWORDS": { + "day": 0.0019063620210526318, + "week": 0.013320852631578948, + "month": 0.05796371009957327 + }, + "KEYWORDS": { + "day": 8.49429380116959e-05, + "week": 0.0005935453743053285, + "month": 0.002582724466571835 + }, + "SUBHEADINGS": { + "day": 8.49429380116959e-05, + "week": 0.0005935453743053285, + "month": 0.002582724466571835 + }, + "SITE PROPOSITION": { + "day": 0.00011102400935672515, + "week": 0.0007757888852566199, + "month": 0.003375730014224751 + }, + "PROPOSITION": { + "day": 0.0001521246643274854, + "week": 0.0010629829029094476, + "month": 0.0046254120910384075 + } + }, + "Gemini 2.0 Flash-Lite": { + "ALL": 23.243210249999997, + "NO KEYWORDS": { + "day": 0.0014297715157894737, + "week": 0.009990639473684209, + "month": 0.043472782574679944 + }, + "KEYWORDS": { + "day": 6.370720350877193e-05, + "week": 0.00044515903072899637, + "month": 0.001937043349928876 + }, + "SUBHEADINGS": { + "day": 6.370720350877193e-05, + "week": 0.00044515903072899637, + "month": 0.001937043349928876 + }, + "SITE 
PROPOSITION": { + "day": 8.326800701754386e-05, + "week": 0.0005818416639424649, + "month": 0.002531797510668563 + }, + "PROPOSITION": { + "day": 0.00011409349824561402, + "week": 0.0007972371771820855, + "month": 0.0034690590682788054 + } + }, + "Claude 3.7 Sonnet": { + "ALL": 1115.674092, + "NO KEYWORDS": { + "day": 0.06496416589473684, + "week": 0.4539421529911736, + "month": 1.975261800853485 + }, + "KEYWORDS": { + "day": 0.002893382877192982, + "week": 0.0202177374959137, + "month": 0.08797447937411096 + }, + "SUBHEADINGS": { + "day": 0.002893382877192982, + "week": 0.0202177374959137, + "month": 0.08797447937411096 + }, + "SITE PROPOSITION": { + "day": 0.0037776676491228073, + "week": 0.02639674599542334, + "month": 0.11486151635846373 + }, + "PROPOSITION": { + "day": 0.005178182035087719, + "week": 0.03618294900294214, + "month": 0.15744472403982931 + } + }, + "Claude 3.5 Haiku": { + "ALL": 297.51309119999996, + "NO KEYWORDS": { + "day": 0.017323777571929825, + "week": 0.12105124079764629, + "month": 0.526736480227596 + }, + "KEYWORDS": { + "day": 0.0007715687672514619, + "week": 0.005391396665576986, + "month": 0.023459861166429587 + }, + "SUBHEADINGS": { + "day": 0.0007715687672514619, + "week": 0.005391396665576986, + "month": 0.023459861166429587 + }, + "SITE PROPOSITION": { + "day": 0.0010073780397660819, + "week": 0.007039132265446224, + "month": 0.030629737695590326 + }, + "PROPOSITION": { + "day": 0.0013808485426900583, + "week": 0.00964878640078457, + "month": 0.04198525974395448 + } + }, + "Claude 3 Opus": { + "ALL": 5578.370459999999, + "NO KEYWORDS": { + "day": 0.3248208294736842, + "week": 2.2697107649558674, + "month": 9.876309004267425 + }, + "KEYWORDS": { + "day": 0.01446691438596491, + "week": 0.10108868747956848, + "month": 0.4398723968705548 + }, + "SUBHEADINGS": { + "day": 0.01446691438596491, + "week": 0.10108868747956848, + "month": 0.4398723968705548 + }, + "SITE PROPOSITION": { + "day": 0.018888338245614034, + "week": 
0.1319837299771167, + "month": 0.5743075817923186 + }, + "PROPOSITION": { + "day": 0.025890910175438597, + "week": 0.18091474501471067, + "month": 0.7872236201991465 + } + }, + "Command A": { + "ALL": 774.773675, + "NO KEYWORDS": { + "day": 0.04765905052631579, + "week": 0.3330213157894737, + "month": 1.4490927524893316 + }, + "KEYWORDS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SUBHEADINGS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SITE PROPOSITION": { + "day": 0.002775600233918129, + "week": 0.019394722131415496, + "month": 0.08439325035561879 + }, + "PROPOSITION": { + "day": 0.003803116608187135, + "week": 0.02657457257273619, + "month": 0.1156353022759602 + } + }, + "Command R+": { + "ALL": 774.773675, + "NO KEYWORDS": { + "day": 0.04765905052631579, + "week": 0.3330213157894737, + "month": 1.4490927524893316 + }, + "KEYWORDS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SUBHEADINGS": { + "day": 0.0021235734502923977, + "week": 0.014838634357633215, + "month": 0.06456811166429588 + }, + "SITE PROPOSITION": { + "day": 0.002775600233918129, + "week": 0.019394722131415496, + "month": 0.08439325035561879 + }, + "PROPOSITION": { + "day": 0.003803116608187135, + "week": 0.02657457257273619, + "month": 0.1156353022759602 + } + } +} \ No newline at end of file -- GitLab