Skip to content
Snippets Groups Projects
Commit 497926b0 authored by Ivan Pavlovich's avatar Ivan Pavlovich
Browse files

Done: llm price calculation script finished

parent e8f3c549
Branches
No related tags found
No related merge requests found
......@@ -11,7 +11,7 @@ from testModel.utils import get_article_data
from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../dataSources/PubMed/data"))
DOC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./doc"))
RESULT_DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "./data"))
INTERVALS = [
"day",
......@@ -208,5 +208,5 @@ for tokenizer_name in TOKENIZERS:
"mean": statistics.mean(counts[tokenizer_name][category][interval][i])
}
with open(f"{DOC_DIR}/token_count.json", "w") as json_file:
with open(f"{RESULT_DATA_DIR}/token_count.json", "w") as json_file:
json.dump(counts, json_file, indent=4)
from transformers import AutoTokenizer
import json
import sys
import os
import statistics
from datetime import datetime, timedelta
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))
from testModel.utils import get_article_data
from variables.pubmed import NCDS_MESH_TERM, KEYWORDS_MESH_TERM, KEYWORDS_MESH_SUBHEADING, KEYWORDS_MESH_SITE_PROPOSITION, KEYWORDS_MESH_PROPOSITION
# Directory containing this script; both data locations are resolved against
# it so the script behaves the same regardless of the current working dir.
_SCRIPT_DIR = os.path.dirname(__file__)

# Where the token statistics (token_count.json) are read from.
DATA_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, "../Tokenizer/data"))
# Where the computed price table is written.
RESULT_DATA_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, "./data"))
# Prices for 1M tokens, keyed by model name. Each entry holds the rate for
# prompt ("input") tokens and for generated ("output") tokens.
#
# Every model must appear exactly once: a dict literal silently keeps only the
# last value of a repeated key, which makes stale or placeholder rows look
# valid while having no effect.
PRICES = {
    'Mistral Large': {'input': 2, 'output': 6},
    'Mistral Small': {'input': 0.1, 'output': 0.3},
    'GPT-4o': {'input': 2.5, 'output': 10},
    'GPT-4o mini': {'input': 0.15, 'output': 0.6},
    'DeepSeek-V3': {'input': 0.27, 'output': 1.1},
    'Gemini 2.0 Flash': {'input': 0.1, 'output': 0.4},
    'Gemini 2.0 Flash-Lite': {'input': 0.075, 'output': 0.3},
    'Claude 3.7 Sonnet': {'input': 3, 'output': 15},
    'Claude 3.5 Haiku': {'input': 0.8, 'output': 4},
    'Claude 3 Opus': {'input': 15, 'output': 75},
    'Command A': {'input': 2.5, 'output': 10},
    'Command R+': {'input': 2.5, 'output': 10},
}
# Time windows for which token statistics are available in the input data.
INTERVALS = ["day", "week", "month"]

# Query variants under which the token statistics are grouped.
CATEGORIES = ["NO KEYWORDS", "KEYWORDS", "SUBHEADINGS", "SITE PROPOSITION", "PROPOSITION"]

# Tokenizers whose counts are averaged together.
# Currently disabled: "openai-community/gpt-4", "meta-llama/Llama-2-7b-hf"
TOKENIZERS = ["bert-base-uncased", "roberta-base", "facebook/bart-large"]
# Load the per-tokenizer token statistics that the price estimate is based on.
file_path = f"{DATA_DIR}/token_count.json"
with open(file_path, "r", encoding="utf-8") as token_count_file:
    raw_json = token_count_file.read()
data = json.loads(raw_json)
# Average the token statistics over all tokenizers so the price estimate does
# not depend on a single tokenizer.
#
# Resulting layout:
#   mean_data["ALL"]                          -> mean of data[tokenizer]["ALL"]
#   mean_data[category][interval]["input"]    -> mean of the per-tokenizer "mean" values
#   mean_data[category][interval]["output"]   -> mean of the per-tokenizer "mean" values
#
# The divisor is the number of tokenizers. It must not be len() of a tokenizer
# *name*: that is the character count of a string and scales every
# per-category figure by an unrelated factor.
tokenizer_count = len(TOKENIZERS)

mean_data = {
    "ALL": sum(data[tokenizer_name]["ALL"] for tokenizer_name in TOKENIZERS) / tokenizer_count
}
for category in CATEGORIES:
    mean_data[category] = {}
    for interval in INTERVALS:
        mean_data[category][interval] = {
            direction: sum(
                data[tokenizer_name][category][interval][direction]["mean"]
                for tokenizer_name in TOKENIZERS
            ) / tokenizer_count
            for direction in ("input", "output")
        }
# Turn the averaged token counts into an estimated cost per model.
#
# PRICES holds rates per 1M tokens, so each rate is first scaled down to a
# per-token rate. Resulting layout:
#   res[llm]["ALL"]               -> cost of mean_data["ALL"] tokens charged at
#                                    both the input and the output rate
#   res[llm][category][interval]  -> input cost + output cost for that slice
res = {}
for llm, prices in PRICES.items():
    input_rate = prices["input"] / 1000000
    output_rate = prices["output"] / 1000000

    llm_costs = {
        "ALL": input_rate * mean_data["ALL"] + output_rate * mean_data["ALL"]
    }
    for category in CATEGORIES:
        llm_costs[category] = {
            interval: (
                input_rate * mean_data[category][interval]["input"]
                + output_rate * mean_data[category][interval]["output"]
            )
            for interval in INTERVALS
        }
    res[llm] = llm_costs
}
\ No newline at end of file
# Persist the price table as pretty-printed JSON next to the script's data.
serialized_prices = json.dumps(res, indent=4)
with open(f"{RESULT_DATA_DIR}/llm_prices.json", "w") as prices_file:
    prices_file.write(serialized_prices)
\ No newline at end of file
{
"Mistral Large": {
"ALL": 495.855152,
"NO KEYWORDS": {
"day": 0.032945036912280706,
"week": 0.23020600326904217,
"month": 1.0017072034139403
},
"KEYWORDS": {
"day": 0.001468795602339181,
"week": 0.010263323308270676,
"month": 0.04465932574679943
},
"SUBHEADINGS": {
"day": 0.001468795602339181,
"week": 0.010263323308270676,
"month": 0.04465932574679943
},
"SITE PROPOSITION": {
"day": 0.0019225152748538013,
"week": 0.013433724746649231,
"month": 0.058454856330014224
},
"PROPOSITION": {
"day": 0.002632865216374269,
"week": 0.018397350114416476,
"month": 0.08005333428165007
}
},
"Mistral Small": {
"ALL": 24.792757599999998,
"NO KEYWORDS": {
"day": 0.0016472518456140351,
"week": 0.011510300163452107,
"month": 0.050085360170697014
},
"KEYWORDS": {
"day": 7.343978011695905e-05,
"week": 0.0005131661654135338,
"month": 0.0022329662873399716
},
"SUBHEADINGS": {
"day": 7.343978011695905e-05,
"week": 0.0005131661654135338,
"month": 0.0022329662873399716
},
"SITE PROPOSITION": {
"day": 9.612576374269007e-05,
"week": 0.0006716862373324615,
"month": 0.002922742816500711
},
"PROPOSITION": {
"day": 0.00013164326081871344,
"week": 0.0009198675057208238,
"month": 0.004002666714082503
}
},
"GPT-4o": {
"ALL": 774.773675,
"NO KEYWORDS": {
"day": 0.04765905052631579,
"week": 0.3330213157894737,
"month": 1.4490927524893316
},
"KEYWORDS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SUBHEADINGS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SITE PROPOSITION": {
"day": 0.002775600233918129,
"week": 0.019394722131415496,
"month": 0.08439325035561879
},
"PROPOSITION": {
"day": 0.003803116608187135,
"week": 0.02657457257273619,
"month": 0.1156353022759602
}
},
"GPT-4o mini": {
"ALL": 46.486420499999994,
"NO KEYWORDS": {
"day": 0.0028595430315789473,
"week": 0.019981278947368418,
"month": 0.08694556514935989
},
"KEYWORDS": {
"day": 0.00012741440701754387,
"week": 0.0008903180614579927,
"month": 0.003874086699857752
},
"SUBHEADINGS": {
"day": 0.00012741440701754387,
"week": 0.0008903180614579927,
"month": 0.003874086699857752
},
"SITE PROPOSITION": {
"day": 0.00016653601403508771,
"week": 0.0011636833278849297,
"month": 0.005063595021337126
},
"PROPOSITION": {
"day": 0.00022818699649122803,
"week": 0.001594474354364171,
"month": 0.006938118136557611
}
},
"DeepSeek-V3": {
"ALL": 84.91519478000001,
"NO KEYWORDS": {
"day": 0.005198999491929825,
"week": 0.036328412598888526,
"month": 0.15807768725462307
},
"KEYWORDS": {
"day": 0.00023164656421052632,
"week": 0.001618648352402746,
"month": 0.007043307695590327
},
"SUBHEADINGS": {
"day": 0.00023164656421052632,
"week": 0.001618648352402746,
"month": 0.007043307695590327
},
"SITE PROPOSITION": {
"day": 0.00030274447438596495,
"week": 0.002115450519777705,
"month": 0.009205068477951637
},
"PROPOSITION": {
"day": 0.00041483287438596496,
"week": 0.002898676917293233,
"month": 0.01261316172119488
}
},
"Gemini 2.0 Flash": {
"ALL": 30.990947000000002,
"NO KEYWORDS": {
"day": 0.0019063620210526318,
"week": 0.013320852631578948,
"month": 0.05796371009957327
},
"KEYWORDS": {
"day": 8.49429380116959e-05,
"week": 0.0005935453743053285,
"month": 0.002582724466571835
},
"SUBHEADINGS": {
"day": 8.49429380116959e-05,
"week": 0.0005935453743053285,
"month": 0.002582724466571835
},
"SITE PROPOSITION": {
"day": 0.00011102400935672515,
"week": 0.0007757888852566199,
"month": 0.003375730014224751
},
"PROPOSITION": {
"day": 0.0001521246643274854,
"week": 0.0010629829029094476,
"month": 0.0046254120910384075
}
},
"Gemini 2.0 Flash-Lite": {
"ALL": 23.243210249999997,
"NO KEYWORDS": {
"day": 0.0014297715157894737,
"week": 0.009990639473684209,
"month": 0.043472782574679944
},
"KEYWORDS": {
"day": 6.370720350877193e-05,
"week": 0.00044515903072899637,
"month": 0.001937043349928876
},
"SUBHEADINGS": {
"day": 6.370720350877193e-05,
"week": 0.00044515903072899637,
"month": 0.001937043349928876
},
"SITE PROPOSITION": {
"day": 8.326800701754386e-05,
"week": 0.0005818416639424649,
"month": 0.002531797510668563
},
"PROPOSITION": {
"day": 0.00011409349824561402,
"week": 0.0007972371771820855,
"month": 0.0034690590682788054
}
},
"Claude 3.7 Sonnet": {
"ALL": 1115.674092,
"NO KEYWORDS": {
"day": 0.06496416589473684,
"week": 0.4539421529911736,
"month": 1.975261800853485
},
"KEYWORDS": {
"day": 0.002893382877192982,
"week": 0.0202177374959137,
"month": 0.08797447937411096
},
"SUBHEADINGS": {
"day": 0.002893382877192982,
"week": 0.0202177374959137,
"month": 0.08797447937411096
},
"SITE PROPOSITION": {
"day": 0.0037776676491228073,
"week": 0.02639674599542334,
"month": 0.11486151635846373
},
"PROPOSITION": {
"day": 0.005178182035087719,
"week": 0.03618294900294214,
"month": 0.15744472403982931
}
},
"Claude 3.5 Haiku": {
"ALL": 297.51309119999996,
"NO KEYWORDS": {
"day": 0.017323777571929825,
"week": 0.12105124079764629,
"month": 0.526736480227596
},
"KEYWORDS": {
"day": 0.0007715687672514619,
"week": 0.005391396665576986,
"month": 0.023459861166429587
},
"SUBHEADINGS": {
"day": 0.0007715687672514619,
"week": 0.005391396665576986,
"month": 0.023459861166429587
},
"SITE PROPOSITION": {
"day": 0.0010073780397660819,
"week": 0.007039132265446224,
"month": 0.030629737695590326
},
"PROPOSITION": {
"day": 0.0013808485426900583,
"week": 0.00964878640078457,
"month": 0.04198525974395448
}
},
"Claude 3 Opus": {
"ALL": 5578.370459999999,
"NO KEYWORDS": {
"day": 0.3248208294736842,
"week": 2.2697107649558674,
"month": 9.876309004267425
},
"KEYWORDS": {
"day": 0.01446691438596491,
"week": 0.10108868747956848,
"month": 0.4398723968705548
},
"SUBHEADINGS": {
"day": 0.01446691438596491,
"week": 0.10108868747956848,
"month": 0.4398723968705548
},
"SITE PROPOSITION": {
"day": 0.018888338245614034,
"week": 0.1319837299771167,
"month": 0.5743075817923186
},
"PROPOSITION": {
"day": 0.025890910175438597,
"week": 0.18091474501471067,
"month": 0.7872236201991465
}
},
"Command A": {
"ALL": 774.773675,
"NO KEYWORDS": {
"day": 0.04765905052631579,
"week": 0.3330213157894737,
"month": 1.4490927524893316
},
"KEYWORDS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SUBHEADINGS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SITE PROPOSITION": {
"day": 0.002775600233918129,
"week": 0.019394722131415496,
"month": 0.08439325035561879
},
"PROPOSITION": {
"day": 0.003803116608187135,
"week": 0.02657457257273619,
"month": 0.1156353022759602
}
},
"Command R+": {
"ALL": 774.773675,
"NO KEYWORDS": {
"day": 0.04765905052631579,
"week": 0.3330213157894737,
"month": 1.4490927524893316
},
"KEYWORDS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SUBHEADINGS": {
"day": 0.0021235734502923977,
"week": 0.014838634357633215,
"month": 0.06456811166429588
},
"SITE PROPOSITION": {
"day": 0.002775600233918129,
"week": 0.019394722131415496,
"month": 0.08439325035561879
},
"PROPOSITION": {
"day": 0.003803116608187135,
"week": 0.02657457257273619,
"month": 0.1156353022759602
}
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment