huuminh365
/

CustomBERT

Model card Files Files and versions

CustomBERT / test.py

huuminh365's picture

first commit

c8b6e7d about 3 years ago

history blame contribute delete

1.09 kB

	import re
	import py_vncorenlp
	from const import *
	from pprint import pprint
	from latex2operatortree import *
	from transformers import AutoTokenizer, AutoModel
	from const import LATEX_VOC
	# text = "Trong các hình vẽ sau $y=\dfrac{x+1}{-x+1}$, hình nào biểu diễn đồ thị của hàm số $y=x^3$, $y=x^5$?"
	# pattern = r'\$.*?\$'

	# equations = re.findall(pattern, text)

	# pprint(latex2tree(text))
	# dir = 'code/'
	# py_vncorenlp.download_model(save_dir=dir)
	# model = py_vncorenlp.VnCoreNLP(save_dir='code/')


	# Smoke test: load the tokenizer, extend it with the LaTeX vocabulary and
	# print how a sample maths problem (Vietnamese text + inline LaTeX) is split.

	# Hugging Face Hub repository id the tokenizer is loaded from.
	model_path = "huuminh365/CustomBERT"
	# Extra vocabulary entries, taken from LATEX_VOC in the const module.
	latex_token = LATEX_VOC
	# print(len(latex_token), latex_token[:5])

	tok = AutoTokenizer.from_pretrained(model_path)
	# model = AutoModel.from_pretrained(model_path)

	# Register the LaTeX entries as additional tokens on the loaded tokenizer.
	tok.add_tokens(latex_token)
	# NOTE(review): model_path contains a "/", so the target below would be a
	# nested directory path - confirm that is intended before re-enabling.
	# tok.save_pretrained(f'tokenizer_{model_path}_with_latex')

	# Raw string: the sample contains LaTeX commands (\dfrac, \mathbb) whose
	# backslashes would otherwise be parsed as invalid escape sequences
	# (a SyntaxWarning on Python 3.12+). The resulting text is unchanged.
	text = r'Tính diện tích hình phẳng giới hạn bởi đồ thị các hàm số $y =x^3$, $y=x^5$, $\dfrac{2}{x}=5$. Cho hàm số $y=f(x)$ liên tục trên $\mathbb{R}$, có đồ thị cắt trục $Ox$ tại các điểm có hoành độ'
	print(tok.tokenize(text))