Paste: llm code
Author: | 1 |
Mode: | python |
Date: | Sat, 2 Mar 2024 06:16:16 |
Plain Text |
import numpy as np
# Turn the strictly-upper-triangular part of ``dense_matrix`` (k=1 skips the
# diagonal) into one English sentence per positive entry, then join the
# sentences into a single newline-separated string.
upper_tri_indices = np.triu_indices(dense_matrix.shape[0], k=1)

# Lazily pair every (row, col) index above the diagonal with its matrix value.
weighted_pairs = (
    (src, dst, dense_matrix[src, dst])
    for src, dst in zip(*upper_tri_indices)
)

# Only strictly positive entries become sentences; weights are shown with two
# decimal places.
graph_text_list = [
    f"node {src} has connection with node {dst}, with connection weight of {weight:.2f}"
    for src, dst, weight in weighted_pairs
    if weight > 0
]

text = "\n".join(graph_text_list)
print(len(text))
from transformers import RobertaTokenizer, RobertaModel
from tqdm import tqdm
# Load the pretrained 'roberta-base' tokenizer and encoder, then move the
# encoder's parameters onto ``device`` (defined elsewhere in the file).
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaModel.from_pretrained('roberta-base')
model = model.to(device)
def _iter_windows(source, width):
    """Yield consecutive, non-overlapping slices of *source*, each at most *width* long."""
    for start in range(0, len(source), width):
        yield source[start:start + width]


# Tokenize ``text`` window by window. Each window is 512 *characters* of the
# string (not 512 tokens); ``max_length=512`` separately caps the token count
# of each encoded window. The progress bar counts characters consumed.
inputs = []
with tqdm(total=len(text)) as char_bar:
    for window in _iter_windows(text, 512):
        encoded = tokenizer(
            window,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=512,
        )
        inputs.append(encoded)
        char_bar.update(len(window))
# Run every tokenized chunk through the encoder and keep its final-layer
# hidden states. Gradients are not needed for feature extraction, so the
# whole loop runs under a single ``torch.no_grad()`` context.
outputs = []
with torch.no_grad():
    for input_chunk in tqdm(inputs):
        # The encoded chunk is moved to ``device`` before the forward pass.
        output = model(**input_chunk.to(device))
        outputs.append(output.last_hidden_state)

# Sanity-check the shapes of the first (up to) two chunks. Slicing instead of
# indexing [0] and [1] avoids an IndexError when the text produced fewer than
# two chunks.
print(*(chunk_states.shape for chunk_states in outputs[:2]))
# Merge the per-chunk hidden states into one feature tensor and resample it to
# a fixed size.
#
# The chunks are consecutive slices of one text and generally tokenize to
# different lengths, so they are joined along the sequence axis (dim=1).
# Concatenating on dim=0 requires every chunk to have the same sequence length
# and raises a size-mismatch error otherwise.
# Assumes each element of ``outputs`` is (1, seq_len_i, hidden) -- TODO confirm.
outputs_concatenated = torch.cat(outputs, dim=1)
node_features = outputs_concatenated

# Target (rows, cols) for the resampled feature grid.
desired_size = (10000, 1024)

# ``interpolate`` with a 2-element ``size`` needs a 4-D (N, C, H, W) input, so
# a leading axis is added for the call and removed afterwards. Nearest-neighbour
# mode only repeats or drops existing values.
# NOTE(review): the result keeps a leading axis of length 1, i.e.
# (1, 10000, 1024); confirm downstream code expects that rather than
# exactly ``desired_size``.
node_features_resized = torch.nn.functional.interpolate(
    node_features.unsqueeze(0),
    size=desired_size,
    mode='nearest',
).squeeze(0)

# ``Tensor.numpy()`` only works on CPU tensors; ``.cpu()`` is a no-op when the
# data is already there and prevents a TypeError when ``device`` is a GPU.
node_features_array = node_features_resized.cpu().numpy()
tensor = torch.tensor(node_features_array, dtype=torch.float32)
print(tensor.shape)
New Annotation