Paste: llm code

Author: 1
Mode: python
Date: Sat, 2 Mar 2024 06:16:16
Plain Text |
# Describe the graph's weighted edges as English text
import numpy as np
# One English sentence per positively-weighted edge of the graph.
graph_text_list = []

# (row, col) index pairs of the strict upper triangle (k=1 skips the diagonal),
# so each unordered node pair is considered once.
upper_tri_indices = np.triu_indices(dense_matrix.shape[0], k=1)

# Read all upper-triangle weights in one vectorised lookup and keep only the
# positive ones, so the Python-level loop below touches real edges only instead
# of every possible node pair. np.asarray guards against np.matrix inputs,
# whose fancy indexing would otherwise return a 2-D result.
edge_rows, edge_cols = upper_tri_indices
edge_values = np.asarray(dense_matrix)[edge_rows, edge_cols]
has_edge = edge_values > 0

for row, col, value in zip(edge_rows[has_edge], edge_cols[has_edge], edge_values[has_edge]):
    # The weight is rendered with two decimal places.
    graph_text_list.append(
        f"node {row} has connection with node {col}, "
        f"with connection weight of {value:.2f}"
    )

# One edge description per line.
text = "\n".join(graph_text_list)

# Report the size of the generated text in characters
print(len(text))





#  tokenize

from transformers import RobertaTokenizer, RobertaModel
from tqdm import tqdm  # Import tqdm for progress bar


# Load RoBERTa tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaModel.from_pretrained('roberta-base').to(device)


# Walk through the text in consecutive 512-character windows and tokenize each
# window on its own; the progress bar advances by the characters consumed.
# NOTE(review): the window size is in characters while max_length is in
# tokens, and a window boundary can fall in the middle of a sentence - confirm
# this is intended.
inputs = []
with tqdm(total=len(text)) as pbar:
    start = 0
    while start < len(text):
        chunk = text[start:start + 512]
        inputs.append(
            tokenizer(chunk, return_tensors='pt', padding=True, truncation=True, max_length=512)
        )
        pbar.update(len(chunk))
        start += 512







# Run the model

# Push every tokenized chunk through the model with gradient tracking disabled
# and collect each chunk's last_hidden_state, advancing the bar once per chunk.
outputs = []
with torch.no_grad(), tqdm(total=len(inputs)) as pbar:
    for input_chunk in inputs:
        hidden_states = model(**input_chunk.to(device)).last_hidden_state
        outputs.append(hidden_states)
        pbar.update(1)






# Merge all chunk outputs into one large 10000 x 1024 tensor

if not outputs:
    raise ValueError("no model outputs to concatenate")

# Show the shapes of the first two chunks; slicing (rather than indexing
# outputs[1]) keeps this working when the text produced a single chunk.
print(*(chunk_states.shape for chunk_states in outputs[:2]))

# Each entry is expected to be (1, seq_len, hidden) with a seq_len that varies
# from chunk to chunk, so the chunks are joined along the token axis (dim=1);
# joining along dim=0 requires every other dimension to match and fails here.
# Tensors are moved to the CPU first so the later .numpy() call is valid even
# when the model ran on a GPU.
outputs_concatenated = torch.cat(
    [chunk_states.detach().cpu() for chunk_states in outputs], dim=1
)
node_features = outputs_concatenated.squeeze(0)  # (total_tokens, hidden)

# Resize the features to 10000x1024
# interpolate() resizes the trailing spatial axes of a (batch, channel, H, W)
# input, so the 2-D feature matrix gets two leading singleton axes that are
# stripped again afterwards.
# NOTE(review): this is a nearest-neighbour resample of token embeddings, not a
# per-node embedding - confirm that is the intended meaning of the rows.
desired_size = (10000, 1024)
node_features_resized = torch.nn.functional.interpolate(
    node_features[None, None], size=desired_size, mode='nearest'
)[0, 0]

# Convert to numpy array
node_features_array = node_features_resized.numpy()

# Independent float32 copy of the resized features
tensor = torch.tensor(node_features_array, dtype=torch.float32)
print(tensor.shape)

New Annotation

Summary:
Author:
Mode:
Body: