# Build English text from the graph
import numpy as np

# `dense_matrix` is assumed to be defined earlier as a symmetric matrix of
# edge weights; only the upper triangle is scanned to avoid duplicate edges.
graph_text_list = []
upper_tri_indices = np.triu_indices(dense_matrix.shape[0], k=1)
for row, col in zip(upper_tri_indices[0], upper_tri_indices[1]):
    value = dense_matrix[row, col]
    if value > 0:
        rounded_value = "{:.2f}".format(value)  # round the weight to two decimal places
        line = f"node {row} has connection with node {col}, with connection weight of {rounded_value}"
        graph_text_list.append(line)
        # if row < 1 and col < 10:
        #     print(line)

text = "\n".join(graph_text_list)
print(len(text))  # total character count of the generated text

# Tokenize
import torch
from transformers import RobertaTokenizer, RobertaModel
from tqdm import tqdm  # progress bar

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the RoBERTa tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaModel.from_pretrained('roberta-base').to(device)

# Tokenize the text in 512-character chunks. `padding='max_length'` keeps every
# chunk at exactly 512 tokens so the hidden states can be concatenated later.
inputs = []
with tqdm(total=len(text)) as pbar:
    for i in range(0, len(text), 512):
        chunk = text[i:i + 512]
        tokenized = tokenizer(chunk, return_tensors='pt', padding='max_length',
                              truncation=True, max_length=512)
        inputs.append(tokenized)
        pbar.update(len(chunk))

# Run the model
outputs = []
with tqdm(total=len(inputs)) as pbar:
    for input_chunk in inputs:
        with torch.no_grad():
            output = model(**input_chunk.to(device))
        outputs.append(output.last_hidden_state)  # shape: (1, 512, 768)
        pbar.update(1)

# Combine everything into one big 10000 x 1024 tensor
print(outputs[0].shape, outputs[1].shape)
outputs_concatenated = torch.cat(outputs, dim=0)  # (num_chunks, 512, 768)
node_features = outputs_concatenated

# Flatten the chunk and token dimensions, then resize the whole feature map to
# 10000 x 1024 with nearest-neighbour interpolation. interpolate() resizes the
# last two dimensions of a 4D input, hence the reshape to (1, 1, tokens, 768).
desired_size = (10000, 1024)
flat = node_features.reshape(1, 1, -1, node_features.shape[-1])
node_features_resized = torch.nn.functional.interpolate(
    flat, size=desired_size, mode='nearest'
).squeeze(0).squeeze(0)  # (10000, 1024)

# Move to CPU before converting to a NumPy array
node_features_array = node_features_resized.cpu().numpy()
tensor = torch.tensor(node_features_array, dtype=torch.float32)
print(tensor.shape)  # torch.Size([10000, 1024])
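
# --- Optional sanity check (a sketch, not in the original): verify the final
# feature tensor before handing it to a downstream model. The (10000, 1024)
# target size is taken from the section comment above; adjust if your graph
# produces a different desired size.
assert tensor.shape == (10000, 1024), tensor.shape
assert not torch.isnan(tensor).any()
print("feature stats:", tensor.mean().item(), tensor.std().item())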
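
# --- Design note (an alternative sketch, not the author's method): resizing
# hidden states with nearest-neighbour interpolation is unusual. A more
# conventional fixed-size summary is to mask-aware mean-pool each chunk's token
# embeddings, then project 768 -> 1024 with a linear layer. The `proj` layer
# here is hypothetical and randomly initialised, for illustration only.
feats = []
for enc, h in zip(inputs, outputs):
    mask = enc['attention_mask'].to(h.device).unsqueeze(-1)  # (1, 512, 1)
    feats.append((h * mask).sum(dim=1) / mask.sum(dim=1))    # (1, 768), padding excluded
pooled = torch.cat(feats, dim=0)                             # (num_chunks, 768)

proj = torch.nn.Linear(768, 1024).to(device)
with torch.no_grad():
    chunk_features = proj(pooled)  # (num_chunks, 1024)
print(chunk_features.shape)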