format code
use yapf to format the code; change the column limit from 80 to 120
parent e57fe43915
commit e6175ef036
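The hunks below come from running yapf over the repository with the column limit raised from 80 to 120. The exact style file used is not recorded in this commit, so the snippet below is only a rough sketch of how the same rewrapping can be reproduced through yapf's public API; the style options and the sample input are illustrative assumptions, not part of the change itself.

# Illustration only; the style string mirrors the CLI form
# `yapf --style='{based_on_style: pep8, column_limit: 120}' -i -r .`
# and is an assumption, not the style file actually used in this repository.
from yapf.yapflib.yapf_api import FormatCode

# One of the call sites touched below, wrapped at the old 80-column limit.
before = ("self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size,\n"
          "                           self.pos_dim)\n")

# FormatCode returns (formatted_source, changed); with column_limit=120 the
# wrapped arguments fit on a single line, as in the hunks that follow.
after, changed = FormatCode(before, style_config='{based_on_style: pep8, column_limit: 120}')
print(after)    # -> self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size, self.pos_dim)
print(changed)  # -> True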

@@ -44,8 +44,8 @@ def collate_fn(batch):
         tail_pos.append(_padding(data['tail_pos'], max_len))
         mask_pos.append(_padding(data['mask_pos'], max_len))
         y.append(data['target'])
-    return torch.tensor(sent), torch.tensor(head_pos), torch.tensor(
-        tail_pos), torch.tensor(mask_pos), torch.tensor(y)
+    return torch.tensor(sent), torch.tensor(head_pos), torch.tensor(tail_pos), torch.tensor(
+        mask_pos), torch.tensor(y)
 
 
 if __name__ == '__main__':

@@ -55,19 +55,10 @@ if __name__ == '__main__':
     vocab = load_pkl(vocab_path)
 
     train_dataset = CustomDataset(train_data_path)
-    dataloader = DataLoader(train_dataset,
-                            batch_size=4,
-                            shuffle=True,
-                            collate_fn=collate_fn)
+    dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
 
     for idx, (*x, y) in enumerate(dataloader):
         print(x)
         print(y)
         break
-        # sent, head_pos, tail_pos, mask_pos = x
-        # raw_sents = []
-        # for i in range(4):
-        # raw_sent = [vocab.idx2word[i] for i in sent[i].numpy()]
-        # raw_sents.append(''.join(raw_sent))
-        # print(raw_sents, head_pos, tail_pos, mask, y, sep='\n\n')
-        # break

@@ -24,12 +24,10 @@ class CNN(BasicModule):
         for k in self.kernel_size:
             assert k % 2 == 1, "kernel size has to be odd numbers."
 
-        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size,
-                                   self.pos_dim)
+        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size, self.pos_dim)
         # PCNN embedding
         self.mask_embed = nn.Embedding(4, 3)
-        masks = torch.tensor([[0, 0, 0], [100, 0, 0], [0, 100, 0], [0, 0,
-                                                                    100]])
+        masks = torch.tensor([[0, 0, 0], [100, 0, 0], [0, 100, 0], [0, 0, 100]])
         self.mask_embed.weight.data.copy_(masks)
         self.mask_embed.weight.requires_grad = False
 

@@ -21,17 +21,14 @@ class Capsule(BasicModule):
         self.output_unit_size = config.capsule.output_unit_size
         self.num_iterations = config.capsule.num_iterations
 
-        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size,
-                                   self.pos_dim)
+        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size, self.pos_dim)
         self.input_dim = self.word_dim + self.pos_dim * 2
         self.lstm = VarLenLSTM(
             self.input_dim,
             self.hidden_dim,
         )
-        self.capsule = CapsuleNet(self.num_primary_units,
-                                  self.num_output_units, self.primary_channels,
-                                  self.primary_unit_size,
-                                  self.output_unit_size, self.num_iterations)
+        self.capsule = CapsuleNet(self.num_primary_units, self.num_output_units, self.primary_channels,
+                                  self.primary_unit_size, self.output_unit_size, self.num_iterations)
 
     def forward(self, input):
         *x, mask = input

@@ -66,8 +63,8 @@ class Capsule(BasicModule):
 
 
 class CapsuleNet(nn.Module):
-    def __init__(self, num_primary_units, num_output_units, primary_channels,
-                 primary_unit_size, output_unit_size, num_iterations):
+    def __init__(self, num_primary_units, num_output_units, primary_channels, primary_unit_size, output_unit_size,
+                 num_iterations):
         super(CapsuleNet, self).__init__()
         self.primary = CapsuleLayer(in_units=0,
                                     out_units=num_primary_units,

@@ -102,8 +99,7 @@ class ConvUnit(nn.Module):
 
 
 class CapsuleLayer(nn.Module):
-    def __init__(self, in_units, out_units, in_channels, unit_size,
-                 use_routing, num_iterations):
+    def __init__(self, in_units, out_units, in_channels, unit_size, use_routing, num_iterations):
         super(CapsuleLayer, self).__init__()
         self.in_units = in_units
         self.out_units = out_units

@@ -112,8 +108,7 @@ class CapsuleLayer(nn.Module):
         self.use_routing = use_routing
 
         if self.use_routing:
-            self.W = nn.Parameter(
-                torch.randn(1, in_channels, out_units, unit_size, in_units))
+            self.W = nn.Parameter(torch.randn(1, in_channels, out_units, unit_size, in_units))
             self.num_iterations = num_iterations
         else:

@@ -190,8 +185,7 @@ class CapsuleLayer(nn.Module):
             v_j1 = torch.cat([v_j] * self.in_channels, dim=1)
 
             # (1, features, out_units, 1)
-            u_vj1 = torch.matmul(u_hat.transpose(3, 4),
-                                 v_j1).squeeze(4).mean(dim=0, keepdim=True)
+            u_vj1 = torch.matmul(u_hat.transpose(3, 4), v_j1).squeeze(4).mean(dim=0, keepdim=True)
 
             # Update b_ij (routing)
             b_ij = u_vj1

@@ -3,8 +3,7 @@ import torch.nn as nn
 
 
 class Embedding(nn.Module):
-    def __init__(self, vocab_size: int, word_dim: int, pos_size: int,
-                 pos_dim: int):
+    def __init__(self, vocab_size: int, word_dim: int, pos_size: int, pos_dim: int):
         super(Embedding, self).__init__()
         self.word_embed = nn.Embedding(vocab_size, word_dim, padding_idx=0)
         self.head_pos_embed = nn.Embedding(pos_size, pos_dim, padding_idx=0)

@@ -15,6 +15,6 @@ class LM(BasicModule):
 
     def forward(self, x):
         x = x[0]
-        out = self.lm(x)[0][:,0]
+        out = self.lm(x)[0][:, 0]
         out = self.fc(out)
         return out

@@ -6,12 +6,7 @@ from deepke.model import BasicModule, Embedding
 
 
 class VarLenLSTM(BasicModule):
-    def __init__(self,
-                 input_size,
-                 hidden_size,
-                 lstm_layers=1,
-                 dropout=0,
-                 last_hn=False):
+    def __init__(self, input_size, hidden_size, lstm_layers=1, dropout=0, last_hn=False):
         super(VarLenLSTM, self).__init__()
         self.model_name = 'VarLenLSTM'
         self.lstm_layers = lstm_layers

@@ -36,10 +31,7 @@ class VarLenLSTM(BasicModule):
         out: [B * seq_len * hidden] hidden = 2 * hidden_dim
         hn: [B * layers * hidden] hidden = 2 * hidden_dim
         '''
-        x = pack_padded_sequence(x,
-                                 x_len,
-                                 batch_first=True,
-                                 enforce_sorted=True)
+        x = pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=True)
         out, (hn, _) = self.lstm(x)
         out, _ = pad_packed_sequence(out, batch_first=True, padding_value=0.0)
         hn = hn.transpose(0, 1).contiguous()

@@ -65,8 +57,7 @@ class BiLSTM(BasicModule):
         self.last_hn = config.rnn.last_hn
         self.out_dim = config.relation_type
 
-        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size,
-                                   self.pos_dim)
+        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size, self.pos_dim)
         self.input_dim = self.word_dim + self.pos_dim * 2
         self.lstm = VarLenLSTM(self.input_dim,
                                self.hidden_dim,

@@ -101,10 +92,7 @@ if __name__ == '__main__':
     ])
     x_len = torch.Tensor([6, 3, 3, 2])
     embedding = nn.Embedding(5, 10, padding_idx=0)
-    model = VarLenLSTM(input_size=10,
-                       hidden_size=30,
-                       lstm_layers=5,
-                       last_hn=False)
+    model = VarLenLSTM(input_size=10, hidden_size=30, lstm_layers=5, last_hn=False)
 
     x = embedding(x)  # [4, 6, 5]
     out, hn = model(x, x_len)

@@ -68,8 +68,7 @@ class MultiHeadAttention(nn.Module):
         v = v.permute(2, 0, 1, 3).contiguous().view(-1, sk, feature)
         if att_mask_out is not None:
             att_mask_out = att_mask_out.repeat(n_head, 1, 1)
-        att = self.attention(q, k, v,
-                             att_mask_out).view(n_head, batch, sq, feature)
+        att = self.attention(q, k, v, att_mask_out).view(n_head, batch, sq, feature)
 
         # concat all heads, do output linear
         # [num_head, batch, seq_len, feature] => [batch, seq_len, num_head*feature]

@@ -91,15 +90,12 @@ class Transformer(BasicModule):
         self.layers = config.transformer.transformer_layers
         self.out_dim = config.relation_type
 
-        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size,
-                                   self.pos_dim)
+        self.embedding = Embedding(self.vocab_size, self.word_dim, self.pos_size, self.pos_dim)
         self.feature_dim = self.word_dim + self.pos_dim * 2
         self.att = MultiHeadAttention(self.feature_dim, num_head=4)
         self.norm1 = nn.LayerNorm(self.feature_dim)
-        self.ffn = nn.Sequential(nn.Linear(self.feature_dim, self.hidden_dim),
-                                 nn.ReLU(),
-                                 nn.Linear(self.hidden_dim, self.feature_dim),
-                                 nn.Dropout(self.dropout))
+        self.ffn = nn.Sequential(nn.Linear(self.feature_dim, self.hidden_dim), nn.ReLU(),
+                                 nn.Linear(self.hidden_dim, self.feature_dim), nn.Dropout(self.dropout))
         self.norm2 = nn.LayerNorm(self.feature_dim)
         self.fc = nn.Linear(self.feature_dim, self.out_dim)
 

@@ -21,8 +21,7 @@ def _mask_feature(entities_idx: List, sen_len: int) -> List:
     return left + middle + right
 
 
-def _pos_feature(sent_len: int, entity_idx: int, entity_len: int,
-                 pos_limit: int) -> List:
+def _pos_feature(sent_len: int, entity_idx: int, entity_len: int, pos_limit: int) -> List:
 
     left = list(range(-entity_idx, 0))
     middle = [0] * entity_len

@@ -54,8 +53,7 @@ def _build_data(data: List[Dict], vocab: Vocab, relations: Dict) -> List[Dict]:
             head_len, tail_len = 1, 1
         else:
             head_len, tail_len = len(d['head_type']), len(d['tail_type'])
-        entities_idx = [head_idx, tail_idx
-                        ] if tail_idx > head_idx else [tail_idx, head_idx]
+        entities_idx = [head_idx, tail_idx] if tail_idx > head_idx else [tail_idx, head_idx]
         head_pos = _pos_feature(seq_len, head_idx, head_len, config.pos_limit)
         tail_pos = _pos_feature(seq_len, tail_idx, tail_len, config.pos_limit)
         mask_pos = _mask_feature(entities_idx, seq_len)

@@ -26,13 +26,12 @@ def train(epoch, device, dataloader, model, optimizer, criterion, config):
         total_loss.append(loss.item())
 
         # logging
-        data_cal = len(dataloader.dataset) if batch_idx == len(
-            dataloader) else batch_idx * len(y)
-        if (config.training.train_log and batch_idx %
-                config.training.log_interval == 0) or batch_idx == len(dataloader):
-            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
-                epoch, data_cal, len(dataloader.dataset),
-                100. * batch_idx / len(dataloader), loss.item()))
+        data_cal = len(dataloader.dataset) if batch_idx == len(dataloader) else batch_idx * len(y)
+        if (config.training.train_log
+                and batch_idx % config.training.log_interval == 0) or batch_idx == len(dataloader):
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, data_cal, len(dataloader.dataset),
+                                                                           100. * batch_idx / len(dataloader),
+                                                                           loss.item()))
 
         # plot
         if config.training.show_plot:

@@ -66,9 +65,7 @@ def validate(dataloader, model, device, config):
 
     total_f1 = []
     for average in config.training.f1_norm:
-        p, r, f1, _ = precision_recall_fscore_support(total_y_true,
-                                                      total_y_pred,
-                                                      average=average)
+        p, r, f1, _ = precision_recall_fscore_support(total_y_true, total_y_pred, average=average)
         print(f' {average} metrics: [p: {p:.4f}, r:{r:.4f}, f1:{f1:.4f}]')
         total_f1.append(f1)
 

@@ -25,6 +25,7 @@ __all__ = [
 
 Path = str
 
+
 def to_one_hot(x, length):
     batch_size = x.size(0)
     x_one_hot = torch.zeros(batch_size, length).to(x.device)

@@ -97,20 +98,16 @@ def seq_len_to_mask(seq_len, max_len=None):
     :return: np.ndarray, torch.Tensor 。shape将是(B, max_length), 元素类似为bool或torch.uint8
     """
     if isinstance(seq_len, np.ndarray):
-        assert len(
-            np.shape(seq_len)
-        ) == 1, f"seq_len can only have one dimension, got {len(np.shape(seq_len))}."
+        assert len(np.shape(seq_len)) == 1, f"seq_len can only have one dimension, got {len(np.shape(seq_len))}."
         max_len = int(max_len) if max_len else int(seq_len.max())
         broad_cast_seq_len = np.tile(np.arange(max_len), (len(seq_len), 1))
         mask = broad_cast_seq_len < seq_len.reshape(-1, 1)
 
     elif isinstance(seq_len, torch.Tensor):
-        assert seq_len.dim(
-        ) == 1, f"seq_len can only have one dimension, got {seq_len.dim() == 1}."
+        assert seq_len.dim() == 1, f"seq_len can only have one dimension, got {seq_len.dim() == 1}."
         batch_size = seq_len.size(0)
         max_len = int(max_len) if max_len else seq_len.max().long()
-        broad_cast_seq_len = torch.arange(max_len).expand(batch_size,
-                                                          -1).to(seq_len)
+        broad_cast_seq_len = torch.arange(max_len).expand(batch_size, -1).to(seq_len)
         mask = broad_cast_seq_len.lt(seq_len.unsqueeze(1))
     else:
         raise TypeError("Only support 1-d numpy.ndarray or 1-d torch.Tensor.")

@@ -138,8 +135,7 @@ def load_pkl(fp: str, obj_name: str = 'data', verbose: bool = True) -> Any:
     return data
 
 
-def save_pkl(fp: Path, obj, obj_name: str = 'data',
-             verbose: bool = True) -> None:
+def save_pkl(fp: Path, obj, obj_name: str = 'data', verbose: bool = True) -> None:
     if verbose:
         print(f'save {obj_name} in {fp}')
     with open(fp, 'wb') as f:

@@ -167,7 +163,6 @@ def load_csv(fp: str) -> List:
     return list(reader)
 
 
-
 def load_jsonld(fp: str) -> List:
     print(f'load {fp}')
     datas = []

@@ -227,8 +222,7 @@ def csv2jsonld(fp: str, verbose: bool = True) -> str:
     if verbose:
         print('saving...')
     with open(fp_new, 'w', encoding='utf-8') as f:
-        f.write(
-            os.linesep.join([json.dumps(l, ensure_ascii=False) for l in data]))
+        f.write(os.linesep.join([json.dumps(l, ensure_ascii=False) for l in data]))
     if verbose:
         print(f'saved jsonld file in: {fp_new}')
     return fp_new

@@ -51,9 +51,8 @@ class Vocab(object):
                 keep_words.append(k)
                 new_words.extend([k] * v)
         if verbose:
-            print('after trim, keep words [{} / {}] = {:.2f}%'.format(
-                len(keep_words), len(self.word2idx),
-                len(keep_words) / len(self.word2idx) * 100))
+            print('after trim, keep words [{} / {}] = {:.2f}%'.format(len(keep_words), len(self.word2idx),
+                                                                      len(keep_words) / len(self.word2idx) * 100))
 
         # Reinitialize dictionaries
         self.word2idx = {}

main.py (15 changed lines)

@@ -66,11 +66,10 @@ model.to(device)
 # print(model)
 
 optimizer = optim.Adam(model.parameters(), lr=config.training.learning_rate)
-scheduler = optim.lr_scheduler.ReduceLROnPlateau(
-    optimizer,
-    'max',
-    factor=config.training.decay_rate,
-    patience=config.training.decay_patience)
+scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
+                                                 'max',
+                                                 factor=config.training.decay_rate,
+                                                 patience=config.training.decay_patience)
 criterion = nn.CrossEntropyLoss()
 
 best_macro_f1, best_macro_epoch = 0, 1

@@ -94,7 +93,5 @@ for epoch in range(1, config.training.epoch + 1):
         best_micro_model = model_name
 
 print('=' * 10, ' End training ', '=' * 10)
-print(f'best macro f1: {best_macro_f1:.4f},',
-      f'in epoch: {best_macro_epoch}, saved in: {best_macro_model}')
-print(f'best micro f1: {best_micro_f1:.4f},',
-      f'in epoch: {best_micro_epoch}, saved in: {best_micro_model}')
+print(f'best macro f1: {best_macro_f1:.4f},', f'in epoch: {best_macro_epoch}, saved in: {best_macro_model}')
+print(f'best micro f1: {best_micro_f1:.4f},', f'in epoch: {best_micro_epoch}, saved in: {best_micro_model}')