Speaker Recognition Loss
Sept. 17, 2020, 4:25 p.m.
AM-Softmax Loss
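AM-Softmax (the additive cosine margin used in CosFace) subtracts a fixed margin m from the target-class cosine before rescaling by s, so both implementations below reduce to the same cross-entropy:

L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{s(\cos\theta_{y_i}-m)}}{e^{s(\cos\theta_{y_i}-m)}+\sum_{j\neq y_i}e^{s\cos\theta_j}}

Here \theta_j is the angle between the L2-normalized embedding and the j-th class weight. Both versions below keep the weight in the same (nOut, nClasses) layout so they can be compared against each other.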
import tensorflow as tf
import math
import torch
import torch.nn as nn
from torch import Tensor
import numpy as np
# weight = np.random.randn(1991, 512).astype(np.float32)
# np.save('weight.npy', weight)
# weight = np.load('weight.npy')
class AMSoftmaxLossTF(tf.keras.losses.Loss):
    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15, name="AMSoftmaxLoss") -> None:
        super().__init__(name=name)
        self.embedding_size = nOut
        self.num_classes = nClasses
        self.m = m
        self.s = s
        initializer = tf.initializers.GlorotNormal()
        self.weight = tf.Variable(initializer(
            shape=[nOut, nClasses], dtype=tf.float32),
            name="AMSoftmaxLoss_weight")
        # self.weight = tf.Variable(weight.T, name="AMSoftmaxLoss_weight")
        self.criterion = tf.nn.softmax_cross_entropy_with_logits

    def __call__(self, input: tf.Tensor, label: tf.Tensor, logit_length=None) -> tf.Tensor:
        # labels = tf.squeeze(samples['output'])
        outputs_norm = tf.math.l2_normalize(input, axis=1)       # L2-normalize embeddings
        weight_norm = tf.math.l2_normalize(self.weight, axis=0)  # L2-normalize class weights
        costh = tf.matmul(outputs_norm, weight_norm)             # cos(theta), shape (B, nClasses)
        label_onehot = tf.one_hot(label, self.num_classes)
        delt_costh = self.m * label_onehot                       # margin only on the target class
        costh_m = costh - delt_costh
        costh_m_s = self.s * costh_m                             # rescale by s
        loss = tf.reduce_mean(self.criterion(label_onehot, costh_m_s))
        return loss
class AMSoftmaxLossTorch(nn.Module):
    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15) -> None:
        super(AMSoftmaxLossTorch, self).__init__()
        self.test_normalize = True
        self.m = m
        self.s = s
        self.in_feats = nOut
        self.W = torch.nn.Parameter(torch.randn(nOut, nClasses), requires_grad=True)
        # self.W = torch.nn.Parameter(torch.tensor(weight.T), requires_grad=True)
        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.W, gain=1)
        print('Initialised AMSoftmax m=%.3f s=%.3f' % (self.m, self.s))

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        assert input.size()[0] == label.size()[0]
        assert input.size()[1] == self.in_feats
        # L2-normalize embeddings and class weights
        x_norm = torch.norm(input, p=2, dim=1, keepdim=True).clamp(min=1e-12)
        x_norm = torch.div(input, x_norm)
        w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-12)
        w_norm = torch.div(self.W, w_norm)
        costh = torch.mm(x_norm, w_norm)  # cos(theta), shape (B, nClasses)
        # subtract the margin m from the target-class cosine only
        label_view = label.view(-1, 1)
        if label_view.is_cuda: label_view = label_view.cpu()
        delt_costh = torch.zeros(costh.size()).scatter_(1, label_view, self.m)
        if input.is_cuda: delt_costh = delt_costh.cuda()
        costh_m = costh - delt_costh
        costh_m_s = self.s * costh_m      # rescale by s
        loss = self.ce(costh_m_s, label)
        return loss
if __name__ == '__main__':
    input = np.random.randn(10, 512)
    # np.save('input.npy', input)
    # input = np.load('input.npy')
    input_torch = torch.from_numpy(input).float()
    input_tf = tf.convert_to_tensor(value=input, dtype=np.float32)
    label = np.array([99, 11, 22, 33, 44, 88, 47, 478, 982, 1990]).astype(np.int32)
    label_torch = torch.from_numpy(label).long()
    label_tf = tf.convert_to_tensor(value=label, dtype=np.int32)
    am_loss_1 = AMSoftmaxLossTF(nOut=512, nClasses=1991)
    am_loss_2 = AMSoftmaxLossTorch(nOut=512, nClasses=1991)
    loss_1 = am_loss_1(input_tf, label_tf)
    loss_2 = am_loss_2(input_torch, label_torch)
    print(loss_1)
    print(loss_2)
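The commented-out weight.npy / input.npy lines are there so the two frameworks can be fed identical parameters. A minimal sketch of that parity check, assuming the arrays were saved beforehand with the shapes shown in the comments and the classes above are in scope:

# Sketch only: assumes weight.npy (1991 x 512, float32) and input.npy (10 x 512) exist.
weight = np.load('weight.npy')
inputs = np.load('input.npy')
label = np.array([99, 11, 22, 33, 44, 88, 47, 478, 982, 1990]).astype(np.int32)

tf_loss = AMSoftmaxLossTF(nOut=512, nClasses=1991)
tf_loss.weight.assign(weight.T)                           # TF weight layout: (nOut, nClasses)

torch_loss = AMSoftmaxLossTorch(nOut=512, nClasses=1991)
with torch.no_grad():
    torch_loss.W.copy_(torch.from_numpy(weight.T.copy())) # same layout: (nOut, nClasses)

l1 = tf_loss(tf.convert_to_tensor(inputs, dtype=np.float32),
             tf.convert_to_tensor(label, dtype=np.int32))
l2 = torch_loss(torch.from_numpy(inputs).float(), torch.from_numpy(label).long())
print(l1.numpy(), l2.item())                              # should agree to float precision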
AAM-Softmax Loss
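AAM-Softmax (ArcFace) puts the margin on the angle itself, replacing the target logit cos(theta_{y_i}) with cos(theta_{y_i} + m):

L = -\frac{1}{N}\sum_{i=1}^{N}\log\frac{e^{s\cos(\theta_{y_i}+m)}}{e^{s\cos(\theta_{y_i}+m)}+\sum_{j\neq y_i}e^{s\cos\theta_j}}

Because cos(theta + m) only decreases monotonically while theta + m stays within [0, pi], both implementations fall back to a linear penalty (the th / mm branch) once theta exceeds pi - m, or to the plain cosine when easy_margin is enabled and cos(theta) <= 0.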
import tensorflow as tf
import math
import torch
import torch.nn as nn
from torch import Tensor
import torch.nn.functional as F
import numpy as np
# weight = np.random.randn(1991, 512).astype(np.float32)
# np.save('weight.npy', weight)
# weight = np.load('weight.npy')
class AAMSoftmaxLossTF(tf.keras.losses.Loss):
    def __init__(self, nOut: int, nClasses: int,
                 m: float = 0.3, s: float = 15,
                 easy_margin: bool = False, name: str = "AAMSoftmaxLoss") -> None:
        super().__init__(name=name)
        self.embedding_size = nOut
        self.num_classes = nClasses
        self.m = m
        self.s = s
        initializer = tf.initializers.GlorotNormal()
        self.weight = tf.Variable(initializer(
            shape=[nOut, nClasses], dtype=tf.float32),
            name="AAMSoftmaxLoss_weight")
        # self.weight = tf.Variable(weight.T, name="AAMSoftmaxLoss_weight")
        self.criterion = tf.nn.softmax_cross_entropy_with_logits
        self.easy_margin = easy_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        # keep cos(theta + m) monotonically decreasing for theta in [0, pi]
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def __call__(self, input: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
        # labels = tf.squeeze(samples['output'])
        outputs_norm = tf.math.l2_normalize(input, axis=1)
        weight_norm = tf.math.l2_normalize(self.weight, axis=0)
        cosine = tf.matmul(outputs_norm, weight_norm)   # cos(theta)
        sine = tf.clip_by_value(tf.math.sqrt(1.0 - tf.math.pow(cosine, 2)), 0, 1)
        phi = cosine * self.cos_m - sine * self.sin_m   # cos(theta + m)
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where((cosine - self.th) > 0, phi, cosine - self.mm)
        label_onehot = tf.one_hot(label, self.num_classes)
        output = (label_onehot * phi) + ((1.0 - label_onehot) * cosine)
        output = output * self.s
        loss = tf.reduce_mean(self.criterion(label_onehot, output))
        return loss
class AAMSoftmaxLossTorch(nn.Module):
    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15,
                 easy_margin: bool = False) -> None:
        super(AAMSoftmaxLossTorch, self).__init__()
        self.test_normalize = True
        self.m = m
        self.s = s
        self.in_feats = nOut
        self.weight = torch.nn.Parameter(torch.FloatTensor(nClasses, nOut), requires_grad=True)
        # self.weight = torch.nn.Parameter(torch.tensor(weight), requires_grad=True)
        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.weight, gain=1)
        self.easy_margin = easy_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        # keep cos(theta + m) monotonically decreasing for theta in [0, pi]
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m
        print('Initialised AAMSoftmax margin %.3f scale %.3f' % (self.m, self.s))

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        assert input.size()[0] == label.size()[0]
        assert input.size()[1] == self.in_feats
        # cos(theta)
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # cos(theta + m)
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
        # one_hot = torch.zeros(cosine.size(), device='cuda' if torch.cuda.is_available() else 'cpu')
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1)
        # apply the angular margin only to the target class, then rescale by s
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s
        loss = self.ce(output, label)
        return loss
if __name__ == '__main__':
    input = np.random.randn(10, 512)
    # np.save('input.npy', input)
    # input = np.load('input.npy')
    input_torch = torch.from_numpy(input).float()
    input_tf = tf.convert_to_tensor(value=input, dtype=np.float32)
    label = np.array([99, 11, 22, 33, 44, 88, 47, 478, 982, 1990]).astype(np.int32)
    label_torch = torch.from_numpy(label).long()
    label_tf = tf.convert_to_tensor(value=label, dtype=np.int32)
    aam_loss_1 = AAMSoftmaxLossTF(nOut=512, nClasses=1991)
    aam_loss_2 = AAMSoftmaxLossTorch(nOut=512, nClasses=1991)
    loss_1 = aam_loss_1(input_tf, label_tf)
    loss_2 = aam_loss_2(input_torch, label_torch)
    print(loss_1.numpy())
    print(loss_2.item())
Circle Loss
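Circle Loss replaces the fixed margin of the two losses above with similarity-dependent weights: the within-class similarity s_p and the between-class similarities s_n each get their own scaling factor and optimum. In the class-label form implemented below,

\alpha_p = [1 + m - s_p]_+, \quad \alpha_n = [s_n + m]_+, \quad \Delta_p = 1 - m, \quad \Delta_n = m

L = \log\Big[1 + \sum_{j\neq y} e^{\gamma\,\alpha_n^j (s_n^j - \Delta_n)}\, e^{-\gamma\,\alpha_p (s_p - \Delta_p)}\Big]

where gamma is the scale (s in the code). V1 obtains the logits from CosineLinearLayer and feeds them to cross-entropy; V2 builds the same expression directly from logsumexp and softplus.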
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import Parameter
import torch.nn.functional as F
"""
2个不同版本的CircleLoss的实现
"""
class CosineLinearLayer(nn.Module):
    def __init__(self, in_features: int, out_features: int) -> None:
        super(CosineLinearLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        self.weight.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)

    def forward(self, input: Tensor) -> Tensor:
        x = input                           # features, shape (B, in_features)
        w = self.weight                     # weights, shape (in_features, out_features)
        ww = w.renorm(2, 1, 1e-5).mul(1e5)  # column-renormalized weights
        xlen = x.pow(2).sum(1).pow(0.5)     # feature norms, size B
        wlen = ww.pow(2).sum(0).pow(0.5)    # weight norms, size out_features
        cos_theta = x.mm(ww)                                          # xW, shape (B, out_features)
        cos_theta = cos_theta / xlen.view(-1, 1) / wlen.view(1, -1)   # -> cos(theta)
        cos_theta = cos_theta.clamp(-1.0, 1.0)
        cos_theta = cos_theta * xlen.view(-1, 1)                      # rescale by the feature norm
        return cos_theta
class CircleCore(nn.Module):
    def __init__(self, m: float = 0.35, s: float = 256) -> None:
        super(CircleCore, self).__init__()
        self.s, self.m = s, m
        self.criteria = nn.CrossEntropyLoss()

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        cosine = input
        # adaptive re-weighting factors (detached, so they carry no gradient)
        alpha_p = F.relu(1 + self.m - cosine).detach()
        margin_p = 1 - self.m
        alpha_n = F.relu(cosine + self.m).detach()
        margin_n = self.m
        sp_y = alpha_p * (cosine - margin_p)   # positive-class term
        sn_j = alpha_n * (cosine - margin_n)   # negative-class term
        one_hot = torch.zeros(cosine.size()).to(label.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = one_hot * sp_y + ((1.0 - one_hot) * sn_j)
        output *= self.s
        loss = self.criteria(output, label)
        return loss
class CircleLossV1(nn.Module):
    def __init__(self, nOut: int, nClasses: int, m: float = 0.35, s: float = 256) -> None:
        super(CircleLossV1, self).__init__()
        self.classifier_linear = CosineLinearLayer(in_features=nOut, out_features=nClasses)
        self.circle_core = CircleCore(m=m, s=s)

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        logits = self.classifier_linear(input)
        loss = self.circle_core(logits, label)
        return loss
class CircleLossV2(nn.Module):
    def __init__(self, nOut: int, nClasses: int, m: float = 0.35, s: float = 256, use_cuda: bool = False) -> None:
        super(CircleLossV2, self).__init__()
        self.margin = m
        self.gamma = s
        self.soft_plus = nn.Softplus()
        self.class_num = nClasses
        self.emdsize = nOut
        self.weight = nn.Parameter(torch.FloatTensor(self.class_num, self.emdsize))
        nn.init.xavier_uniform_(self.weight)
        self.use_cuda = use_cuda

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        # pure cosine similarity between normalized embeddings and class weights
        similarity_matrix = nn.functional.linear(nn.functional.normalize(input, p=2, dim=1, eps=1e-12),
                                                 nn.functional.normalize(self.weight, p=2, dim=1, eps=1e-12))
        # similarity_matrix = input
        if self.use_cuda:
            one_hot = torch.zeros(similarity_matrix.size(), device='cuda')
        else:
            one_hot = torch.zeros(similarity_matrix.size())
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        one_hot = one_hot.type(dtype=torch.bool)
        # sp = torch.gather(similarity_matrix, dim=1, index=label.unsqueeze(1))
        sp = similarity_matrix[one_hot]   # target-class similarities, one per sample
        mask = one_hot.logical_not()
        sn = similarity_matrix[mask]      # non-target similarities
        sp = sp.view(input.size()[0], -1)
        sn = sn.view(input.size()[0], -1)
        ap = torch.clamp_min(-sp.detach() + 1 + self.margin, min=0.)
        an = torch.clamp_min(sn.detach() + self.margin, min=0.)
        delta_p = 1 - self.margin
        delta_n = self.margin
        logit_p = - ap * (sp - delta_p) * self.gamma
        logit_n = an * (sn - delta_n) * self.gamma
        loss = self.soft_plus(torch.logsumexp(logit_n, dim=1) + torch.logsumexp(logit_p, dim=1))
        return loss.mean()
if __name__ == '__main__':
    feat = torch.randn(10, 512)
    lbl = torch.randint(high=1990, size=(10,))
    # Circle Loss V1
    criterion = CircleLossV1(nOut=512, nClasses=1991, m=0.25, s=256)
    circle_loss_1 = criterion(feat, lbl)
    print(circle_loss_1)
    # Circle Loss V2
    criterion = CircleLossV2(nOut=512, nClasses=1991, m=0.25, s=256)
    circle_loss_2 = criterion(feat, lbl)
    print(circle_loss_2)
    print(circle_loss_1 / circle_loss_2)
CircleLossV1 and CircleLossV2 differ only in the normalization ("Norm") step; everything else is the same. In my tests on a text-independent task, V2 performed worse than V1 (see the sketch below).
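A minimal sketch of that Norm difference (x and W are hypothetical stand-ins, not values from the code above): V2 feeds pure cosine similarities into the margin, while V1's CosineLinearLayer rescales the cosine by the feature norm.

import torch
import torch.nn.functional as F

x = torch.randn(4, 512)      # hypothetical embedding batch
W = torch.randn(1991, 512)   # hypothetical class-weight matrix (nClasses, nOut)

logits_v2 = F.linear(F.normalize(x), F.normalize(W))      # pure cos(theta), values in [-1, 1]
logits_v1 = logits_v2 * x.norm(p=2, dim=1, keepdim=True)  # CosineLinearLayer output: ||x|| * cos(theta)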