function [embedding_layer_state, hidden_layer_state, output_layer_state] = ...
  fprop(input_batch, word_embedding_weights, embed_to_hid_weights, ...
  hid_to_output_weights, hid_bias, output_bias)
% This function forward propagates through a neural network.
% Inputs:
% input_batch: The input data as a matrix of size numwords X batchsize, where
% numwords is the number of words and batchsize is the number of data points.
% So, if input_batch(i, j) = k then the ith word in data point j is word
% index k of the vocabulary.
%
% word_embedding_weights: Word embedding as a matrix of size
% vocab_size X numhid1, where vocab_size is the size of the vocabulary and
% numhid1 is the dimensionality of the embedding space.
%
% embed_to_hid_weights: Weights between the word embedding layer and hidden
% layer as a matrix of size numhid1*numwords X numhid2, where numhid2 is the
% number of hidden units.
%
% hid_to_output_weights: Weights between the hidden layer and the output
% softmax units as a matrix of size numhid2 X vocab_size.
%
% hid_bias: Bias of the hidden layer as a matrix of size numhid2 X 1.
%
% output_bias: Bias of the output layer as a matrix of size vocab_size X 1.
%
% Outputs:
% embedding_layer_state: State of units in the embedding layer as a matrix of
% size numhid1*numwords X batchsize.
%
% hidden_layer_state: State of units in the hidden layer as a matrix of size
% numhid2 X batchsize.
%
% output_layer_state: State of units in the output layer as a matrix of size
% vocab_size X batchsize.
%

[numwords, batchsize] = size(input_batch);
[vocab_size, numhid1] = size(word_embedding_weights);
numhid2 = size(embed_to_hid_weights, 2);

%% COMPUTE STATE OF WORD EMBEDDING LAYER.
% Look up the input word indices in the word_embedding_weights matrix.
embedding_layer_state = reshape(...
  word_embedding_weights(reshape(input_batch, 1, []), :)', ...
  numhid1 * numwords, []);
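% This lookup relies on MATLAB's column-major ordering: reshape(input_batch,
% 1, []) lists the word indices of data point 1 first, then data point 2,
% and so on; the row lookup yields one numhid1-dimensional embedding per
% index, and the final reshape stacks the numwords embeddings of each data
% point into a single column of embedding_layer_state.
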
%% COMPUTE STATE OF HIDDEN LAYER.
% Compute inputs to hidden units.
inputs_to_hidden_units = embed_to_hid_weights' * embedding_layer_state + ...
  repmat(hid_bias, 1, batchsize);
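% Dimension check: embed_to_hid_weights' is numhid2 X numhid1*numwords and
% embedding_layer_state is numhid1*numwords X batchsize, so their product,
% like the replicated bias, is numhid2 X batchsize.
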
% Apply logistic activation function.
% FILL IN CODE. Replace the line below by one of the options.
% hidden_layer_state = zeros(numhid2, batchsize);
hidden_layer_state = 1 ./ (1 + exp(-inputs_to_hidden_units));
% Options
% (a) hidden_layer_state = 1 ./ (1 + exp(inputs_to_hidden_units));
% (b) hidden_layer_state = 1 ./ (1 - exp(-inputs_to_hidden_units));
% (c) hidden_layer_state = 1 ./ (1 + exp(-inputs_to_hidden_units));
% (d) hidden_layer_state = -1 ./ (1 + exp(-inputs_to_hidden_units));
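% Option (c) is the logistic function 1 / (1 + exp(-x)), which squashes each
% input elementwise into (0, 1). Option (a) is its mirror image, option (b)
% diverges near x = 0, and option (d) is negated, so only (c) is the
% intended activation.
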
%% COMPUTE STATE OF OUTPUT LAYER.
% Compute inputs to softmax.
% FILL IN CODE. Replace the line below by one of the options.
% inputs_to_softmax = zeros(vocab_size, batchsize);
inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + ...
  repmat(output_bias, 1, batchsize);
% Options
% (a) inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + repmat(output_bias, 1, batchsize);
% (b) inputs_to_softmax = hid_to_output_weights' * hidden_layer_state + repmat(output_bias, batchsize, 1);
% (c) inputs_to_softmax = hidden_layer_state * hid_to_output_weights' + repmat(output_bias, 1, batchsize);
% (d) inputs_to_softmax = hid_to_output_weights * hidden_layer_state + repmat(output_bias, batchsize, 1);
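% Option (a) is the only one whose dimensions work out: hid_to_output_weights'
% is vocab_size X numhid2 and hidden_layer_state is numhid2 X batchsize, so
% their product is vocab_size X batchsize, and the bias must be replicated
% across batchsize columns, not batchsize rows.
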
% Subtract maximum.
% Remember that adding or subtracting the same constant from each input to a
% softmax unit does not affect the outputs. Here we subtract the maximum to
% make all inputs <= 0. This prevents overflow when computing their
% exponentials.
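% (In IEEE double precision, exp(x) overflows to Inf once x exceeds roughly
% 709, so large unnormalized logits would otherwise yield Inf / NaN.)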
inputs_to_softmax = inputs_to_softmax ...
  - repmat(max(inputs_to_softmax), vocab_size, 1);

% Compute exp.
output_layer_state = exp(inputs_to_softmax);

% Normalize to get probability distribution.
output_layer_state = output_layer_state ./ repmat(...
  sum(output_layer_state, 1), vocab_size, 1);
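
% Example usage (a minimal sketch; the sizes below are illustrative and not
% part of this function). Run from a script or the command line with fprop.m
% on the path:
%
%   vocab_size = 250; numwords = 3; batchsize = 100;
%   numhid1 = 50; numhid2 = 200;
%   input_batch = randi(vocab_size, numwords, batchsize);
%   word_embedding_weights = 0.01 * randn(vocab_size, numhid1);
%   embed_to_hid_weights = 0.01 * randn(numhid1 * numwords, numhid2);
%   hid_to_output_weights = 0.01 * randn(numhid2, vocab_size);
%   hid_bias = zeros(numhid2, 1);
%   output_bias = zeros(vocab_size, 1);
%   [emb, hid, out] = fprop(input_batch, word_embedding_weights, ...
%       embed_to_hid_weights, hid_to_output_weights, hid_bias, output_bias);
%   % Every column of out is a distribution over the vocabulary:
%   % max(abs(sum(out, 1) - 1)) should be on the order of eps.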