// blob: ff75c81d46caed70315a9405fb4a6259530268d4 [file] [log] [blame] [edit]
/*
* Copyright (C) 2017 Clifford Wolf <clifford@clifford.at>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
// Registered top level of the 32-bit unit.
//
//   clock : rd is updated on every rising edge
//   bdep  : forwarded to the core as din_mode
//   rs1   : forwarded to the core as din_value
//   rs2   : forwarded to the core as din_mask
//   rd    : core result captured at the last rising clock edge
module smartbextdep (
	input             clock,
	input             bdep,
	input      [31:0] rs1,
	input      [31:0] rs2,
	output reg [31:0] rd
);
	// Combinational result of the core for the current inputs.
	wire [31:0] core_result;

	smartbextdep_direct smartbextdep_direct_inst (
		.din_mode   (bdep       ),
		.din_value  (rs1        ),
		.din_mask   (rs2        ),
		.dout_result(core_result)
	);

	// Only the output is registered; the inputs feed the core directly.
	always @(posedge clock)
		rd <= core_result;
endmodule
// ========================================================================
// Bit-position helpers for one butterfly stage.
//
// k is the stage number (pair distance is 1 << k) and i is the pair number.
//   smartbextdep_bfly_idx_a(k, i): (2 << k) * (i / (1 << k)) + i % (1 << k).
//       For non-negative i this is i with a 0 inserted at bit position k,
//       e.g. k = 1 maps i = 0, 1, 2, 3 to 0, 1, 4, 5.
//   smartbextdep_bfly_idx_b(k, i): idx_a(k, i) + (1 << k), i.e. the position
//       (1 << k) above idx_a.
// Every macro argument is parenthesized in the expansion, so expressions may
// be passed for k and i.
`define smartbextdep_bfly_idx_a(k, i) ((2 << (k))*((i)/(1 << (k))) + (i)%(1 << (k)))
`define smartbextdep_bfly_idx_b(k, i) (`smartbextdep_bfly_idx_a(k, i) + (1<<(k)))
// Combinational core: decodes the mask into butterfly control words and runs
// the value through a forward and a backward butterfly network.
//
//   din_mode    : 1 selects the forward-network output ANDed with din_mask,
//                 0 selects the backward-network output
//   din_value   : data operand
//   din_mask    : mask operand; also the input of the control decoder
//   dout_result : selected result (purely combinational)
module smartbextdep_direct (
	input         din_mode,
	input  [31:0] din_value,
	input  [31:0] din_mask,
	output [31:0] dout_result
);
	// One 16-bit control word per butterfly stage.
	wire [15:0] decoder_s1, decoder_s2, decoder_s4;
	wire [15:0] decoder_s8, decoder_s16;

	// The decoder declares clock/enable inputs but does not reference them;
	// they are tied to constants so that no input port is left floating.
	smartbextdep_decoder decoder (
		.clock (1'b0       ),
		.enable(1'b1       ),
		.mask  (din_mask   ),
		.s1    (decoder_s1 ),
		.s2    (decoder_s2 ),
		.s4    (decoder_s4 ),
		.s8    (decoder_s8 ),
		.s16   (decoder_s16)
	);

	wire [31:0] result_fwd;
	wire [31:0] result_bwd;

	// Both networks receive the bitwise complement of the decoder outputs.
	// The forward network sees the raw value ...
	smartbextdep_bfly_fwd butterfly_fwd (
		.din   (din_value   ),
		.s1    (~decoder_s1 ),
		.s2    (~decoder_s2 ),
		.s4    (~decoder_s4 ),
		.s8    (~decoder_s8 ),
		.s16   (~decoder_s16),
		.dout  (result_fwd  )
	);

	// ... while the backward network sees the value already ANDed with the mask.
	smartbextdep_bfly_bwd butterfly_bwd (
		.din   (din_value & din_mask),
		.s1    (~decoder_s1 ),
		.s2    (~decoder_s2 ),
		.s4    (~decoder_s4 ),
		.s8    (~decoder_s8 ),
		.s16   (~decoder_s16),
		.dout  (result_bwd  )
	);

	// The forward result is masked after the network, the backward result
	// was masked before it.
	assign dout_result = din_mode ? (result_fwd & din_mask) : result_bwd;
endmodule
// ========================================================================
// Shifts a block of M ones, held in the low half of a 2*M-bit word, left by
// din[N-1:0] and returns the upper half of that word.
//
//   N    : number of low bits of din used as the shift amount
//   M    : width of dout
//   din  : 8-bit input; bits above N-1 are ignored
//   dout : bits [2*M-1:M] of the shifted word
module smartbextdep_lrotcz #(
	parameter integer N = 1,
	parameter integer M = 1
) (
	input  [7:0]   din,
	output [M-1:0] dout
);
	// M zeros above M ones, shifted left by the selected amount; bits
	// shifted beyond position 2*M-1 are dropped.
	wire [2*M-1:0] window = {{M{1'b0}}, {M{1'b1}}} << din[N-1:0];

	// The upper half is whatever part of the ones block crossed bit M.
	assign dout = window[2*M-1:M];
endmodule
// Produces the five 16-bit butterfly control words from the mask.
//
// The mask is fed to smartbextdep_pps32, which returns 32 packed 8-bit
// entries (entry j in bits [8*j +: 8]). Each stage then passes selected
// entries through smartbextdep_lrotcz to obtain its slice of control bits.
//
//   clock, enable : declared ports that are not referenced in this module
//   mask          : 32-bit mask operand
//   s1 .. s16     : control words for the stages with pair distance 1 .. 16
module smartbextdep_decoder (
	input         clock,
	input         enable,
	input  [31:0] mask,
	output [15:0] s1, s2, s4, s8, s16
);
	wire [255:0] ppsdata;

	smartbextdep_pps32 pps_core (
		.din (mask   ),
		.dout(ppsdata)
	);

	genvar g;
	generate
		// 16 groups of 2 bits: entry 2*g, 1 control bit each
		for (g = 0; g < 16; g = g+1) begin:stage1
			smartbextdep_lrotcz #(.N(1), .M(1)) lrotc_zero (
				.din (ppsdata[8*(2*g) +: 8]),
				.dout(s1[g])
			);
		end

		// 8 groups of 4 bits: entry 4*g + 1, 2 control bits each
		for (g = 0; g < 8; g = g+1) begin:stage2
			smartbextdep_lrotcz #(.N(2), .M(2)) lrotc_zero (
				.din (ppsdata[8*(4*g + 1) +: 8]),
				.dout(s2[2*g +: 2])
			);
		end

		// 4 groups of 8 bits: entry 8*g + 3, 4 control bits each
		for (g = 0; g < 4; g = g+1) begin:stage4
			smartbextdep_lrotcz #(.N(3), .M(4)) lrotc_zero (
				.din (ppsdata[8*(8*g + 3) +: 8]),
				.dout(s4[4*g +: 4])
			);
		end

		// 2 groups of 16 bits: entry 16*g + 7, 8 control bits each
		for (g = 0; g < 2; g = g+1) begin:stage8
			smartbextdep_lrotcz #(.N(4), .M(8)) lrotc_zero (
				.din (ppsdata[8*(16*g + 7) +: 8]),
				.dout(s8[8*g +: 8])
			);
		end

		// 1 group of 32 bits: entry 15, all 16 control bits
		for (g = 0; g < 1; g = g+1) begin:stage16
			smartbextdep_lrotcz #(.N(5), .M(16)) lrotc_zero (
				.din (ppsdata[8*(32*g + 15) +: 8]),
				.dout(s16[16*g +: 16])
			);
		end
	endgenerate
endmodule
// Forward butterfly network: five stages of conditional pair swaps applied
// in the order distance 16, 8, 4, 2, 1.
//
//   din      : 32-bit input word
//   s16..s1  : per-stage swap enables; bit n of a stage's word swaps the
//              pair at positions idx_a(k, n) / idx_b(k, n) of that stage
//   dout     : word after all five stages (combinational)
module smartbextdep_bfly_fwd (
	input  [31:0] din,
	input  [15:0] s1, s2, s4, s8, s16,
	output [31:0] dout
);
	reg [31:0] word;        // working copy, rewritten stage by stage
	reg        bit_lo;      // current bit at the idx_a position
	reg        bit_hi;      // current bit at the idx_b position
	integer    n;

	always @* begin
		word = din;

		// distance 16
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(4, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(4, n)];
			if (s16[n]) begin
				word[`smartbextdep_bfly_idx_a(4, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(4, n)] = bit_lo;
			end
		end

		// distance 8
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(3, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(3, n)];
			if (s8[n]) begin
				word[`smartbextdep_bfly_idx_a(3, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(3, n)] = bit_lo;
			end
		end

		// distance 4
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(2, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(2, n)];
			if (s4[n]) begin
				word[`smartbextdep_bfly_idx_a(2, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(2, n)] = bit_lo;
			end
		end

		// distance 2
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(1, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(1, n)];
			if (s2[n]) begin
				word[`smartbextdep_bfly_idx_a(1, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(1, n)] = bit_lo;
			end
		end

		// distance 1
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(0, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(0, n)];
			if (s1[n]) begin
				word[`smartbextdep_bfly_idx_a(0, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(0, n)] = bit_lo;
			end
		end
	end

	assign dout = word;
endmodule
// Backward butterfly network: five stages of conditional pair swaps applied
// in the order distance 1, 2, 4, 8, 16 (the reverse of the forward network).
//
//   din      : 32-bit input word
//   s1..s16  : per-stage swap enables; bit n of a stage's word swaps the
//              pair at positions idx_a(k, n) / idx_b(k, n) of that stage
//   dout     : word after all five stages (combinational)
module smartbextdep_bfly_bwd (
	input  [31:0] din,
	input  [15:0] s1, s2, s4, s8, s16,
	output [31:0] dout
);
	reg [31:0] word;        // working copy, rewritten stage by stage
	reg        bit_lo;      // current bit at the idx_a position
	reg        bit_hi;      // current bit at the idx_b position
	integer    n;

	always @* begin
		word = din;

		// distance 1
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(0, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(0, n)];
			if (s1[n]) begin
				word[`smartbextdep_bfly_idx_a(0, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(0, n)] = bit_lo;
			end
		end

		// distance 2
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(1, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(1, n)];
			if (s2[n]) begin
				word[`smartbextdep_bfly_idx_a(1, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(1, n)] = bit_lo;
			end
		end

		// distance 4
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(2, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(2, n)];
			if (s4[n]) begin
				word[`smartbextdep_bfly_idx_a(2, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(2, n)] = bit_lo;
			end
		end

		// distance 8
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(3, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(3, n)];
			if (s8[n]) begin
				word[`smartbextdep_bfly_idx_a(3, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(3, n)] = bit_lo;
			end
		end

		// distance 16
		for (n = 0; n < 16; n = n+1) begin
			bit_lo = word[`smartbextdep_bfly_idx_a(4, n)];
			bit_hi = word[`smartbextdep_bfly_idx_b(4, n)];
			if (s16[n]) begin
				word[`smartbextdep_bfly_idx_a(4, n)] = bit_hi;
				word[`smartbextdep_bfly_idx_b(4, n)] = bit_lo;
			end
		end
	end

	assign dout = word;
endmodule
// Prefix sums over the 32 input bits.
//
//   din  : 32 single-bit inputs
//   dout : 32 packed 8-bit values; dout[8*j +: 8] is the sum of din[0]
//          through din[j] (inclusive), computed with 8-bit arithmetic
//
// Intermediate sums are kept as 16-bit pairs of two 8-bit words whose
// arithmetic sum (modulo 256) is the represented value. The pair is only
// collapsed with a real addition at the outputs. Wire names follow the
// pattern e<j>s<n>: a node whose value covers a range of input bits ending
// at bit j (NOTE(review): <n> looks like a level/stage number of the
// network -- confirm).
module smartbextdep_pps32 (
input [31:0] din,
output [255:0] dout
);
// Adds two values held as word pairs and returns the result as a word pair.
// Two bitwise 3-input compression steps are used: first a[15:8], a[7:0] and
// b[7:0] are reduced to a sum word x and a carry word y (carries shifted
// left by one), then x, y and b[15:8] are reduced the same way. No
// carry-propagating addition takes place here.
function [15:0] carry_save_add;
input [15:0] a, b;
reg [7:0] x, y;
begin
x = a[15:8] ^ a[7:0] ^ b[7:0];
y = ((a[15:8] & a[7:0]) | (a[15:8] & b[7:0]) | (a[7:0] & b[7:0])) << 1;
carry_save_add[7:0] = x ^ y ^ b[15:8];
carry_save_add[15:8] = ((x & y) | (x & b[15:8]) | (y & b[15:8])) << 1;
end
endfunction
// Collapses a word pair into its plain 8-bit value by adding both halves.
function [7:0] carry_save_get;
input [15:0] a;
begin
carry_save_get = a[7:0] + a[15:8];
end
endfunction
// inputs
// Each input bit becomes a word pair with the bit in position 0 and all
// other bits zero.
wire [15:0] e0s0 = {15'b0, din[0 +: 1]};
wire [15:0] e1s0 = {15'b0, din[1 +: 1]};
wire [15:0] e2s0 = {15'b0, din[2 +: 1]};
wire [15:0] e3s0 = {15'b0, din[3 +: 1]};
wire [15:0] e4s0 = {15'b0, din[4 +: 1]};
wire [15:0] e5s0 = {15'b0, din[5 +: 1]};
wire [15:0] e6s0 = {15'b0, din[6 +: 1]};
wire [15:0] e7s0 = {15'b0, din[7 +: 1]};
wire [15:0] e8s0 = {15'b0, din[8 +: 1]};
wire [15:0] e9s0 = {15'b0, din[9 +: 1]};
wire [15:0] e10s0 = {15'b0, din[10 +: 1]};
wire [15:0] e11s0 = {15'b0, din[11 +: 1]};
wire [15:0] e12s0 = {15'b0, din[12 +: 1]};
wire [15:0] e13s0 = {15'b0, din[13 +: 1]};
wire [15:0] e14s0 = {15'b0, din[14 +: 1]};
wire [15:0] e15s0 = {15'b0, din[15 +: 1]};
wire [15:0] e16s0 = {15'b0, din[16 +: 1]};
wire [15:0] e17s0 = {15'b0, din[17 +: 1]};
wire [15:0] e18s0 = {15'b0, din[18 +: 1]};
wire [15:0] e19s0 = {15'b0, din[19 +: 1]};
wire [15:0] e20s0 = {15'b0, din[20 +: 1]};
wire [15:0] e21s0 = {15'b0, din[21 +: 1]};
wire [15:0] e22s0 = {15'b0, din[22 +: 1]};
wire [15:0] e23s0 = {15'b0, din[23 +: 1]};
wire [15:0] e24s0 = {15'b0, din[24 +: 1]};
wire [15:0] e25s0 = {15'b0, din[25 +: 1]};
wire [15:0] e26s0 = {15'b0, din[26 +: 1]};
wire [15:0] e27s0 = {15'b0, din[27 +: 1]};
wire [15:0] e28s0 = {15'b0, din[28 +: 1]};
wire [15:0] e29s0 = {15'b0, din[29 +: 1]};
wire [15:0] e30s0 = {15'b0, din[30 +: 1]};
wire [15:0] e31s0 = {15'b0, din[31 +: 1]};
// forward pass
// Pairwise tree: sums of adjacent 2-bit groups, then 4-, 8-, 16- and finally
// all 32 bits. Only the node at the top position of each group is produced.
wire [15:0] e1s1 = carry_save_add(e1s0, e0s0);
wire [15:0] e3s1 = carry_save_add(e3s0, e2s0);
wire [15:0] e5s1 = carry_save_add(e5s0, e4s0);
wire [15:0] e7s1 = carry_save_add(e7s0, e6s0);
wire [15:0] e9s1 = carry_save_add(e9s0, e8s0);
wire [15:0] e11s1 = carry_save_add(e11s0, e10s0);
wire [15:0] e13s1 = carry_save_add(e13s0, e12s0);
wire [15:0] e15s1 = carry_save_add(e15s0, e14s0);
wire [15:0] e17s1 = carry_save_add(e17s0, e16s0);
wire [15:0] e19s1 = carry_save_add(e19s0, e18s0);
wire [15:0] e21s1 = carry_save_add(e21s0, e20s0);
wire [15:0] e23s1 = carry_save_add(e23s0, e22s0);
wire [15:0] e25s1 = carry_save_add(e25s0, e24s0);
wire [15:0] e27s1 = carry_save_add(e27s0, e26s0);
wire [15:0] e29s1 = carry_save_add(e29s0, e28s0);
wire [15:0] e31s1 = carry_save_add(e31s0, e30s0);
wire [15:0] e3s2 = carry_save_add(e3s1, e1s1);
wire [15:0] e7s2 = carry_save_add(e7s1, e5s1);
wire [15:0] e11s2 = carry_save_add(e11s1, e9s1);
wire [15:0] e15s2 = carry_save_add(e15s1, e13s1);
wire [15:0] e19s2 = carry_save_add(e19s1, e17s1);
wire [15:0] e23s2 = carry_save_add(e23s1, e21s1);
wire [15:0] e27s2 = carry_save_add(e27s1, e25s1);
wire [15:0] e31s2 = carry_save_add(e31s1, e29s1);
wire [15:0] e7s3 = carry_save_add(e7s2, e3s2);
wire [15:0] e15s3 = carry_save_add(e15s2, e11s2);
wire [15:0] e23s3 = carry_save_add(e23s2, e19s2);
wire [15:0] e31s3 = carry_save_add(e31s2, e27s2);
wire [15:0] e15s4 = carry_save_add(e15s3, e7s3);
wire [15:0] e31s4 = carry_save_add(e31s3, e23s3);
wire [15:0] e31s5 = carry_save_add(e31s4, e15s4);
// backward pass
// Fills in the remaining positions: each node adds a partial group sum from
// the forward pass (or a single input bit) to an already complete prefix
// that ends directly below that group.
wire [15:0] e23s6 = carry_save_add(e23s3, e15s4);
wire [15:0] e11s7 = carry_save_add(e11s2, e7s3);
wire [15:0] e19s7 = carry_save_add(e19s2, e15s4);
wire [15:0] e27s7 = carry_save_add(e27s2, e23s6);
wire [15:0] e5s8 = carry_save_add(e5s1, e3s2);
wire [15:0] e9s8 = carry_save_add(e9s1, e7s3);
wire [15:0] e13s8 = carry_save_add(e13s1, e11s7);
wire [15:0] e17s8 = carry_save_add(e17s1, e15s4);
wire [15:0] e21s8 = carry_save_add(e21s1, e19s7);
wire [15:0] e25s8 = carry_save_add(e25s1, e23s6);
wire [15:0] e29s8 = carry_save_add(e29s1, e27s7);
wire [15:0] e2s9 = carry_save_add(e2s0, e1s1);
wire [15:0] e4s9 = carry_save_add(e4s0, e3s2);
wire [15:0] e6s9 = carry_save_add(e6s0, e5s8);
wire [15:0] e8s9 = carry_save_add(e8s0, e7s3);
wire [15:0] e10s9 = carry_save_add(e10s0, e9s8);
wire [15:0] e12s9 = carry_save_add(e12s0, e11s7);
wire [15:0] e14s9 = carry_save_add(e14s0, e13s8);
wire [15:0] e16s9 = carry_save_add(e16s0, e15s4);
wire [15:0] e18s9 = carry_save_add(e18s0, e17s8);
wire [15:0] e20s9 = carry_save_add(e20s0, e19s7);
wire [15:0] e22s9 = carry_save_add(e22s0, e21s8);
wire [15:0] e24s9 = carry_save_add(e24s0, e23s6);
wire [15:0] e26s9 = carry_save_add(e26s0, e25s8);
wire [15:0] e28s9 = carry_save_add(e28s0, e27s7);
wire [15:0] e30s9 = carry_save_add(e30s0, e29s8);
// outputs
// Byte j of dout is the collapsed value of the complete prefix node for
// position j; this is the only place a carry-propagating add is performed.
assign dout[0 +: 8] = carry_save_get(e0s0);
assign dout[8 +: 8] = carry_save_get(e1s1);
assign dout[16 +: 8] = carry_save_get(e2s9);
assign dout[24 +: 8] = carry_save_get(e3s2);
assign dout[32 +: 8] = carry_save_get(e4s9);
assign dout[40 +: 8] = carry_save_get(e5s8);
assign dout[48 +: 8] = carry_save_get(e6s9);
assign dout[56 +: 8] = carry_save_get(e7s3);
assign dout[64 +: 8] = carry_save_get(e8s9);
assign dout[72 +: 8] = carry_save_get(e9s8);
assign dout[80 +: 8] = carry_save_get(e10s9);
assign dout[88 +: 8] = carry_save_get(e11s7);
assign dout[96 +: 8] = carry_save_get(e12s9);
assign dout[104 +: 8] = carry_save_get(e13s8);
assign dout[112 +: 8] = carry_save_get(e14s9);
assign dout[120 +: 8] = carry_save_get(e15s4);
assign dout[128 +: 8] = carry_save_get(e16s9);
assign dout[136 +: 8] = carry_save_get(e17s8);
assign dout[144 +: 8] = carry_save_get(e18s9);
assign dout[152 +: 8] = carry_save_get(e19s7);
assign dout[160 +: 8] = carry_save_get(e20s9);
assign dout[168 +: 8] = carry_save_get(e21s8);
assign dout[176 +: 8] = carry_save_get(e22s9);
assign dout[184 +: 8] = carry_save_get(e23s6);
assign dout[192 +: 8] = carry_save_get(e24s9);
assign dout[200 +: 8] = carry_save_get(e25s8);
assign dout[208 +: 8] = carry_save_get(e26s9);
assign dout[216 +: 8] = carry_save_get(e27s7);
assign dout[224 +: 8] = carry_save_get(e28s9);
assign dout[232 +: 8] = carry_save_get(e29s8);
assign dout[240 +: 8] = carry_save_get(e30s9);
assign dout[248 +: 8] = carry_save_get(e31s5);
endmodule