Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
cc1f68b
training-platform core: minimal MLP end-to-end training on Siracusa
runwangdl Apr 10, 2026
284f145
training-platform core: apply pre-commit formatting (yapf/isort/clang…
runwangdl Apr 10, 2026
763b464
training-platform core: canonicalise SoftmaxCrossEntropyLoss to 2 out…
runwangdl Apr 10, 2026
e348863
training-platform core: collapse InPlaceAccumulatorV2 template to sin…
runwangdl Apr 10, 2026
b844fe1
training-platform core: generalise SGD alias comment to L2 or L3
runwangdl Apr 10, 2026
ceeb951
training-platform core: trim InPlaceAccumulatorV2 tile constraint com…
runwangdl Apr 10, 2026
ecbffa0
training-platform core: drop leftover egress-target comment
runwangdl Apr 10, 2026
728c68f
training-platform core: drop stray ReluGradTileConstraint
runwangdl Apr 10, 2026
fc24a84
training-platform core: delete stray SoftmaxCrossEntropyLossDualOutpu…
runwangdl Apr 10, 2026
12597be
training-platform core: simplify MiniMalloc alias-skip block
runwangdl Apr 10, 2026
b42ea1d
training-platform core: restore per-layer { } block in generateInfere…
runwangdl Apr 10, 2026
5285021
training-platform core: restore upstream SoftmaxCrossEntropy kernel t…
runwangdl Apr 10, 2026
40e8339
training-platform core: drop legacy 1-output Softmax/CrossEntropy ker…
runwangdl Apr 10, 2026
91931cf
training-platform core: propagate loss verification result + drop dea…
runwangdl Apr 10, 2026
f177a5b
training-platform core: label Step B in run_optimizer_step
runwangdl Apr 10, 2026
0f853fc
training-platform core: drop _augment_path PATH manipulation
runwangdl Apr 10, 2026
55c91d0
training-platform core: extract training codegen helpers to trainingU…
runwangdl Apr 10, 2026
2d53fe2
training-platform core: collapse duplicate tiled/non-tiled training b…
runwangdl Apr 10, 2026
b689d3f
training-platform core: extract training codegen argparse builders to…
runwangdl Apr 10, 2026
4218ba1
training-platform core: lift execution.py training subprocess helpers…
runwangdl Apr 10, 2026
f5255d3
training-platform core: lift _resolve_optimizer_dir to trainingUtils
runwangdl Apr 10, 2026
5a839b1
training-platform core: decouple execution.py from training pipeline
runwangdl Apr 10, 2026
3d309b7
training-platform core: extract training codegen helpers to codeGener…
runwangdl Apr 10, 2026
969f593
training-platform core: drop redundant top-level deeployTrainingRunne…
runwangdl Apr 10, 2026
ac4df5b
training-platform core: drop non-training helper wrappers from traini…
runwangdl Apr 10, 2026
9d3445c
training-platform core: drop unused loop var and populate zero-sized …
runwangdl Apr 10, 2026
40c5da9
training-platform core: apply pre-commit yapf + autoflake autofixes
runwangdl Apr 10, 2026
191a30b
training-platform core: drop CCT2_FT2 from siracusa tiled L3 CI list
runwangdl Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,14 +336,14 @@ def has_live_aliases(self, ctxt: NetworkContext) -> bool:
True if this VariableBuffer has any live aliases, False otherwise
"""
# Do a breadth-first search across the aliasing double-linked list
live = self._live
live = self._live or self.is_input or self.is_output
queue = set(self.aliases)
visited = set(self.name)
while len(queue) > 0:
next = queue.pop()
buffNext = ctxt.lookup(next)
assert isinstance(buffNext, VariableBuffer)
live |= buffNext._live
live |= buffNext._live or buffNext.is_input or buffNext.is_output
visited.add(next)
queue |= buffNext.aliases - visited
return live
Expand Down
16 changes: 16 additions & 0 deletions Deeploy/Targets/Generic/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,22 @@ def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)


class InPlaceAccumulatorV2Layer(ONNXLayer):
    """Layer for ORT InPlaceAccumulatorV2 operator (com.microsoft).

    Gradient accumulation with optional reset:
        if lazy_reset_grad: out = gradient
        else:               out = buffer + gradient
    """

    def __init__(self, maps: List[NodeMapper]):
        super().__init__(maps)

    def computeOps(self):
        # Count one element-wise operation (copy or add, selected by the
        # lazy_reset_grad flag) per element of the accumulation buffer.
        opRep = self.mapper.parser.operatorRepresentation
        return opRep['size']


class LinearAttentionLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
Expand Down
59 changes: 54 additions & 5 deletions Deeploy/Targets/Generic/Parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2611,15 +2611,18 @@ def parseNodeCtxt(self,


class SoftmaxCrossEntropyLossParser(NodeParser):
"""SoftmaxCrossEntropyLoss parser.

The canonical form has two outputs: a scalar mean cross-entropy loss and
a per-sample log_prob tensor, matching the signature emitted by ONNX
Runtime when exporting training graphs.
"""

def __init__(self):
super().__init__()

def parseNode(self, node: gs.Node) -> bool:

ret = all([len(node.inputs) == 2, len(node.outputs) == 1])

return ret
return all([len(node.inputs) == 2, len(node.outputs) == 2])

def parseNodeCtxt(self,
ctxt: NetworkContext,
Expand All @@ -2628,9 +2631,13 @@ def parseNodeCtxt(self,

logits = ctxt.lookup(node.inputs[0].name)
labels = ctxt.lookup(node.inputs[1].name)
log_prob = ctxt.lookup(node.outputs[0].name)
# outputs[0] = loss (0-d scalar, shape [1] after Deeploy normalisation)
# outputs[1] = log_prob tensor
loss = ctxt.lookup(node.outputs[0].name)
log_prob = ctxt.lookup(node.outputs[1].name)
self.operatorRepresentation['logits'] = logits.name
self.operatorRepresentation['labels'] = labels.name
self.operatorRepresentation['loss'] = loss.name
self.operatorRepresentation['log_prob'] = log_prob.name
self.operatorRepresentation['batch'] = logits.shape[0]
self.operatorRepresentation['num_classes'] = logits.shape[1]
Expand Down Expand Up @@ -2697,6 +2704,48 @@ def parseNodeCtxt(self,
return ctxt, True


class InPlaceAccumulatorV2Parser(NodeParser):
    """Parser for ORT InPlaceAccumulatorV2 operator (com.microsoft).

    Semantics:
        if lazy_reset_grad: out = gradient (reset)
        else: out = buffer + gradient (accumulate)

    Inputs:
        0: buffer - current accumulation buffer (float tensor)
        1: gradient - new gradient to accumulate (float tensor, same shape)
        2: lazy_reset_grad - reset flag; if true, overwrite; else add (bool[1])

    Output:
        0: output_buffer - updated accumulation buffer (float tensor)
    """

    def __init__(self):
        super().__init__()

    def parseNode(self, node: gs.Node) -> bool:
        # Require exactly 3 inputs (buffer, gradient, lazy_reset_grad) and 1 output
        return len(node.inputs) == 3 and len(node.outputs) == 1

    def parseNodeCtxt(self,
                      ctxt: NetworkContext,
                      node: gs.Node,
                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:

        buffer = ctxt.lookup(node.inputs[0].name)
        gradient = ctxt.lookup(node.inputs[1].name)
        lazy_reset_grad = ctxt.lookup(node.inputs[2].name)
        data_out = ctxt.lookup(node.outputs[0].name)

        # Reject malformed graphs early: the generated kernel and tiling path
        # assume buffer, gradient and output share the same extent, and 'size'
        # is derived solely from buffer.shape — a smaller gradient or output
        # tensor would lead to out-of-bounds reads/writes downstream.
        if tuple(buffer.shape) != tuple(gradient.shape) or tuple(buffer.shape) != tuple(data_out.shape):
            return ctxt, False
        # lazy_reset_grad must be a single flag element.
        if int(np.prod(lazy_reset_grad.shape)) != 1:
            return ctxt, False

        self.operatorRepresentation['accum_buffer'] = buffer.name
        self.operatorRepresentation['gradient'] = gradient.name
        self.operatorRepresentation['lazy_reset_grad'] = lazy_reset_grad.name
        self.operatorRepresentation['data_out'] = data_out.name
        self.operatorRepresentation['size'] = int(np.prod(buffer.shape))

        return ctxt, True
Comment on lines +2726 to +2746
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Reject shape-mismatched InPlaceAccumulatorV2 nodes.

This parser only checks arity, but the generated kernel/tile path assumes buffer, gradient, and data_out have the same extent and that lazy_reset_grad is a single flag. If a malformed graph slips through, size is derived from buffer.shape and later code can read/write past the smaller tensor.

Proposed validation
     def parseNodeCtxt(self,
                       ctxt: NetworkContext,
                       node: gs.Node,
                       channels_first: bool = True) -> Tuple[NetworkContext, bool]:

         buffer = ctxt.lookup(node.inputs[0].name)
         gradient = ctxt.lookup(node.inputs[1].name)
         lazy_reset_grad = ctxt.lookup(node.inputs[2].name)
         data_out = ctxt.lookup(node.outputs[0].name)
+
+        if tuple(buffer.shape) != tuple(gradient.shape) or tuple(buffer.shape) != tuple(data_out.shape):
+            return ctxt, False
+        if int(np.prod(lazy_reset_grad.shape)) != 1:
+            return ctxt, False

         self.operatorRepresentation['accum_buffer'] = buffer.name
         self.operatorRepresentation['gradient'] = gradient.name
         self.operatorRepresentation['lazy_reset_grad'] = lazy_reset_grad.name
         self.operatorRepresentation['data_out'] = data_out.name
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@Deeploy/Targets/Generic/Parsers.py` around lines 2728 - 2748, The parser only
checks arity in parseNode/parseNodeCtxt but must validate tensor extents: in
parseNodeCtxt (using ctxt.lookup results for buffer, gradient, lazy_reset_grad,
data_out) verify that buffer.shape, gradient.shape and data_out.shape are
identical (same number of elements and per-dimension sizes) and that
lazy_reset_grad is a scalar/has size 1; if any check fails, return ctxt, False
(or otherwise reject the node) instead of proceeding; compute
operatorRepresentation['size'] from the validated common shape (e.g.,
prod(buffer.shape) after checks) and do not accept the node when shapes mismatch
to prevent out-of-bounds reads/writes.



class BatchNormParser(NodeParser):

def __init__(self):
Expand Down
30 changes: 28 additions & 2 deletions Deeploy/Targets/Generic/TypeCheckers.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,11 +577,11 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[
def _inferNumLevels(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:

return [2**(self.input_types[0].referencedType.typeWidth)]
return [2**(self.input_types[0].referencedType.typeWidth)] * len(self.output_types)

def _inferSignedness(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
return [False]
return [False] * len(self.output_types)


class SGDChecker(SignPropTypeChecker):
Expand All @@ -598,6 +598,32 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
return [True]


class InPlaceAccumulatorV2Checker(SignPropTypeChecker):
    """Type checker for ORT InPlaceAccumulatorV2 operator (com.microsoft).

    Inputs:
        0: buffer (float32*)
        1: gradient (float32*)
        2: lazy_reset_grad (uint8_t* or bool* - 1 element)

    Output:
        0: output_buffer (float32*)
    """

    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
        super().__init__(input_types, output_types)

    def _inferNumLevels(self, inputs: List[VariableBuffer],
                        operatorRepresentation: OperatorRepresentation) -> List[int]:
        # The output inherits the precision of the buffer input (float32)
        bufferWidth = self.input_types[0].referencedType.typeWidth
        return [2**bufferWidth]

    def _inferSignedness(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
        # The float32 accumulation result is always signed
        return [True]


class BatchNormChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
Expand Down
28 changes: 19 additions & 9 deletions Deeploy/Targets/PULPOpen/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \
GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \
GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \
QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \
SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker
GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, InPlaceAccumulatorV2Checker, LayerNormChecker, \
MatMulChecker, MulChecker, QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, \
RQHardswishChecker, SGDChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
Expand All @@ -29,11 +29,12 @@
from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack
from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma
from Deeploy.Targets.PULPOpen.Templates import ConvTemplate, DMASliceTemplate, FloatAddTemplate, FloatConvTemplate, \
FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \
FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GEMMTemplate, \
MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, RequantShiftTemplate, ReshapeTemplate, \
RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, \
TransposeTemplate, UniformRequantShiftTemplate, iRMSNormTemplate, iSoftmaxTemplate
FloatGELUTemplate, FloatGemmTemplate, FloatInPlaceAccumulatorV2Template, FloatLayernormTemplate, \
FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatReduceMeanTemplate, FloatReluTemplate, \
FloatSoftmaxTemplate, GEMMTemplate, MatrixVectorTemplate, MaxPoolTemplate, MulTemplate, ReduceMeanTemplate, \
RequantShiftTemplate, ReshapeTemplate, RQAddTemplate, RQSiHardswishTemplate, SGDTemplate, \
SoftmaxCrossEntropyLossTemplate, TallGEMMTemplate, TransposeTemplate, UniformRequantShiftTemplate, \
iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.PULPOpen.TypeCheckers import PULPConvChecker, PULPLinearChecker, PULPMaxPoolChecker, \
PULPRequantShiftChecker
from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \
Expand Down Expand Up @@ -353,7 +354,8 @@

PULPSoftmaxCrossEntropyLossBindings = [
NodeBinding(
SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]),
SoftmaxCrossEntropyLossChecker([PointerClass(float32_t), PointerClass(type)],
[PointerClass(float32_t), PointerClass(float32_t)]),
SoftmaxCrossEntropyLossTemplate.referenceTemplate, ForkTransformer) for type in IntegerDataTypes
]

Expand All @@ -368,6 +370,14 @@
SGDTemplate.referenceTemplate, ForkTransformer)
]

PULPInPlaceAccumulatorV2Bindings = [
NodeBinding(
InPlaceAccumulatorV2Checker(
[PointerClass(float32_t), PointerClass(float32_t),
PointerClass(uint8_t)], [PointerClass(float32_t)]), FloatInPlaceAccumulatorV2Template.referenceTemplate,
ForkTransformer)
]

PULPTransposeBindings = [
NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate,
ForkTransformer) for type in IntegerDataTypes
Expand Down
46 changes: 24 additions & 22 deletions Deeploy/Targets/PULPOpen/Platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \
BasicRQIntegerDivBinding
from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELUGradLayer, GELULayer, \
GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \
ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \
RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \
SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer
GEMMLayer, InPlaceAccumulatorV2Layer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, \
PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, \
RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, \
SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer
from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \
GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \
MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \
RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, \
SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, \
SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, \
iSoftmaxParser
GELUGradParser, GELUParser, GEMMParser, InPlaceAccumulatorV2Parser, LayerNormGradParser, LayerNormParser, \
MatMulParser, MaxPool1DParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, \
ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, \
RQSiHardswishParser, SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, \
SoftmaxGradParser, SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, \
iRMSNormParser, iSoftmaxParser
from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate
from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, IntegerDivRequantMergePass, \
MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \
Expand All @@ -39,17 +39,17 @@
from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \
PULPConv2DTilingReadyBindings, PULPDWConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, \
PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \
PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \
PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \
PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \
PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \
PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \
PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \
PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \
PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, \
PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \
PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \
PULPUniformRQSTilingReadyBindings
PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPInPlaceAccumulatorV2TilingReadyBindings, \
PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, \
PULPLayernormTilingReadyBindings, PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, \
PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, \
PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, \
PULPRQSConv1DTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \
PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \
PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, \
PULPSliceTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \
PULPSoftmaxCrossEntropyTilingReadyBindings, PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, \
PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings
from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \
PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass

Expand Down Expand Up @@ -108,6 +108,7 @@
SoftmaxCrossEntropyLossGradMapper = NodeMapper(SoftmaxCrossEntropyLossGradParser(),
PULPSoftmaxCrossEntropyGradTilingReadyBindings)
SGDMapper = NodeMapper(SGDParser(), PULPSGDTilingReadyBindings)
InPlaceAccumulatorV2Mapper = NodeMapper(InPlaceAccumulatorV2Parser(), PULPInPlaceAccumulatorV2TilingReadyBindings)
QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings)
DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings)
GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings)
Expand Down Expand Up @@ -151,7 +152,8 @@
'SoftmaxGrad': SoftmaxGradLayer([SoftmaxGradMapper]),
'SoftmaxCrossEntropyLoss': SoftmaxCrossEntropyLossLayer([SoftmaxCrossEntropyLossMapper]),
'SoftmaxCrossEntropyLossGrad': SoftmaxCrossEntropyLossGradLayer([SoftmaxCrossEntropyLossGradMapper]),
'SGD': SGDLayer([SGDMapper])
'SGD': SGDLayer([SGDMapper]),
'InPlaceAccumulatorV2': InPlaceAccumulatorV2Layer([InPlaceAccumulatorV2Mapper]),
}


Expand Down
5 changes: 3 additions & 2 deletions Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from typing import Dict, List, Tuple

from Deeploy.AbstractDataTypes import float32_tPtr
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.DataTypes import float32_t
from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


Expand All @@ -19,7 +20,7 @@ def alignToContext(self, ctxt: NetworkContext,
if 'C' not in operatorRepresentation or operatorRepresentation['C'] is None:
# No bias case - set C to NULL and provide a default type
operatorRepresentation['C'] = None
operatorRepresentation['C_type'] = float32_tPtr # Default to fp32 type
operatorRepresentation['C_type'] = PointerClass(float32_t) # Default to fp32 type
operatorRepresentation['C_batched'] = False

return ctxt, operatorRepresentation, []
Expand Down
Loading
Loading