diff --git a/graph_net/sample_pass/resumable_sample_pass_mixin.py b/graph_net/sample_pass/resumable_sample_pass_mixin.py
index 804005988..7ffb4b2d6 100644
--- a/graph_net/sample_pass/resumable_sample_pass_mixin.py
+++ b/graph_net/sample_pass/resumable_sample_pass_mixin.py
@@ -45,7 +45,7 @@ def resumable_handle_sample(self, rel_model_path: str):
         self._inc_num_handled_models_or_exit()
 
     def _inc_num_handled_models_or_exit(self):
-        if self.config["limits_handled_models"] is None:
+        if self.config.get("limits_handled_models", None) is None:
             return
         self.num_handled_models += 1
         if self.num_handled_models >= self.config["limits_handled_models"]:
diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh
index 121977f3f..457864db7 100755
--- a/graph_net/tools/generate_subgraph_dataset.sh
+++ b/graph_net/tools/generate_subgraph_dataset.sh
@@ -3,16 +3,18 @@ set -x
 
 MIN_SEQ_OPS=${1:-4}
 MAX_SEQ_OPS=${2:-64}
-GPU_ID=${3:-0}
+GPU_ID=${3:-5}
 OP_RANGE=$MIN_SEQ_OPS-$MAX_SEQ_OPS
 
 export CUDA_VISIBLE_DEVICES="${GPU_ID}"
+export PYTHONPATH=/work/GraphNet:/work/abstract_pass/Athena:$PYTHONPATH
 
 GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname(os.path.dirname(graph_net.__file__)))")
 RESUME="true"
 
-DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace
+#DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace
+DECOMPOSE_WORKSPACE=/work/graphnet_test_workspace/subgraph_dataset_20260203
 DEVICE_REWRITED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/01_device_rewrited_samples
 DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/02_dimension_generalized_samples
 OP_NAMES_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/03_sample_op_names
@@ -26,16 +28,24 @@ GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR=$DECOMPOSE_WORKSPACE/10_grouped_fusible_subg
 SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generalized_fusible_subgraphs
 RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs
 DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs
-UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_kernelbench_unittests
+DTYPE_GENERALIZED_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs
+UNITTESTS_OUTPUT_DIR=$DECOMPOSE_WORKSPACE/15_kernelbench_unittests
 
 mkdir -p "$DECOMPOSE_WORKSPACE"
 
-model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt"
+model_list="$GRAPH_NET_ROOT/graph_net/config/torch_samples_list.txt"
 device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt
 range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt
 deduplicated_subgraph_list=${DECOMPOSE_WORKSPACE}/deduplicated_subgraph_sample_list.txt
 dimension_generalized_subgraph_list=${DECOMPOSE_WORKSPACE}/dimension_generalized_subgraph_sample_list.txt
 deduplicated_fusible_subgraphs_list=${DECOMPOSE_WORKSPACE}/deduplicated_dimension_generalized_subgraph_sample_list.txt
+dtype_generalized_subgraphs_list=${DECOMPOSE_WORKSPACE}/dtype_generalized_subgraphs_sample_list.txt
+
+if [[ "$model_list" == *"torch_samples_list.txt" ]]; then
+    USE_SUBPROCESS_ARGS="--use-subprocess"
+else
+    USE_SUBPROCESS_ARGS=""
+fi
 
 function generate_generalized_subgraph_list() {
     local target_dir="$1"
@@ -84,7 +94,7 @@ EOF
 function dimension_generalizer(){
     echo ">>> [2] Apply dimension generalization for samples under ${device_rewrited_sample_list}."
     echo ">>>"
-    python3 -m graph_net.apply_sample_pass \
+    python3 -m graph_net.apply_sample_pass ${USE_SUBPROCESS_ARGS} \
     --model-path-list $device_rewrited_sample_list \
     --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/dimension_generalizer.py" \
     --sample-pass-class-name "ApplyDimGenPasses" \
@@ -104,7 +114,7 @@ EOF
 function generate_op_names() {
     echo ">>> [3] Generate op_names.txt for samples in ${model_list}."
     echo ">>>"
-    python3 -m graph_net.model_path_handler \
+    python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
     --model-path-list $model_list \
     --handler-config=$(base64 -w 0 <<EOF
[...]
     echo ">>> [5] Decompose according to subgraph_ranges.json for samples in ${device_rewrited_sample_list}."
     echo ">>>"
-    python3 -m graph_net.model_path_handler \
+    python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
     --model-path-list "$device_rewrited_sample_list" \
     --handler-config=$(base64 -w 0 <<EOF
[...]
         echo ">>> Generating dimension generalized subgraph variant index: ${index}"
         dimension_generalized_sample_list="${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index}/dimension_generalized_sample_list.txt"
-        generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_samples_list}
-        python3 -m graph_net.model_path_handler \
+        generate_subgraph_list ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} ${dimension_generalized_sample_list}
+        python3 -m graph_net.model_path_handler ${USE_SUBPROCESS_ARGS} \
             --model-path-list "${dimension_generalized_sample_list}" \
             --handler-config $(base64 -w 0 <<EOF
[...]
+function dtype_generalizer() {
+    echo ">>> [12] Data type generalizer for samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}."
+    echo ">>>"
+    python3 -m graph_net.apply_sample_pass \
+    --model-path-list $deduplicated_fusible_subgraphs_list \
+    --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \
+    --sample-pass-class-name ApplyDataTypeGeneralizationPasses \
+    --sample-pass-config $(base64 -w 0 <<EOF
[...]
-    echo ">>> [12] Generate unittests for subgraph samples under ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}. "
+    echo ">>> [13] Generate unittests for subgraph samples under ${DTYPE_GENERALIZED_OUTPUT_DIR}. "
     echo ">>>"
     python3 -m graph_net.model_path_handler \
-    --model-path-list ${deduplicated_fusible_subgraphs_list} \
+    --model-path-list ${dtype_generalized_subgraphs_list} \
     --handler-config=$(base64 -w 0 <<EOF
[...]
     [...] 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_remove_duplicate_dimension_generalized_subgraphs_${suffix}.txt
     generate_generalized_subgraph_list ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list}
 
+    # dtype generalization
+    dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt
+    generate_generalized_subgraph_list ${DTYPE_GENERALIZED_OUTPUT_DIR} ${dtype_generalized_subgraphs_list}
+
     # generate kernelbench format unittest
     generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt
 }
 
+summary() {
+    num_original_samples=`cat $model_list | grep "^samples/" | wc -l`
+    echo "Number of original GraphNet samples: $num_original_samples"
+
+    num_device_rewrited_samples=`find ${DEVICE_REWRITED_OUTPUT_DIR} -name "model.py" | wc -l`
+    device_rewrited_succeeded_percent=$(( num_device_rewrited_samples * 100 / num_original_samples ))
+    echo "- [Step 1] device rewrite: succeeded=${num_device_rewrited_samples}, percent=${device_rewrited_succeeded_percent}%"
+
+    num_succeeded_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l`
+    dimension_generalized_samples_succeeded_percent=$(( num_succeeded_dimension_generalized_samples * 100 / (num_original_samples * 9) ))
+    echo "- [Step 2] dimension generalization: succeeded=${num_succeeded_dimension_generalized_samples}, percent=${dimension_generalized_samples_succeeded_percent}%"
+    for index in {0..8}; do
+        num_succeeded_dimension_generalized_samples=`find ${DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l`
+        dimension_generalized_samples_succeeded_percent=$(( num_succeeded_dimension_generalized_samples * 100 / num_original_samples ))
+        echo "    ${index}, succeeded=${num_succeeded_dimension_generalized_samples}, percent=${dimension_generalized_samples_succeeded_percent}%"
+    done
+    echo ""
+
+    num_succeeded_op_names=`find ${OP_NAMES_OUTPUT_DIR} -name op_names.txt | wc -l`
+    op_names_succeeded_percent=$(( num_succeeded_op_names * 100 / num_original_samples ))
+    echo "- [Step 3] generate op names: succeeded=${num_succeeded_op_names}, percent=${op_names_succeeded_percent}%"
+
+    num_typical_subgraph_ranges=`find ${SUBGRAPH_RANGES_JSON_ROOT} -name typical_subgraph_ranges.json | wc -l`
+    typical_subgraph_ranges_succeeded_percent=$(( num_typical_subgraph_ranges * 100 / num_original_samples ))
+    echo "- [Step 4] generate typical subgraph ranges: succeeded=${num_typical_subgraph_ranges}, percent=${typical_subgraph_ranges_succeeded_percent}%"
+
+    num_succeeded_range_decomposed_subgraphs=`find ${RANGE_DECOMPOSE_OUTPUT_DIR} -name "model.py" | wc -l`
+    echo "- [Step 5] range decompose: succeeded=${num_succeeded_range_decomposed_subgraphs}"
+
+    num_renamed_subgraphs=`find ${GRAPH_VAR_RENAME_OUTPUT_DIR} -name "model.py" | wc -l`
+    echo "- [Step 6] rename: succeeded=${num_renamed_subgraphs}"
+
+    num_deduplicated_subgraphs=`find ${DEDUPLICATED_OUTPUT_DIR} -name "model.py" | wc -l`
+    echo "- [Step 7] remove duplicated: succeeded=${num_deduplicated_subgraphs}"
+
+    num_succeeded_cumsum_kernels_subgraphs=`find ${CUMSUM_NUM_KERNELS_DIR} -name "cumsum_num_kernels.json" | wc -l`
+    cumsum_kernels_succeeded_percent=$(( num_succeeded_cumsum_kernels_subgraphs * 100 / num_deduplicated_subgraphs ))
+    echo "- [Step 8] cumsum kernels: succeeded=${num_succeeded_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_succeeded_percent}%"
+
+    num_fusible_subgraph_ranges=`find ${FUSIBLE_SUBGRAPH_RANGES_DIR} -name "fusible_subgraph_ranges.json" | wc -l`
+    num_grouped_fusible_subgraph_ranges=`find ${GROUPED_FUSIBLE_SUBGRAPH_RANGES_DIR} -name "grouped_fusible_subgraph_ranges.json" | wc -l`
+    echo "    fusible subgraph ranges: succeeded=${num_fusible_subgraph_ranges}"
+    echo "    grouped fusible subgraph ranges: succeeded=${num_grouped_fusible_subgraph_ranges}"
+    echo ""
+
+    num_succeeded_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR} -name "model.py" | wc -l`
+    echo "- [Step 9] subgraph dimension generalization: succeeded=${num_succeeded_dimension_generalized_subgraphs}"
+    for index in {0..8}; do
+        num_succeeded_dimension_generalized_subgraphs=`find ${SUBGRAPH_DIMENSION_GENERALIZED_OUTPUT_DIR}/${index} -name "model.py" | wc -l`
+        echo "    ${index}, succeeded=${num_succeeded_dimension_generalized_subgraphs}"
+    done
+    echo ""
+
+    num_renamed_fusible_subgraphs=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l`
+    echo "- [Step 10] rename: succeeded=${num_renamed_fusible_subgraphs}"
+    for index in {0..8}; do
+        num_renamed_fusible_subgraphs_index=`find ${RENAMED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l`
+        echo "    ${index}, succeeded=${num_renamed_fusible_subgraphs_index}"
+    done
+    echo ""
+
+    num_deduplicated_fusible_subgraphs=`find ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l`
+    echo "- [Step 11] remove duplicated: succeeded=${num_deduplicated_fusible_subgraphs}"
+    for index in {0..8}; do
+        num_deduplicated_fusible_subgraphs_index=`find ${DEDUPLICATED_DIMENSION_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l`
+        echo "    ${index}, succeeded=${num_deduplicated_fusible_subgraphs_index}"
+    done
+    echo ""
+
+    num_succeeded_unittests=`find ${UNITTESTS_OUTPUT_DIR} -name "*_test.py" | wc -l`
+    unittest_succeeded_percent=$(( num_succeeded_unittests * 100 / num_deduplicated_fusible_subgraphs ))
+    echo "- [Step 13] generate unittest: succeeded=${num_succeeded_unittests}, percent=${unittest_succeeded_percent}%"
+    for index in {0..8}; do
+        num_succeeded_unittests=`find ${UNITTESTS_OUTPUT_DIR}/${index} -name "*_test.py" | wc -l`
+        echo "    ${index}, succeeded=${num_succeeded_unittests}"
+    done
+}
+
 main
+
+set +x
+summary 2>&1 | tee ${DECOMPOSE_WORKSPACE}/summary.txt
diff --git a/graph_net/torch/sample_pass/dtype_generalizer.py b/graph_net/torch/sample_pass/dtype_generalizer.py
index 48c06803a..13a246244 100644
--- a/graph_net/torch/sample_pass/dtype_generalizer.py
+++ b/graph_net/torch/sample_pass/dtype_generalizer.py
@@ -17,6 +17,7 @@ from pathlib import Path
 from typing import Any, Dict, List
 
+import torch
 import torch.fx as fx
 
 from graph_net.graph_net_json_file_util import (
@@ -236,9 +237,9 @@ class ApplyDataTypeGeneralizationPasses(SamplePass, ResumableSamplePassMixin):
         "output_dir": "/path/to/output",
         "model_path_prefix": "",
         "model_runnable_predicator_filepath": "...",
-        "resume": <bool>,
-        "limits_handled_models": <int>,
-        "try_run": <bool>,
+        "resume": true,
+        "limits_handled_models": null,
+        "try_run": true,
     }
     """
@@ -268,6 +269,7 @@ def declare_config(
         output_dir: str,
         model_path_prefix: str,
         model_runnable_predicator_filepath: str,
+        device: str = "auto",
         resume: bool = False,
         limits_handled_models: int = None,
         try_run: bool = True,
@@ -281,6 +283,13 @@ def _make_model_runnable_predicator(self, config: Dict[str, Any]):
         predicator_config = self.model_runnable_predicator_config
         return cls(predicator_config)
 
+    def _choose_device(self, device) -> str:
+        if device is None:
+            return None
+        if device in ["cpu", "cuda"]:
+            return device
+        return "cuda" if torch.cuda.is_available() else "cpu"
+
     def sample_handled(self, rel_model_path: str) -> bool:
         model_path = Path(self.config["model_path_prefix"]) / rel_model_path
         dtype_pass_names = self._read_dtype_pass_names(model_path)
@@ -320,7 +329,9 @@ def resume(self, rel_model_path: str) -> List[str]:
             return []
 
         # Parse the computation graph
-        traced_model = parse_immutable_model_path_into_sole_graph_module(abs_model_path)
+        traced_model = parse_immutable_model_path_into_sole_graph_module(
+            abs_model_path, device=self._choose_device(self.config["device"])
+        )
 
         # Copy the originl sample
         files_copied = [
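
Note on the resumable_sample_pass_mixin.py change: switching to config.get("limits_handled_models", None) lets the limit check tolerate configs that omit the key entirely, instead of raising KeyError; a missing key now behaves like an explicit null (no limit). A minimal sketch of the difference, using a hypothetical config dict:

    config = {}  # "limits_handled_models" not set

    # Before the patch, the subscript lookup raises KeyError:
    #     config["limits_handled_models"]
    # After the patch, a missing key reads as None, so the limit
    # bookkeeping is skipped and the pass never exits early:
    if config.get("limits_handled_models", None) is None:
        print("no limit configured")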
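Note on summary(): each step reports an integer percentage via shell arithmetic, e.g. $(( succeeded * 100 / total )), which aborts with a division-by-zero error whenever a step produced no samples. A guarded sketch of the same computation (the percent helper is hypothetical, not part of the patch):

    def percent(succeeded: int, total: int) -> int:
        # Mirrors $(( succeeded * 100 / total )) with integer division,
        # but returns 0 when total is 0 instead of erroring out.
        return succeeded * 100 // total if total > 0 else 0

    print(percent(42, 100))  # 42
    print(percent(3, 0))     # 0, rather than a division-by-zero failure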
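Note on dtype_generalizer.py: the new "device" config entry (default "auto") is resolved by _choose_device before the graph is parsed: None passes through unchanged, explicit "cpu"/"cuda" values are honored, and anything else falls back to runtime detection. A standalone sketch of that resolution logic (the cfg dict here is hypothetical):

    import torch

    def choose_device(device):
        # None keeps the parser's own default placement.
        if device is None:
            return None
        # Explicit devices are used as-is.
        if device in ["cpu", "cuda"]:
            return device
        # "auto" (or any other value) auto-detects.
        return "cuda" if torch.cuda.is_available() else "cpu"

    cfg = {"device": "auto"}
    print(choose_device(cfg["device"]))  # "cuda" on a GPU machine, else "cpu"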