dstg Config

This is the pipeline-parameter tuning configuration for DSTG on the hcc_liver dataset. It is stored at the path below.

examples/tuning/deconv_dstg/hcc_liver/pipeline_params_tuning_config.yaml
---
# Top-level tuning settings for the preprocessing pipeline search.
# NOTE(review): the exact meaning of each key is defined by the program that
# loads this file, which is not visible here; the notes below are assumptions.
type: preprocessor
# Tuning mode; matches the `pipeline_params` prefix of this config's file name.
tune_mode: pipeline_params
# presumably how many top-ranked pipelines are carried into parameter tuning
# -- TODO confirm
pipeline_tuning_top_k: 3
# presumably the number of parameter-tuning trials per kept pipeline
# -- TODO confirm
parameter_tuning_freq_n: 20
# Sequence of preprocessing steps. A step with `target` names one transform;
# a step with `include` lists several transforms (presumably the candidates
# the tuner chooses among -- confirm).
pipeline:
  # Fixed step: FilterGenesCommon, configured with the `ref` and `test` splits
  # (presumably keeps only genes present in both -- confirm against the
  # transform's documentation).
  - type: filter.gene
    target: FilterGenesCommon
    params:
      split_keys:
        - ref
        - test
  # Fixed step: PseudoMixture with n_pseudo = 500; its output split is named
  # `pseudo`, the same name the DSTGraph step is given as `ref_split`.
  # NOTE(review): presumably n_pseudo is the number of synthetic mixtures
  # generated -- confirm against the transform's documentation.
  - type: pseudobulk
    target: PseudoMixture
    params:
      n_pseudo: 500
      out_split_name: pseudo
  # Fixed step: RemoveSplit is given split_name `ref` (presumably dropping the
  # original reference split now that the `pseudo` split exists -- confirm).
  - type: misc
    target: RemoveSplit
    params:
      split_name: ref
      log_level: INFO
  # Gene-filter step with three listed transforms (presumably the candidates
  # the tuner selects from -- confirm).
  - type: filter.gene
    include:
      - FilterGenesPercentile
      - FilterGenesScanpyOrder
      - FilterGenesPlaceHolder
    # Defaults are given for FilterGenesScanpyOrder only; the other two listed
    # transforms have no entry here.
    default_params:
      FilterGenesScanpyOrder:
        # presumably the sequence in which the four thresholds are applied
        # -- TODO confirm
        order:
          - min_counts
          - max_counts
          - min_cells
          - max_cells
        # NOTE(review): min_counts is an integer while the other three are
        # fractions below 1; presumably absolute vs. proportional thresholds.
        min_counts: 100
        max_counts: 0.99
        min_cells: 0.01
        max_cells: 0.99
  # Normalization step with six listed transforms (presumably the candidates
  # the tuner selects from -- confirm).
  - type: normalize
    include:
      - ColumnSumNormalize
      - ScTransform
      - Log1P
      - NormalizeTotal
      - NormalizePlaceHolder
      - NormalizeTotalLog1P
    # Defaults exist for three of the six transforms; the rest have no entry.
    default_params:
      ScTransform:
        # presumably the number of worker processes -- TODO confirm
        processes_num: 8
      # target_sum is written with an explicit decimal point and a signed
      # exponent on purpose: YAML 1.1 loaders such as PyYAML resolve a bare
      # `1e4` as the string "1e4", whereas `1.0e+4` is the float 10000.0
      # under both YAML 1.1 and YAML 1.2.
      NormalizeTotalLog1P:
        target_sum: 1.0e+4
      NormalizeTotal:
        target_sum: 1.0e+4
  # Second gene-filter step, listing six gene-selection transforms (presumably
  # the candidates the tuner selects from -- confirm).
  - type: filter.gene
    include:
      - HighlyVariableGenesLogarithmizedByMeanAndDisp
      - HighlyVariableGenesRawCount
      - FilterGenesNumberPlaceHolder
      - HighlyVariableGenesLogarithmizedByTopGenes
      - FilterGenesTopK
      - FilterGenesRegression
    # Every transform that takes a gene count is set to 3000 here.
    # FilterGenesNumberPlaceHolder has no defaults in this file.
    default_params:
      FilterGenesTopK:
        num_genes: 3000
      FilterGenesRegression:
        num_genes: 3000
      # The three HighlyVariableGenes* transforms all use `batch` as batch_key
      # (presumably the name of a batch annotation column -- confirm).
      HighlyVariableGenesRawCount:
        n_top_genes: 3000
        batch_key: batch
      HighlyVariableGenesLogarithmizedByTopGenes:
        n_top_genes: 3000
        batch_key: batch
      # No gene count is set for the mean/dispersion variant, only batch_key.
      HighlyVariableGenesLogarithmizedByMeanAndDisp:
        batch_key: batch
  # Cell-feature step, listing six feature transforms (presumably the
  # candidates the tuner selects from -- confirm).
  - type: feature.cell
    include:
      - CellPCA
      - CellSVD
      - WeightedFeaturePCA
      - WeightedFeatureSVD
      - GaussRandProjFeature
      - FeatureCellPlaceHolder
    # `params` sits at step level rather than under one transform (presumably
    # applied to whichever transform is selected -- confirm). The output name
    # `feature.cell` is the channel name used by the DSTGraph and SetConfig
    # steps of this pipeline.
    params:
      out: feature.cell
      log_level: INFO
    # All five transforms with defaults use n_components = 400;
    # FeatureCellPlaceHolder has no defaults in this file.
    default_params:
      CellPCA:
        n_components: 400
      CellSVD:
        n_components: 400
      GaussRandProjFeature:
        n_components: 400
      WeightedFeaturePCA:
        n_components: 400
      WeightedFeatureSVD:
        n_components: 400
  # Fixed step: DSTGraph, with the `pseudo` split as its reference split.
  - type: graph.reference
    target: DSTGraph
    params:
      ref_split: pseudo
      # Two channels, both `feature.cell`, each paired with type `obsm`
      # (presumably matched by position with channel_types -- confirm).
      channels:
        - feature.cell
        - feature.cell
      channel_types:
        - obsm
        - obsm
  # Fixed step: SetConfig records which channels hold the features and labels.
  - type: misc
    target: SetConfig
    params:
      config_dict:
        # Two feature channels; presumably paired by position with
        # feature_channel_type (DSTGraph -> obsp, feature.cell -> obsm)
        # -- TODO confirm.
        feature_channel:
          - DSTGraph
          - feature.cell
        feature_channel_type:
          - obsp
          - obsm
        label_channel: cell_type_portion
# Sweep settings under the `wandb` key (presumably consumed by a Weights and
# Biases sweep -- confirm against the loader).
wandb:
  # NOTE(review): entity and project are hard-coded; the entity looks like a
  # personal account name -- confirm it should be overridden per user.
  entity: xzy11632
  project: dance-dev
  method: grid  # try grid to provide a comprehensive search
  # Objective of the sweep: minimize the metric named MSE.
  metric:
    name: MSE
    goal: minimize