Search Space of Preprocessing Function Parameters
After Stage 2 identifies the optimal preprocessing function, Stage 3 tunes the parameters of that function. The parameter search is driven by the configuration file shown below, which defines the search space; users can edit this file to customize the search.
examples/tuning/step3_default_params.yaml
---
# Stage 3 search-space configuration: defines the parameter search space used
# when tuning preprocessing functions.
# NOTE(review): `type` presumably tells the tuning driver which kind of
# component this file configures — confirm against the consumer.
type: preprocessor
# NOTE(review): `params` presumably selects parameter tuning (as opposed to
# choosing between functions) — confirm against the consumer.
tune_mode: params
# List of candidate functions. Each entry names a `type` and a `target`, and
# may carry `params_to_tune` (search space) and/or `params` (fixed values).
pipeline:
# filter.gene
# Gene-filtering candidates. Under `params_to_tune`, `min`/`max` give the two
# bounds of a range and `values` enumerates discrete choices.
# NOTE(review): whether ranges are sampled as integers or floats, and whether
# the bounds are inclusive, is decided by the tuning driver — confirm there.
- type: filter.gene
  target: FilterGenesPercentile
  params_to_tune:
    min_val:
      min: 1
      max: 10
    max_val:
      min: 95
      max: 99
    mode:
      values: [sum, var, cv, rv]
- type: filter.gene
  target: FilterGenesScanpyOrder
  params_to_tune:
    # All 24 orderings of the four keys min_counts, min_cells, max_counts
    # and max_cells; each choice is one complete ordering.
    order:
      values:
      - [min_counts, min_cells, max_counts, max_cells]
      - [min_counts, min_cells, max_cells, max_counts]
      - [min_counts, max_counts, min_cells, max_cells]
      - [min_counts, max_counts, max_cells, min_cells]
      - [min_counts, max_cells, min_cells, max_counts]
      - [min_counts, max_cells, max_counts, min_cells]
      - [min_cells, min_counts, max_counts, max_cells]
      - [min_cells, min_counts, max_cells, max_counts]
      - [min_cells, max_counts, min_counts, max_cells]
      - [min_cells, max_counts, max_cells, min_counts]
      - [min_cells, max_cells, min_counts, max_counts]
      - [min_cells, max_cells, max_counts, min_counts]
      - [max_counts, min_counts, min_cells, max_cells]
      - [max_counts, min_counts, max_cells, min_cells]
      - [max_counts, min_cells, min_counts, max_cells]
      - [max_counts, min_cells, max_cells, min_counts]
      - [max_counts, max_cells, min_counts, min_cells]
      - [max_counts, max_cells, min_cells, min_counts]
      - [max_cells, min_counts, min_cells, max_counts]
      - [max_cells, min_counts, max_counts, min_cells]
      - [max_cells, min_cells, min_counts, max_counts]
      - [max_cells, min_cells, max_counts, min_counts]
      - [max_cells, max_counts, min_counts, min_cells]
      - [max_cells, max_counts, min_cells, min_counts]
    # Alternative ranges kept disabled by the original author under the
    # label below; they are not part of the active search space.
    # cta_problem
    # min_counts:
    #   min: 1
    #   max: 10
    # min_cells:
    #   min: 1
    #   max: 10
    # max_counts:
    #   min: 500
    #   max: 5000
    # max_cells:
    #   min: 500
    #   max: 5000
    # Active ranges. Note that min_counts uses integer bounds while the other
    # three thresholds use fractional bounds.
    min_counts:
      min: 3
      max: 500
    min_cells:
      min: 0.0
      max: 0.1
    max_counts:
      min: 0.9
      max: 1.0
    max_cells:
      min: 0.95
      max: 1.0
# No parameters are listed for this entry.
# NOTE(review): presumably a pass-through option — confirm.
- type: filter.gene
  target: FilterGenesPlaceHolder
# filter.cell
# Cell-filtering candidates. `min`/`max` give the two bounds of a range and
# `values` enumerates discrete choices.
- type: filter.cell
  target: FilterCellsScanpyOrder
  params_to_tune:
    # All 24 orderings of the four keys min_counts, min_genes, max_counts
    # and max_genes; each choice is one complete ordering.
    order:
      values:
      - [min_counts, min_genes, max_counts, max_genes]
      - [min_counts, min_genes, max_genes, max_counts]
      - [min_counts, max_counts, min_genes, max_genes]
      - [min_counts, max_counts, max_genes, min_genes]
      - [min_counts, max_genes, min_genes, max_counts]
      - [min_counts, max_genes, max_counts, min_genes]
      - [min_genes, min_counts, max_counts, max_genes]
      - [min_genes, min_counts, max_genes, max_counts]
      - [min_genes, max_counts, min_counts, max_genes]
      - [min_genes, max_counts, max_genes, min_counts]
      - [min_genes, max_genes, min_counts, max_counts]
      - [min_genes, max_genes, max_counts, min_counts]
      - [max_counts, min_counts, min_genes, max_genes]
      - [max_counts, min_counts, max_genes, min_genes]
      - [max_counts, min_genes, min_counts, max_genes]
      - [max_counts, min_genes, max_genes, min_counts]
      - [max_counts, max_genes, min_counts, min_genes]
      - [max_counts, max_genes, min_genes, min_counts]
      - [max_genes, min_counts, min_genes, max_counts]
      - [max_genes, min_counts, max_counts, min_genes]
      - [max_genes, min_genes, min_counts, max_counts]
      - [max_genes, min_genes, max_counts, min_counts]
      - [max_genes, max_counts, min_counts, min_genes]
      - [max_genes, max_counts, min_genes, min_counts]
    # All four thresholds use fractional bounds here.
    min_counts:
      min: 0.0  # Change occurs when joint embedding
      max: 0.05
    min_genes:
      min: 0.0
      max: 0.05
    max_counts:
      min: 0.95
      max: 1.0
    max_genes:
      min: 0.95
      max: 1.0
# The two entries below list no parameters.
- type: filter.cell
  target: FilterCellsPlaceHolder
- type: filter.cell
  target: FilterCellsCommonMod
# normalize
# Normalization candidates. `min`/`max` give the two bounds of a range,
# `values` enumerates discrete choices, and `params` holds fixed (untuned)
# arguments.
- type: normalize
  target: ColumnSumNormalize
  params_to_tune:
    mode:
      values: [normalize, standardize, minmax, l2]
    eps:
      values: [-1, 0.1, 0.3, 0.5, 0.7]
- type: normalize
  target: ScTransform
  params_to_tune:
    min_cells:
      min: 1
      max: 10
    gmean_eps:
      min: 1
      max: 10
    n_genes:
      min: 1000
      max: 3000
    n_cells:
      values: [null, 1, 10, 100]
    bin_size:
      min: 300
      max: 800
    bw_adjust:
      min: 1.0
      max: 5.0
  # Fixed argument, not part of the search space.
  params:
    processes_num: 8
- type: normalize
  target: Log1P
  params_to_tune:
    # NOTE(review): if `base` is used as a logarithm base, the lower bound
    # 1.0 is degenerate (log(1) = 0) — confirm the bound is intended.
    base:
      min: 1.0
      max: 10.0
- type: normalize
  target: NormalizeTotal
  params_to_tune:
    # Floats are written with a mantissa dot and a signed exponent: YAML 1.1
    # loaders such as PyYAML resolve a bare `1e3` to the string "1e3", not to
    # a number. `1.0e+3` is read as the float 1000.0 by every loader.
    target_sum:
      values: [null, 1.0e+3, 1.0e+4, 1.0e+5, 1.0e+6]
    max_fraction:
      values: [0.01, 0.05, 0.5, 0.7, 1.0]
- type: normalize
  target: NormalizeTotalLog1P
  params_to_tune:
    # NOTE(review): same concern as Log1P above regarding a base of 1.0.
    base:
      min: 1.0
      max: 10.0
    # Same portable float spelling as NormalizeTotal.target_sum.
    target_sum:
      values: [null, 1.0e+3, 1.0e+4, 1.0e+5, 1.0e+6]
    max_fraction:
      values: [0.01, 0.05, 0.5, 0.7, 1.0]
# The two entries below list no parameters.
- type: normalize
  target: tfidfTransform
- type: normalize
  target: NormalizePlaceHolder
# filter.gene (highly_variable)
# Second group of gene-filtering candidates, labelled "highly_variable" by
# the original author. `min`/`max` give the two bounds of a range and
# `values` enumerates discrete choices.
- type: filter.gene
  target: FilterGenesTopK
  params_to_tune:
    num_genes:
      min: 100
      max: 10000
    top:
      values: [true, false]
    mode:
      values: [sum, var, cv, rv]
- type: filter.gene
  target: FilterGenesRegression
  params_to_tune:
    method:
      values: [enclasc, seurat3, scmap]
    num_genes:
      min: 100
      max: 10000
# Only fixed arguments are given for this entry; nothing is tuned.
- type: filter.gene
  target: FilterGenesMatch
  params:
    prefixes: [ERCC, MT-]
- type: filter.gene
  target: HighlyVariableGenesRawCount
  params_to_tune:
    n_top_genes:
      min: 100
      max: 10000
    span:
      min: 0.1
      max: 0.6
- type: filter.gene
  target: HighlyVariableGenesLogarithmizedByTopGenes
  params_to_tune:
    n_top_genes:
      min: 100
      max: 10000
    n_bins:
      min: 10
      max: 30
    flavor:
      values: [seurat, cell_ranger]
- type: filter.gene
  target: HighlyVariableGenesLogarithmizedByMeanAndDisp
  params_to_tune:
    min_disp:
      min: 0.05
      max: 0.5
    max_disp:
      min: 1.0
      max: 100.0
    min_mean:
      min: 0.0
      max: 0.0125
    max_mean:
      min: 3.0
      max: 20.0
    n_bins:
      min: 10
      max: 30
    flavor:
      values: [seurat, cell_ranger]
# No parameters are listed for this entry.
- type: filter.gene
  target: FilterGenesNumberPlaceHolder
# feature.cell
# Cell-feature candidates. Every entry fixes `out: feature.cell` under
# `params`; the component count, where tuned, always ranges over 100..1000.
- type: feature.cell
  target: CellPCA
  params:
    out: feature.cell
  params_to_tune:
    n_components:
      min: 100
      max: 1000
    svd_solver:
      values: [auto, full, arpack, randomized]
- type: feature.cell
  target: CellSVD
  params:
    out: feature.cell
  params_to_tune:
    n_components:
      min: 100
      max: 1000
    algorithm:
      values: [arpack, randomized]
- type: feature.cell
  target: CellSparsePCA
  params:
    out: feature.cell
  params_to_tune:
    n_components:
      min: 100
      max: 1000
- type: feature.cell
  target: WeightedFeaturePCA
  params:
    out: feature.cell
  params_to_tune:
    n_components:
      min: 100
      max: 1000
    feat_norm_mode:
      values: [null, normalize, standardize, minmax, l2]
- type: feature.cell
  target: WeightedFeatureSVD
  params:
    out: feature.cell
  params_to_tune:
    n_components:
      min: 100
      max: 1000
    feat_norm_mode:
      values: [null, normalize, standardize, minmax, l2]
# This entry additionally fixes `log_level`.
- type: feature.cell
  target: GaussRandProjFeature
  params:
    out: feature.cell
    log_level: INFO
  params_to_tune:
    n_components:
      min: 100
      max: 1000
# Only the fixed output setting is given; nothing is tuned.
- type: feature.cell
  target: FeatureCellPlaceHolder
  params:
    out: feature.cell
# Settings grouped under `wandb`: an entity, a project, a search method and
# the metric to optimise.
# NOTE(review): the keys match a Weights & Biases sweep configuration —
# confirm. `entity` and `project` are hard-coded to specific names and
# presumably need to be changed by other users.
wandb:
  entity: xzy11632
  project: dance-dev
  method: bayes
  # The metric named `acc` is maximised; the original author noted `val/acc`
  # beside the name.
  metric:
    name: acc  # val/acc
    goal: maximize