5  Typical end-to-end workflow

5.1 FASTQ Quality Control

iobrpy fastq_qc \
  --path1_fastq "/path/to/fastq" \
  --path2_fastp "/path/to/fastp" \
  --num_threads 8 \
  --batch_size 1
/path/to/fastp/
  <sample>_1.fastq.gz
  <sample>_2.fastq.gz
  <sample>_fastp.html
  <sample>_fastp.json
  <sample>.task.complete
  multiqc_report/multiqc_fastp_report.html

5.2 Prepare TPM

# From FASTQ_QC to Salmon
iobrpy batch_salmon \
  --index "/path/to/salmon/index" \
  --path_fq "/path/to/fastp" \
  --path_out "/path/to/salmon" \
  --num_threads 8 \
  --batch_size 1
/path/to/salmon/
  <sample>/quant.sf
iobrpy merge_salmon \
  --project MyProj \
  --path_salmon "/path/to/salmon" \
  --num_processes 8
/path/to/salmon/
  MyProj_salmon_count.tsv.gz
  MyProj_salmon_tpm.tsv.gz
# From Salmon to TPM
iobrpy prepare_salmon \
  -i MyProj_salmon_tpm.tsv.gz \
  -o TPM_matrix.csv \
  --return_feature symbol \
  --remove_version
Gene        TS99       TC89       TC68       TC40       813738     1929563
5S_rRNA     0.000      0.000      0.000      0.000      0.000      0.000
5_8S_rRNA   0.000      0.000      0.000      0.000      0.000      0.000
7SK         0.000      0.000      954.687    1488.249   3691.321   5399.889
A1BG        0.479      1.717      1.844      0.382      1.676      1.126
A1BG-AS1    0.149      0.348      0.755      0.000      0.314      0.400
# From FASTQ_QC to STAR
iobrpy batch_star_count \
  --index "/path/to/star/index" \
  --path_fq "/path/to/fastp" \
  --path_out "/path/to/star" \
  --num_threads 8 \
  --batch_size 1
/path/to/star/
  <sample>/
  <sample>__STARgenome/
  <sample>__STARpass1/
  <sample>_STARtmp/
  <sample>_Aligned.sortedByCoord.out.bam
  <sample>_Log.final.out
  <sample>_Log.out
  <sample>_Log.progress.out
  <sample>_ReadsPerGene.out.tab
  <sample>_SJ.out.tab
  <sample>.task.complete
  .batch_star_count.done
  .merge_star_count.done
iobrpy merge_star_count \
  --project MyProj \
  --path "/path/to/star"
/path/to/star/
  MyProj.STAR.count.tsv.gz
# From STAR to TPM
iobrpy count2tpm \
  -i MyProj.STAR.count.tsv.gz \
  -o TPM_matrix.csv \
  --idtype ensembl \
  --org hsa \
  --remove_version
# (Optionally provide transcript effective lengths)
#   --effLength_csv efflen.csv --id id --length eff_length --gene_symbol symbol
Name       SAMPLE-2e394f45066d_20180921  SAMPLE-88dc3e3cd88e_20180921  SAMPLE-b80d019c9afa_20180921  SAMPLE-586259880b46_20180926  SAMPLE-e95813c8875d_20180921  SAMPLE-7bd449ae436b_20180921
5S_rRNA    5.326                         2.314                         2.377                         3.439                         6.993                         3.630
5_8S_rRNA  0.000                         0.000                         0.000                         0.000                         0.000                         0.000
7SK        8.006                         13.969                        11.398                        5.504                         8.510                         6.418
A1BG       3.876                         2.576                         2.874                         2.533                         2.034                         2.828
A1BG-AS1   5.512                         4.440                         7.725                         4.610                         6.292                         5.336

5.3 (Optional) Mouse to Human symbol mapping

# Matrix mode: rows are mouse gene symbols, columns are samples
iobrpy mouse2human_eset \
  -i mouse_matrix.tsv \
  -o human_matrix.tsv \
  --is_matrix \
  --verbose
# Table mode: input has a symbol column (e.g., SYMBOL); rows are de-duplicated and then mapped
iobrpy mouse2human_eset \
  -i mouse_table.csv \
  -o human_matrix.csv \
  --column_of_symbol SYMBOL \
  --verbose
Gene        Sample1    Sample2    Sample3    Sample4    Sample5    Sample6
SCMH1       0.905412   0.993271   0.826294   0.535761   0.515038   0.733388
NARF        0.116423   0.944370   0.847920   0.441993   0.736983   0.467756
CD52        0.988616   0.784523   0.303614   0.886433   0.608639   0.351713
CAV2        0.063843   0.993835   0.891718   0.702293   0.703912   0.248690
HOXB6       0.716829   0.555838   0.638682   0.971783   0.868208   0.802464

5.4 (Optional) Annotate / de‑duplicate

iobrpy anno_eset \
  -i TPM_matrix.csv \
  -o TPM_anno.csv \
  --annotation anno_grch38 \
  --symbol symbol \
  --probe id \
  --method mean \
  --remove_version
iobrpy anno_eset \
  -i TPM_matrix.csv \
  -o TPM_anno.csv \
  --annotation anno_hug133plus2 \
  --symbol symbol \
  --probe id \
  --method mean
# You can also use: --annotation-file my_anno.csv --annotation-key gene_id
Gene        GSM1523727   GSM1523728   GSM1523729   GSM1523744   GSM1523745   GSM1523746
SH3KBP1     4.3279743    4.316195     4.3514247    4.2957463    4.2566543    4.2168822
RPL41       4.2461486    4.2468076    4.2579398    4.2955956    4.2426114    4.3464246
EEF1A1      4.2937622    4.291038     4.2621994    4.2718415    4.1992331    4.2639275
HUWE1       4.2255821    4.2111235    4.1993775    4.2192063    4.2214823    4.2046394
LOC1019288  4.2193027    4.2196698    4.2132521    4.1819267    4.2345738    4.2104611

5.5 (Optional) Log2 transform

iobrpy log2_eset \
  -i expr.csv \
  -o expr.log2.csv
Name      SRR35344563_GSM8516765_Normal4   SRR35344561_GSM8516763_Normal2   SRR35344562_GSM8516764_Normal3   SRR35344560_GSM8516762_Normal1
A1BG      2.229246496                      0.636390662                      2.140913236                      1.420200061
A1BG-AS1  4.206586844                      3.591817651                      0.614426747                      6.842377234
A1CF      0.128261135                      0                                0.414914625                      0.205743238
A2M       1.999453226                      0.679106252                      2.816410018                      2.898826563

5.6 All-in-one TME profiling

5.6.1 Minimal usage for tme_profile

  • tme_profile runs the whole TME profiling stack from a TPM matrix in one command. It wraps and orchestrates:
    • Signature scoring: calculate_sig_score
    • Immune deconvolution (six methods): cibersort, IPS, estimate, mcpcounter, quantiseq, epic
    • Ligand–receptor scoring: LR_cal
    • It also merges the deconvolution outputs into a single table

Inputs: a genes × samples TPM matrix.
Outputs: standardized subfolders for signatures, TME deconvolution, and ligand–receptor scoring.
Heads-up: tme_profile does not include deside or any clustering (tme_cluster, nmf).

iobrpy tme_profile \
  -i TPM_matrix.csv \
  -o /path/to/outdir \
  --threads 1
# Expected layout

/path/to/outdir
|-- 01-signatures
|   `-- calculate_sig_score.csv
|-- 02-tme
|   |-- cibersort_results.csv
|   |-- epic_results.csv
|   |-- quantiseq_results.csv
|   |-- IPS_results.csv
|   |-- estimate_results.csv
|   |-- mcpcounter_results.csv
|   `-- deconvo_merged.csv
`-- 03-LR_cal
    `-- lr_cal.csv

5.6.2 Signature scoring

iobrpy calculate_sig_score \
  -i TPM_anno.csv \
  -o sig_scores.csv \
  --signature signature_collection \
  --method pca \
  --mini_gene_count 2 \
  --parallel_size 1 \
  --adjust_eset
# Accepts space‑separated or comma‑separated groups; use "all" for a full merge.
ID          CD_8_T_effector_PCA   DDR_PCA    APM_PCA    Immune_Checkpoint_PCA   CellCycle_Reg_PCA   Pan_F_TBRs_PCA
GSM1523727  -3.003007             0.112244   1.046749   -3.287490               1.226469            -3.836552
GSM1523728  0.631973              1.138303   1.999972   0.405965                1.431343            0.164805
GSM1523729  -2.568384             -1.490780  -0.940420  -2.087635               0.579742            -1.208286
GSM1523744  -0.834788             4.558424   -0.274724  -0.873015               1.400215            -2.880584
GSM1523745  -1.358852             4.754705   -2.215926  -1.086041               1.342590            -1.054318

5.6.3 Immune deconvolution (choose one or many)

# CIBERSORT
iobrpy cibersort \
  -i TPM_anno.csv \
  -o cibersort.csv \
  --perm 100 \
  --QN True \
  --threads 1
# CIBERSORT absolute mode
iobrpy cibersort \
  -i TPM_anno.csv \
  -o cibersort.csv \
  --perm 100 \
  --QN True \
  --absolute True \
  --abs_method sig.score \
  --threads 1
ID          B_cells_naive_CIBERSORT  B_cells_memory_CIBERSORT  Plasma_cells_CIBERSORT  T_cells_CD8_CIBERSORT  T_cells_CD4_naive_CIBERSORT  T_cells_CD4_memory_resting_CIBERSORT
GSM1523727  0.025261644              0.00067545                0.174139691             0.060873405             0                           0.143873862
GSM1523728  0.007497053              0.022985466               0.079320853             0.052005437             0                           0.137097071
GSM1523729  0.005356156              0.010721794               0.114171733             0                       0                           0.191541779
GSM1523744  0                        0.064645073               0.089539616             0.024437887             0                           0.147821928
GSM1523745  0                        0.014678117               0.121834835             0                       0                           0.176046775
# quanTIseq (method: lsei / robust norms)
iobrpy quantiseq \
  -i TPM_anno.csv \
  -o quantiseq.csv \
  --signame TIL10 \
  --method lsei \
  --tumor \
  --arrays \
  --scale_mrna
ID          B_cells_quantiseq   Macrophages_M1_quantiseq   Macrophages_M2_quantiseq   Monocytes_quantiseq   Neutrophils_quantiseq   NK_cells_quantiseq
GSM1523727  0.098243385         0.050936602                0.059696474                0                      0.208837962            0.057777168
GSM1523728  0.096665146         0.079422458                0.060696168                0                      0.247916520            0.057952322
GSM1523729  0.102140568         0.044950190                0.075727597                0                      0.230014524            0.060158368
GSM1523744  0.095363945         0.072341346                0.058039861                0                      0.213903654            0.059082891
GSM1523745  0.099119729         0.066757223                0.061254450                0                      0.236191857            0.056277179
# EPIC
iobrpy epic \
  -i TPM_anno.csv \
  -o epic.csv \
  --reference TRef
ID          Bcells_EPIC           CAFs_EPIC           CD4_Tcells_EPIC      CD8_Tcells_EPIC      Endothelial_EPIC      Macrophages_EPIC
GSM1523727  0.029043394           0.008960087         0.145125027          0.075330211          0.087619386           0.005567638
GSM1523728  0.029268307           0.010942391         0.159158789          0.074554506          0.095359587           0.007104695
GSM1523729  0.030334561           0.010648890         0.148159994          0.074191268          0.094116333           0.006359346
GSM1523744  0.027351486           0.010870086         0.144756807          0.070363208          0.085913230           0.006341159
GSM1523745  0.027688157           0.011024014         0.148947183          0.072791879          0.092757138           0.006766186
# ESTIMATE
iobrpy estimate \
  -i TPM_anno.csv \
  -o estimate.csv \
  --platform affymetrix
ID          StromalSignature_estimate   ImmuneSignature_estimate   ESTIMATEScore_estimate   TumorPurity_estimate
GSM1523727  -1250.182509                267.9107094                -982.2718                0.895696565
GSM1523728  197.4176128                 1333.936386                1531.353999              0.675043839
GSM1523729  -110.7937025                821.7451865                710.951484               0.758787601
GSM1523744  -118.685488                 662.3002928                543.6148048              0.774555972
GSM1523745  323.7935623                 1015.007089                1338.800651              0.695624427
# MCPcounter
iobrpy mcpcounter \
  -i TPM_anno.csv \
  -o mcpcounter.csv \
  --features HUGO_symbols
ID          T_cells_MCPcounter   CD8_T_cells_MCPcounter   Cytotoxic_lymphocytes_MCPcounter   B_lineage_MCPcounter   NK_cells_MCPcounter   Monocytic_lineage_MCPcounter
GSM1523727  1.4729234            1.1096225                1.3252089                          1.7530587              1.3129832             1.9197157
GSM1523728  1.5288218            1.0466424                1.5997275                          1.8069543              1.3283454             2.2191597
GSM1523729  1.4688324            1.0731858                1.3722626                          1.8967154              1.3185674             2.0802533
GSM1523744  1.4561831            1.0241529                1.440144                           1.7485736              1.3176502             2.2423225
GSM1523745  1.5078415            1.0987011                1.4883308                          1.7068269              1.3165186             2.27452
# IPS
iobrpy IPS \
  -i TPM_anno.csv \
  -o IPS.csv
ID          MHC_IPS    EC_IPS     SC_IPS     CP_IPS     AZ_IPS     IPS_IPS
GSM1523727  2.252749   0.403792   -0.19162   0.219981   2.684902   9
GSM1523728  2.373568   0.608176   -0.578189  -0.234406  2.16915    7
GSM1523729  2.101158   0.479571   -0.321637  0.099342   2.358434   8
GSM1523744  2.120172   0.535005   -0.332785  0.013166   2.335558   8
GSM1523745  1.911082   0.558811   -0.479384  0.087989   2.078497   7
# DeSide
iobrpy deside \
  --model_dir path/to/your/DeSide_model \
  -i TPM_anno.csv \
  -o deside.csv \
  -r path/to/your/plot/folder \
  --exp_type TPM \
  --method_adding_pathway add_to_end \
  --scaling_by_constant \
  --transpose \
  --print_info
                  Plasma_B_cells_deside  Non_plasma_B_cells_deside  CD4_T_deside  CD8_T_effector_deside  CD8_T_(GZMK_high)_deside    Double_neg_like_T_deside
TCGA-55-8508-01A  0.138                  0.014                      0.019         0.003                  0.001                       0
TCGA-67-3771-01A  0.05                   0.005                      0.016         0.002                  0.017                       0.001
TCGA-55-A4DG-01A  0.042                  0.049                      0.014         0.001                  0.035                       0.005
TCGA-91-7771-01A  0.032                  0.014                      0.032         0.006                  0.023                       0.01
TCGA-91-6849-01A  0.07                   0.011                      0.007         0.001                  0.014                       0

5.6.4 Ligand–receptor scoring

iobrpy LR_cal \
  -i TPM_anno.csv \
  -o LR_score.csv \
  --data_type tpm \
  --id_type symbol \
  --cancer_type pancan \
  --verbose
ID          A2M_APP_CALR_LRPAP1_PSAP_SERPING1_LRP1   ADAM10_AXL    ADAM10_EFNA1_EPHA3   ADAM12_ITGA9   ADAM12_ITGB1_SDC4   ADAM12_SDC4
GSM1523727  1.547225629                              1.566540118   1.017616452          1.476739407     1.492157038        1.492157038
GSM1523728  1.477988945                              1.757804434   1.408624847          1.492926847     1.492926847        1.492926847
GSM1523729  1.504309415                              1.730361606   1.5367173            1.473255496     1.473255496        1.473255496
GSM1523744  1.514383163                              1.73870604    1.308314516          1.469082453     1.492761796        1.492761796
GSM1523745  1.478643424                              1.76013689    1.552305282          1.449499815     1.449499815        1.449499815

5.7 TME clustering / NMF clustering

# KL index auto‑select k (k‑means)
iobrpy tme_cluster \
  -i cibersort.csv \
  -o tme_cluster.csv \
  --features 1:22 \
  --id ID \
  --min_nc 2 \
  --max_nc 5 \
  --print_result \
  --scale
ID          cluster   B_cells_naive_CIBERSORT   B_cells_memory_CIBERSORT   Plasma_cells_CIBERSORT   T_cells_CD8_CIBERSORT   T_cells_CD4_naive_CIBERSORT
GSM1523727  TME1      -0.218307125              -0.588626398               0.824242243              1.136773711             -0.142069534
GSM1523728  TME3      -0.531705309              0.093328188                -0.892611283             1.086091448             -0.142069534
GSM1523729  TME1      -0.359692153              -0.432511044               -0.481593953             -0.685959226            -0.142069534
GSM1523744  TME3      -0.531705309              0.952517071                -0.873856851             0.370938418             -0.142069534
GSM1523745  TME2      -0.531705309              -0.798612476               -0.132728742             -0.685959226            -0.142069534
# NMF clustering (auto k, excludes k=2)
iobrpy nmf \
  -i cibersort.csv \
  -o path/to/your/result/folder \
  --kmin 2 \
  --kmax 10 \
  --features 1:22 \
  --max-iter 10000 \
  --skip_k_2
sample      cluster   B_cells_naive_CIBERSORT  B_cells_memory_CIBERSORT  Plasma_cells_CIBERSORT  T_cells_CD8_CIBERSORT  T_cells_CD4_naive_CIBERSORT
GSM1523727  cluster2  0.006101201              0.013615524               0.149377703             0.049747382            0
GSM1523728  cluster3  0                        0.033869265               0.076470323             0.048364124            0
GSM1523729  cluster1  0.003348733              0.018252079               0.09392446              0                      0
GSM1523744  cluster2  0                        0.059386784               0.077266743             0.028845636            0
GSM1523745  cluster3  0                        0.007379033               0.108739264             0                      0

cluster   top_1                                 top_2                         top_3                                 top_4                             top_5                                   top_6
cluster1  T_cells_CD4_memory_resting_CIBERSORT  Plasma_cells_CIBERSORT        Macrophages_M2_CIBERSORT              T_cells_gamma_delta_CIBERSORT     Mast_cells_resting_CIBERSORT            T_cells_follicular_helper_CIBERSORT
cluster2  Macrophages_M2_CIBERSORT              Macrophages_M1_CIBERSORT      T_cells_follicular_helper_CIBERSORT   Plasma_cells_CIBERSORT            T_cells_CD4_memory_activated_CIBERSORT  Neutrophils_CIBERSORT
cluster3  T_cells_CD4_memory_resting_CIBERSORT  Neutrophils_CIBERSORT         Macrophages_M0_CIBERSORT              Macrophages_M2_CIBERSORT          Plasma_cells_CIBERSORT                  Mast_cells_activated_CIBERSORT