生存分析与Cox比例风险模型

Xianxiong Ma

2020年3月15日

Kaplan-Meier法估计

案例（黑色素瘤）

no : a numeric vector, patient code.

status : a numeric vector code, survival status; 1: dead from melanoma, 2: alive, 3: dead from other cause.

days : a numeric vector, observation time.

ulc : a numeric vector code, ulceration; 1: present, 2: absent.

thick : a numeric vector, tumor thickness (1/100 mm).

sex : a numeric vector code; 1: female, 2: male.

library(survival)
library(ISwR)
attach(melanom)
names(melanom)

## [1] "no"     "status" "days"   "ulc"    "thick"  "sex"

# Surv(days, status==1) survfit(Surv(days, status==1)~1)
surv.all <- survfit(Surv(days, status == 1) ~ 1)
summary(surv.all)

## Call: survfit(formula = Surv(days, status == 1) ~ 1)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   185    201       1    0.995 0.00496        0.985        1.000
##   204    200       1    0.990 0.00700        0.976        1.000
##   210    199       1    0.985 0.00855        0.968        1.000
##   232    198       1    0.980 0.00985        0.961        1.000
##   279    196       1    0.975 0.01100        0.954        0.997
##   295    195       1    0.970 0.01202        0.947        0.994
##   386    193       1    0.965 0.01297        0.940        0.991
##   426    192       1    0.960 0.01384        0.933        0.988
##   469    191       1    0.955 0.01465        0.927        0.984
##   529    189       1    0.950 0.01542        0.920        0.981
##   621    188       1    0.945 0.01615        0.914        0.977
##   629    187       1    0.940 0.01683        0.907        0.973
##   659    186       1    0.935 0.01748        0.901        0.970
##   667    185       1    0.930 0.01811        0.895        0.966
##   718    184       1    0.925 0.01870        0.889        0.962
##   752    183       1    0.920 0.01927        0.883        0.958
##   779    182       1    0.915 0.01981        0.877        0.954
##   793    181       1    0.910 0.02034        0.871        0.950
##   817    180       1    0.904 0.02084        0.865        0.946
##   833    178       1    0.899 0.02134        0.859        0.942
##   858    177       1    0.894 0.02181        0.853        0.938
##   869    176       1    0.889 0.02227        0.847        0.934
##   872    175       1    0.884 0.02272        0.841        0.930
##   967    174       1    0.879 0.02315        0.835        0.926
##   977    173       1    0.874 0.02357        0.829        0.921
##   982    172       1    0.869 0.02397        0.823        0.917
##  1041    171       1    0.864 0.02436        0.817        0.913
##  1055    170       1    0.859 0.02474        0.812        0.909
##  1062    169       1    0.854 0.02511        0.806        0.904
##  1075    168       1    0.849 0.02547        0.800        0.900
##  1156    167       1    0.844 0.02582        0.794        0.896
##  1228    166       1    0.838 0.02616        0.789        0.891
##  1252    165       1    0.833 0.02649        0.783        0.887
##  1271    164       1    0.828 0.02681        0.777        0.883
##  1312    163       1    0.823 0.02713        0.772        0.878
##  1435    161       1    0.818 0.02744        0.766        0.874
##  1506    159       1    0.813 0.02774        0.760        0.869
##  1516    155       1    0.808 0.02805        0.755        0.865
##  1548    152       1    0.802 0.02837        0.749        0.860
##  1560    150       1    0.797 0.02868        0.743        0.855
##  1584    148       1    0.792 0.02899        0.737        0.851
##  1621    146       1    0.786 0.02929        0.731        0.846
##  1667    137       1    0.780 0.02963        0.725        0.841
##  1690    134       1    0.775 0.02998        0.718        0.836
##  1726    131       1    0.769 0.03033        0.712        0.831
##  1933    110       1    0.762 0.03085        0.704        0.825
##  2061     95       1    0.754 0.03155        0.694        0.818
##  2062     94       1    0.746 0.03221        0.685        0.812
##  2103     90       1    0.737 0.03290        0.676        0.805
##  2108     88       1    0.729 0.03358        0.666        0.798
##  2256     80       1    0.720 0.03438        0.656        0.791
##  2388     75       1    0.710 0.03523        0.645        0.783
##  2467     69       1    0.700 0.03619        0.633        0.775
##  2565     63       1    0.689 0.03729        0.620        0.766
##  2782     57       1    0.677 0.03854        0.605        0.757
##  3042     52       1    0.664 0.03994        0.590        0.747
##  3338     35       1    0.645 0.04307        0.566        0.735

plot(surv.all, col = "blue")

plot of chunk unnamed-chunk-1

surv.bysex <- survfit(Surv(days, status == 1) ~ sex)
summary(surv.bysex)

## Call: survfit(formula = Surv(days, status == 1) ~ sex)
## 
##                 sex=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   279    124       1    0.992 0.00803        0.976        1.000
##   295    123       1    0.984 0.01131        0.962        1.000
##   386    121       1    0.976 0.01384        0.949        1.000
##   469    120       1    0.968 0.01593        0.937        0.999
##   667    119       1    0.959 0.01775        0.925        0.995
##   817    118       1    0.951 0.01937        0.914        0.990
##   833    116       1    0.943 0.02087        0.903        0.985
##   858    115       1    0.935 0.02224        0.892        0.980
##   869    114       1    0.927 0.02351        0.882        0.974
##   872    113       1    0.919 0.02469        0.871        0.968
##   982    112       1    0.910 0.02580        0.861        0.962
##  1055    111       1    0.902 0.02684        0.851        0.956
##  1156    110       1    0.894 0.02782        0.841        0.950
##  1252    109       1    0.886 0.02875        0.831        0.944
##  1271    108       1    0.878 0.02963        0.821        0.938
##  1312    107       1    0.869 0.03046        0.812        0.931
##  1548    102       1    0.861 0.03134        0.802        0.924
##  1560    100       1    0.852 0.03218        0.791        0.918
##  1621     97       1    0.843 0.03303        0.781        0.911
##  1667     90       1    0.834 0.03396        0.770        0.903
##  1726     86       1    0.824 0.03493        0.759        0.896
##  1933     74       1    0.813 0.03619        0.745        0.887
##  2062     63       1    0.800 0.03785        0.729        0.878
##  2108     60       1    0.787 0.03950        0.713        0.868
##  2256     54       1    0.772 0.04137        0.695        0.858
##  2467     45       1    0.755 0.04386        0.674        0.846
##  3042     34       1    0.733 0.04787        0.645        0.833
##  3338     25       1    0.704 0.05419        0.605        0.818
## 
##                 sex=2 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   185     76       1    0.987  0.0131        0.962        1.000
##   204     75       1    0.974  0.0184        0.938        1.000
##   210     74       1    0.961  0.0223        0.918        1.000
##   232     73       1    0.947  0.0256        0.898        0.999
##   426     72       1    0.934  0.0284        0.880        0.992
##   529     70       1    0.921  0.0310        0.862        0.984
##   621     69       1    0.908  0.0333        0.845        0.975
##   629     68       1    0.894  0.0354        0.827        0.966
##   659     67       1    0.881  0.0373        0.811        0.957
##   718     66       1    0.867  0.0390        0.794        0.947
##   752     65       1    0.854  0.0407        0.778        0.938
##   779     64       1    0.841  0.0422        0.762        0.928
##   793     63       1    0.827  0.0435        0.746        0.917
##   967     62       1    0.814  0.0448        0.731        0.907
##   977     61       1    0.801  0.0461        0.715        0.896
##  1041     60       1    0.787  0.0472        0.700        0.886
##  1062     59       1    0.774  0.0482        0.685        0.875
##  1075     58       1    0.761  0.0492        0.670        0.864
##  1228     57       1    0.747  0.0501        0.655        0.852
##  1435     55       1    0.734  0.0510        0.640        0.841
##  1506     53       1    0.720  0.0519        0.625        0.829
##  1516     51       1    0.706  0.0528        0.610        0.817
##  1584     50       1    0.692  0.0536        0.594        0.805
##  1690     47       1    0.677  0.0544        0.578        0.792
##  2061     32       1    0.656  0.0567        0.554        0.777
##  2103     29       1    0.633  0.0591        0.527        0.760
##  2388     25       1    0.608  0.0619        0.498        0.742
##  2565     22       1    0.580  0.0650        0.466        0.723
##  2782     21       1    0.553  0.0675        0.435        0.702

plot(surv.bysex, conf.int = T, col = c("red", "blue"))

plot of chunk unnamed-chunk-1

plot(surv.bysex, conf.int = F, col = c("red", "blue"))
legend("topright", legend = c("male", "female"), lty = 1, col = c("blue", 
    "red"))

plot of chunk unnamed-chunk-1

log-rank test

This function implements the G-rho family of Harrington and Fleming (1982), with weights on each death of S(t)^rho, where S is the Kaplan-Meier estimate of survival. With rho = 0 this is the log-rank or Mantel-Haenszel test, and with rho = 1 it is equivalent to the Peto & Peto modification of the Gehan-Wilcoxon test.

attach(melanom)
f1 <- survdiff(Surv(days, status == 1) ~ sex, rho = 0)
f1

## Call:
## survdiff(formula = Surv(days, status == 1) ~ sex, rho = 0)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 126       28     37.1      2.25      6.47
## sex=2  79       29     19.9      4.21      6.47
## 
##  Chisq= 6.5  on 1 degrees of freedom, p= 0.01

f2 <- survdiff(Surv(days, status == 1) ~ sex, rho = 1)
f2

## Call:
## survdiff(formula = Surv(days, status == 1) ~ sex, rho = 1)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 126     23.4     31.6      2.14      7.09
## sex=2  79     25.2     17.0      3.98      7.09
## 
##  Chisq= 7.1  on 1 degrees of freedom, p= 0.008

f3 <- survdiff(Surv(days, status == 1) ~ sex + strata(ulc))  # 控制变量ulc，类似前面的取均值
f3

## Call:
## survdiff(formula = Surv(days, status == 1) ~ sex + strata(ulc))
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 126       28     34.7      1.28      3.31
## sex=2  79       29     22.3      1.99      3.31
## 
##  Chisq= 3.3  on 1 degrees of freedom, p= 0.07

LifeTable寿命表法

hmohiv <- read.table("hmohiv1.csv", sep = ",", header = TRUE)
attach(hmohiv)
head(hmohiv)

##   ID entdate enddate time age drug censor
## 1  1 15may90 14oct90    5  46    0      1
## 2  2 19sep89 20mar90    6  35    1      0
## 3  3 21apr91 20dec91    8  30    1      1
## 4  4 03jan91 04apr91    3  30    1      1
## 5  5 18sep89 19jul91   22  36    0      1
## 6  6 18mar91 17apr91    1  32    1      0

library(KMsurv)
library(nlme)
(t6m <- floor(time/6))

##   [1]  0  1  1  0  3  0  1  1  0  2  0  2  0  2  5  0  0  3  0  0  0  1
##  [23] 10  1  0  0  0  0  2  0  0  0  5  1  0  1  0  1  0  1  6  0  1  0
##  [45]  5  1  1  9  0  0  2  0  1  0  1  0  0  0  5  0  1  1  0  1  0  5
##  [67]  0  9  0  0  7  0  1  8  2  0  9  0  0  1  0  0  0  0  1  0  1  4
##  [89]  1  2  0  9  0  2  1  0  0 10  0  0

(tall <- data.frame(t6m, censor))

##     t6m censor
## 1     0      1
## 2     1      0
## 3     1      1
## 4     0      1
## 5     3      1
## 6     0      0
## 7     1      1
## 8     1      1
## 9     0      1
## 10    2      1
## 11    0      0
## 12    2      1
## 13    0      1
## 14    2      1
## 15    5      1
## 16    0      1
## 17    0      1
## 18    3      0
## 19    0      0
## 20    0      1
## 21    0      0
## 22    1      1
## 23   10      0
## 24    1      1
## 25    0      0
## 26    0      1
## 27    0      0
## 28    0      1
## 29    2      1
## 30    0      1
## 31    0      1
## 32    0      1
## 33    5      1
## 34    1      1
## 35    0      1
## 36    1      1
## 37    0      1
## 38    1      1
## 39    0      1
## 40    1      1
## 41    6      1
## 42    0      1
## 43    1      1
## 44    0      1
## 45    5      1
## 46    1      1
## 47    1      1
## 48    9      0
## 49    0      0
## 50    0      1
## 51    2      1
## 52    0      1
## 53    1      1
## 54    0      1
## 55    1      1
## 56    0      1
## 57    0      1
## 58    0      1
## 59    5      1
## 60    0      1
## 61    1      0
## 62    1      1
## 63    0      1
## 64    1      1
## 65    0      1
## 66    5      1
## 67    0      1
## 68    9      1
## 69    0      1
## 70    0      0
## 71    7      1
## 72    0      1
## 73    1      1
## 74    8      1
## 75    2      1
## 76    0      1
## 77    9      1
## 78    0      1
## 79    0      1
## 80    1      1
## 81    0      1
## 82    0      1
## 83    0      1
## 84    0      1
## 85    1      0
## 86    0      0
## 87    1      1
## 88    4      0
## 89    1      1
## 90    2      0
## 91    0      1
## 92    9      1
## 93    0      1
## 94    2      0
## 95    1      1
## 96    0      1
## 97    0      1
## 98   10      0
## 99    0      0
## 100   0      1

(die <- gsummary(tall, sum, groups = t6m))

##    t6m censor
## 0    0     41
## 1    1     21
## 2    2      6
## 3    3      1
## 4    4      0
## 5    5      5
## 6    6      1
## 7    7      1
## 8    8      1
## 9    9      3
## 10  10      0

(total <- gsummary(tall, length, groups = t6m))

##    t6m censor
## 0    0     51
## 1    1     24
## 2    2      8
## 3    3      2
## 4    4      1
## 5    5      5
## 6    6      1
## 7    7      1
## 8    8      1
## 9    9      4
## 10  10      2

rm(t6m)
ltab.data <- cbind(die[, 1:2], total[, 2])
detach(hmohiv)
attach(ltab.data)

lt = length(t6m)
t6m[lt + 1] = NA
(nevent = censor)

##  [1] 41 21  6  1  0  5  1  1  1  3  0

(nlost = total[, 2] - censor)

##  [1] 10  3  2  1  1  0  0  0  0  1  2

mytable <- lifetab(t6m, 100, nlost, nevent)
mytable[, 1:5]

##       nsubs nlost nrisk nevent       surv
## 0-1     100    10  95.0     41 1.00000000
## 1-2      49     3  47.5     21 0.56842105
## 2-3      25     2  24.0      6 0.31711911
## 3-4      17     1  16.5      1 0.23783934
## 4-5      15     1  14.5      0 0.22342483
## 5-6      14     0  14.0      5 0.22342483
## 6-7       9     0   9.0      1 0.14363025
## 7-8       8     0   8.0      1 0.12767133
## 8-9       7     0   7.0      1 0.11171242
## 9-10      6     1   5.5      3 0.09575350
## 10-NA     2     2   1.0      0 0.04352432

plot(t6m[1:11], mytable[, 5], type = "s", xlab = "Survival time in every 6 month", 
    ylab = "Proportion Surviving")

plot of chunk unnamed-chunk-3

detach(ltab.data)

logrank检验案例1

# install.packages('survival')
library(survival)
example15_3 <- read.table("example15_3.csv", header = TRUE, sep = ",")
attach(example15_3)
head(example15_3)

##     t censor group
## 1  28      0     A
## 2  29      0     A
## 3 175      0     A
## 4 195      0     A
## 5 309      0     A
## 6 377      1     A

total <- survfit(Surv(t, censor == 1) ~ 1)
summary(total)

## Call: survfit(formula = Surv(t, censor == 1) ~ 1)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   299     24       1   0.9583  0.0408       0.8816        1.000
##   300     23       1   0.9167  0.0564       0.8125        1.000
##   377     15       1   0.8556  0.0791       0.7137        1.000
##   393     12       1   0.7843  0.0996       0.6115        1.000
##   421     11       1   0.7130  0.1132       0.5223        0.973
##   429     10       1   0.6417  0.1223       0.4416        0.932
##   447      9       1   0.5704  0.1278       0.3676        0.885
##   709      6       1   0.4753  0.1374       0.2697        0.838
##   744      5       1   0.3802  0.1390       0.1858        0.778
##   770      4       1   0.2852  0.1328       0.1145        0.710
##  1106      3       1   0.1901  0.1177       0.0565        0.640
##  1119      2       1   0.0951  0.0894       0.0151        0.600

plot(total, conf.int = F)

plot of chunk unnamed-chunk-4

separate <- survfit(Surv(t, censor == 1) ~ group)
summary(separate)

## Call: survfit(formula = Surv(t, censor == 1) ~ group)
## 
##                 group=A 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   377     10       1     0.90  0.0949       0.7320        1.000
##   393      9       1     0.80  0.1265       0.5868        1.000
##   421      8       1     0.70  0.1449       0.4665        1.000
##   447      7       1     0.60  0.1549       0.3617        0.995
##   709      5       1     0.48  0.1640       0.2458        0.938
##   744      4       1     0.36  0.1610       0.1498        0.865
##   770      3       1     0.24  0.1453       0.0732        0.786
##  1106      2       1     0.12  0.1117       0.0194        0.744
## 
##                 group=B 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   299     13       1    0.923  0.0739        0.789            1
##   300     12       1    0.846  0.1001        0.671            1
##   429      3       1    0.564  0.2398        0.245            1
##  1119      1       1    0.000     NaN           NA           NA

plot(separate, lty = c("solid", "dashed"), col = c("black", "blue"), xlab = "survival time in days", 
    ylab = "survival probabilities")
legend("topright", c("Group A", " Group B"), lty = c("solid", "dashed"), col = c("black", 
    "blue"))

plot of chunk unnamed-chunk-4

survdiff(Surv(t, censor) ~ group)

## Call:
## survdiff(formula = Surv(t, censor) ~ group)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## group=A 15        8     8.11   0.00145   0.00479
## group=B 19        4     3.89   0.00301   0.00479
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.9

survdiff(Surv(t, censor) ~ group, rho = 1)  # rho = 1 it is equivalent to the Peto & Peto modification of the Gehan-Wilcoxon test.

## Call:
## survdiff(formula = Surv(t, censor) ~ group, rho = 1)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## group=A 15     4.91     5.16    0.0126    0.0524
## group=B 19     2.86     2.61    0.0249    0.0524
## 
##  Chisq= 0.1  on 1 degrees of freedom, p= 0.8

detach(example15_3)

logrank检验案例2

library(coin)
data(glioma)
library(survival)
g3 <- subset(glioma, histology =='Grade3')
fit <- survfit(Surv(time, event)~group,data = g3)
plot(fit, lty = c(2,1), col = c(2,1))
legend('bottomright', legend = c('Control','Treatment'), lty = c(2,1), col = c(2,1))
survdiff(Surv(time, event)~group,data = g3)

## Call:
## survdiff(formula = Surv(time, event) ~ group, data = g3)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## group=Control  6        4     1.49      4.23      6.06
## group=RIT     11        2     4.51      1.40      6.06
## 
##  Chisq= 6.1  on 1 degrees of freedom, p= 0.01

logrank_test(Surv(time, event)~group,data = g3, distribution ="exact")

## 
##  Exact Two-Sample Logrank Test
## 
## data:  Surv(time, event) by group (Control, RIT)
## Z = -2.1711, p-value = 0.02877
## alternative hypothesis: true theta is not equal to 1

logrank_test(Surv(time, event)~group|histology,data = glioma, distribution = approximate(B = 1000)) #两组比较,coin包 logrank_test函数#SurvivalTests {coin}

## 
##  Approximative Two-Sample Logrank Test
## 
## data:  Surv(time, event) by
##   group (Control, RIT) 
##   stratified by histology
## Z = -3.6704, p-value < 0.001
## alternative hypothesis: true theta is not equal to 1

#画一幅高水准的生存曲线
library(survival)
library(survminer)
data(lung,package = "survival")
fit <- survfit(Surv(time, status) ~ sex, data = lung)

ggsurvplot(fit,
           pval = TRUE, # 在图上添加log rank检验的p值
           conf.int = TRUE,# 添加置信区间
           risk.table = TRUE, # 在图下方添加风险表
           risk.table.col = "strata", # 根据数据分组为风险表添加颜色
           linetype = "strata", # 改变不同组别的生存曲线的线型
           surv.median.line = "hv", # 标注出中位生存时间
           ggtheme = theme_bw(), # 改变图形风格
           palette = c("#E7B800", "#2E9FDF")) # 图形颜色风格

plot of chunk unnamed-chunk-5

ggsurvplot(
  fit,                    
  pval = FALSE,             
  conf.int = TRUE, 
  fun = "cumhaz",
  conf.int.style = "ribbon",  # 设置置信区间的风格
  xlab = "Time in days",   # 设置x轴标签
  break.time.by = 200,     # 将x轴按照200为间隔进行切分
  ggtheme = theme_light(), # 设置图形风格
  risk.table = "abs_pct",  # 在风险表中添加绝对数和相对数
  risk.table.y.text.col = TRUE,# 设置风险表的文字颜色
  risk.table.y.text = FALSE,# 以条柱展示风险表的标签，而非文字
  ncensor.plot = TRUE,      # 展示随访过程中不同时间点死亡和删失的情况
  surv.median.line = "hv",  # 添加中位生存时间
  legend.labs = 
    c("Male", "Female"),    # 改变图例标签
  palette = 
    c("#E7B800", "#2E9FDF") # 设置颜色
)

plot of chunk unnamed-chunk-5

ggsurvplot(fit,
           conf.int = TRUE,
           risk.table.col = "strata", 
           ggtheme = theme_bw(), 
           palette = c("#E7B800", "#2E9FDF"),
           fun = "cumhaz")

plot of chunk unnamed-chunk-5

dev.off()

## null device 
##           1

cox regression

attach(melanom)
f4 <- coxph(Surv(days, status == 1) ~ sex)
summary(f4)

## Call:
## coxph(formula = Surv(days, status == 1) ~ sex)
## 
##   n= 205, number of events= 57 
## 
##       coef exp(coef) se(coef)     z Pr(>|z|)  
## sex 0.6622    1.9390   0.2651 2.498   0.0125 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## sex     1.939     0.5157     1.153      3.26
## 
## Concordance= 0.59  (se = 0.034 )
## Likelihood ratio test= 6.15  on 1 df,   p=0.01
## Wald test            = 6.24  on 1 df,   p=0.01
## Score (logrank) test = 6.47  on 1 df,   p=0.01

# summary(coxph(Surv(days,status==1)~sex))
f5 <- coxph(Surv(days, status == 1) ~ sex + log(thick) + ulc)
summary(f5)

## Call:
## coxph(formula = Surv(days, status == 1) ~ sex + log(thick) + 
##     ulc)
## 
##   n= 205, number of events= 57 
## 
##               coef exp(coef) se(coef)      z Pr(>|z|)   
## sex         0.3813    1.4641   0.2706  1.409  0.15879   
## log(thick)  0.5756    1.7782   0.1794  3.209  0.00133 **
## ulc        -0.9389    0.3911   0.3243 -2.895  0.00379 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##            exp(coef) exp(-coef) lower .95 upper .95
## sex           1.4641     0.6830    0.8615    2.4883
## log(thick)    1.7782     0.5624    1.2511    2.5273
## ulc           0.3911     2.5571    0.2071    0.7384
## 
## Concordance= 0.768  (se = 0.032 )
## Likelihood ratio test= 42.66  on 3 df,   p=3e-09
## Wald test            = 36.99  on 3 df,   p=5e-08
## Score (logrank) test = 42.81  on 3 df,   p=3e-09

# summary(coxph(Surv(days,status==1)~sex+log(thick)+strata(ulc)))
plot(survfit(coxph(Surv(days, status == 1) ~ log(thick) + sex + ulc)), col = c("red", 
    "blue"))
# plot(survfit(coxph(Surv(days,status==1)~
# log(thick)+sex+strata(ulc))),col=c('red','blue'))
legend("topright", legend = c("ulceration present", "ulceration absent"), 
    lty = 1, col = c("red", "blue"))

plot of chunk unnamed-chunk-6

detach(melanom)

Cox回归案例1

library(foreign)
library(survival)

pancer <- read.spss("pancer.sav")
pancer <- as.data.frame(pancer)
head(pancer)

##   caseno time censor age sex        trt      bui  ch  p stage
## 1      1  2.4   死亡  66  男 无术中放疗 头部以外 CH3 有  IV期
## 2      2  1.7   死亡  69  男 无术中放疗 头部以外 CH3 有  IV期
## 3      3  0.1   死亡  48  男 无术中放疗 头部以外 CH0 无 III期
## 4      4  1.0   死亡  73  男 无术中放疗 头部以外 CH3 无 III期
## 5      5  4.8   死亡  65  男 无术中放疗 头部以外 CH3 无  IV期
## 6      6  6.4   死亡  38  男 无术中放疗 胰脏头部 CH3 无 III期

pancer$Gender <- as.factor(ifelse(pancer$sex=='男',"Male","Female"))
pancer$ch <- as.factor(ifelse(pancer$ch=='CH3', "ch","nonch"))
pancer$censor <- ifelse(pancer$censor=='死亡',1,0)

#pancer$ch <- relevel(pancer$ch,ref="CH0") #设置因子的参照水平
#pancer$ch<- factor(pancer$ch,order=TRUE) #设置为等级变量
#options(contrasts=c("contr.treatment", "contr.treatment")) #指定等级变量的参照水平
#pancer$Gender <- relevel(pancer$Gender,ref='Female')


f<-coxph(Surv(time,censor==1)~age+Gender+trt+bui+ch+p+stage,data=pancer)
summary(f)

## Call:
## coxph(formula = Surv(time, censor == 1) ~ age + Gender + trt + 
##     bui + ch + p + stage, data = pancer)
## 
##   n= 83, number of events= 82 
## 
##                   coef exp(coef) se(coef)      z Pr(>|z|)   
## age            0.01575   1.01588  0.01322  1.191  0.23356   
## GenderMale     0.07528   1.07819  0.24358  0.309  0.75727   
## trt有术中放疗 -0.88487   0.41277  0.32113 -2.756  0.00586 **
## bui头部以外    0.45223   1.57181  0.29462  1.535  0.12480   
## chnonch        0.20457   1.22699  0.30375  0.673  0.50065   
## p有            0.29321   1.34072  0.27587  1.063  0.28784   
## stageIV期      0.23301   1.26240  0.25688  0.907  0.36437   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## age              1.0159     0.9844    0.9899    1.0426
## GenderMale       1.0782     0.9275    0.6689    1.7379
## trt有术中放疗    0.4128     2.4227    0.2200    0.7746
## bui头部以外      1.5718     0.6362    0.8823    2.8002
## chnonch          1.2270     0.8150    0.6765    2.2254
## p有              1.3407     0.7459    0.7808    2.3023
## stageIV期        1.2624     0.7921    0.7630    2.0886
## 
## Concordance= 0.646  (se = 0.036 )
## Likelihood ratio test= 14.5  on 7 df,   p=0.04
## Wald test            = 14.84  on 7 df,   p=0.04
## Score (logrank) test = 15.2  on 7 df,   p=0.03

sum.surv<-summary(f)
c_index<-sum.surv$concordance
c_index

##          C      se(C) 
## 0.64582715 0.03617383

Cox回归案例2

library(survival)
example15_4 <- read.table("example15_4.csv", header = TRUE, sep = ",")
attach(example15_4)
head(example15_4)

##   group renal days censor
## 1     1     1    8      1
## 2     1     0   52      1
## 3     1     1   58      1
## 4     1     1   63      1
## 5     1     1   63      1
## 6     1     0  220      1

coxmodel <- coxph(Surv(days, censor) ~ group)
summary(coxmodel)

## Call:
## coxph(formula = Surv(days, censor) ~ group)
## 
##   n= 25, number of events= 20 
## 
##         coef exp(coef) se(coef)     z Pr(>|z|)
## group 0.3652    1.4408   0.4592 0.795    0.426
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## group     1.441      0.694    0.5858     3.544
## 
## Concordance= 0.536  (se = 0.064 )
## Likelihood ratio test= 0.64  on 1 df,   p=0.4
## Wald test            = 0.63  on 1 df,   p=0.4
## Score (logrank) test = 0.64  on 1 df,   p=0.4

coxmode2 <- coxph(Surv(days, censor) ~ group + renal)
summary(coxmode2)

## Call:
## coxph(formula = Surv(days, censor) ~ group + renal)
## 
##   n= 25, number of events= 20 
## 
##          coef exp(coef) se(coef)     z Pr(>|z|)    
## group  0.9835    2.6738   0.5231 1.880 0.060068 .  
## renal  4.2718   71.6515   1.1588 3.686 0.000228 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## group     2.674    0.37399    0.9592     7.454
## renal    71.652    0.01396    7.3930   694.432
## 
## Concordance= 0.773  (se = 0.055 )
## Likelihood ratio test= 23.82  on 2 df,   p=7e-06
## Wald test            = 14.33  on 2 df,   p=8e-04
## Score (logrank) test = 31.13  on 2 df,   p=2e-07

anova(coxmodel, coxmode2)

## Analysis of Deviance Table
##  Cox model: response is  Surv(days, censor)
##  Model 1: ~ group
##  Model 2: ~ group + renal
##    loglik  Chisq Df P(>|Chi|)    
## 1 -52.715                        
## 2 -41.122 23.186  1 1.471e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

detach(example15_4)

Cox回归案例3

data("GBSG2", package = "TH.data")
head(GBSG2)

##   horTh age menostat tsize tgrade pnodes progrec estrec time cens
## 1    no  70     Post    21     II      3      48     66 1814    1
## 2   yes  56     Post    12     II      7      61     77 2018    1
## 3   yes  58     Post    35     II      9      52    271  712    1
## 4   yes  59     Post    17     II      4      60     29 1807    1
## 5    no  73     Post    35     II      1      26     65  772    1
## 6    no  32      Pre    57    III     24       0     13  448    1

plot(survfit(Surv(time, cens) ~ horTh, data = GBSG2), lty = c(2, 1), col = c(2, 
    1), mark.time = T)
legend("bottomright", legend = c("yes", "no"), lty = c(2, 1), col = c(2, 1))

plot of chunk unnamed-chunk-10

coxreg <- coxph(Surv(time, cens) ~ ., data = GBSG2)
summary(coxreg)

## Call:
## coxph(formula = Surv(time, cens) ~ ., data = GBSG2)
## 
##   n= 686, number of events= 299 
## 
##                    coef  exp(coef)   se(coef)      z Pr(>|z|)    
## horThyes     -0.3462784  0.7073155  0.1290747 -2.683 0.007301 ** 
## age          -0.0094592  0.9905854  0.0093006 -1.017 0.309126    
## menostatPost  0.2584448  1.2949147  0.1834765  1.409 0.158954    
## tsize         0.0077961  1.0078266  0.0039390  1.979 0.047794 *  
## tgradeII      0.6361117  1.8891211  0.2492025  2.553 0.010693 *  
## tgradeIII     0.7796542  2.1807181  0.2684801  2.904 0.003685 ** 
## pnodes        0.0487886  1.0499984  0.0074471  6.551  5.7e-11 ***
## progrec      -0.0022172  0.9977852  0.0005735 -3.866 0.000111 ***
## estrec        0.0001973  1.0001973  0.0004504  0.438 0.661307    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## horThyes        0.7073     1.4138    0.5492    0.9109
## age             0.9906     1.0095    0.9727    1.0088
## menostatPost    1.2949     0.7723    0.9038    1.8553
## tsize           1.0078     0.9922    1.0001    1.0156
## tgradeII        1.8891     0.5293    1.1591    3.0788
## tgradeIII       2.1807     0.4586    1.2885    3.6909
## pnodes          1.0500     0.9524    1.0348    1.0654
## progrec         0.9978     1.0022    0.9967    0.9989
## estrec          1.0002     0.9998    0.9993    1.0011
## 
## Concordance= 0.692  (se = 0.015 )
## Likelihood ratio test= 104.8  on 9 df,   p=<2e-16
## Wald test            = 114.8  on 9 df,   p=<2e-16
## Score (logrank) test = 120.7  on 9 df,   p=<2e-16

# install.packages('party')
library(party)
tree <- ctree(Surv(time, cens) ~ ., data = GBSG2)
plot(tree)

plot of chunk unnamed-chunk-10