倾向性匹配得分在R语言中实现

Xianxiong Ma

2020年3月14日

PSM定义

举个例子:

MatchIt包

案例:所示数据中共有10个变量,614个观测,实验组185例,对照组429例。treat变量即为分组变量,"1"=实验组(接受职业培训),"0"=对照组(未接受职业培训)。age, educ, black, hispan, married, nodegree, re74, re75为协变量,re78为结局变量(年总收入)。事实上,倾向性匹配得分分析是要建立一个以分组变量(treat)为因变量,各个协变量(age, educ, black, hispan, married, nodegree, re74, re75)为自变量的回归方程。而结局变量(re78)在PSM过程中几乎不参与建模。

library(MatchIt)
data(lalonde)
head(lalonde)
##      treat age educ black hispan married nodegree re74 re75       re78
## NSW1     1  37   11     1      0       1        1    0    0  9930.0460
## NSW2     1  22    9     0      1       0        1    0    0  3595.8940
## NSW3     1  30   12     1      0       0        0    0    0 24909.4500
## NSW4     1  27   11     1      0       0        1    0    0  7506.1460
## NSW5     1  33    8     1      0       0        1    0    0   289.7899
## NSW6     1  22    9     1      0       0        1    0    0  4056.4940
f = matchit(treat ~ re74 + re75 + educ + black + hispan + age + married + nodegree, 
    data = lalonde, method = "nearest")
# f1=matchit(treat~re74+re75+educ+black+hispan+age+married+nodegree,data=lalonde,method='nearest',caliper=0.05)
# 设置卡钳值越大似乎更严谨,得到的结果更少
summary(f)
## 
## Call:
## matchit(formula = treat ~ re74 + re75 + educ + black + hispan + 
##     age + married + nodegree, data = lalonde, method = "nearest")
## 
## Summary of balance for all data:
##          Means Treated Means Control SD Control  Mean Diff   eQQ Med
## distance        0.5774        0.1822     0.2295     0.3952    0.5176
## re74         2095.5737     5619.2365  6788.7508 -3523.6628 2425.5720
## re75         1532.0553     2466.4844  3291.9962  -934.4291  981.0968
## educ           10.3459       10.2354     2.8552     0.1105    1.0000
## black           0.8432        0.2028     0.4026     0.6404    1.0000
## hispan          0.0595        0.1422     0.3497    -0.0827    0.0000
## age            25.8162       28.0303    10.7867    -2.2141    1.0000
## married         0.1892        0.5128     0.5004    -0.3236    0.0000
## nodegree        0.7081        0.5967     0.4911     0.1114    0.0000
##           eQQ Mean   eQQ Max
## distance    0.3955    0.5966
## re74     3620.9240 9216.5000
## re75     1060.6582 6795.0100
## educ        0.7027    4.0000
## black       0.6432    1.0000
## hispan      0.0811    1.0000
## age         3.2649   10.0000
## married     0.3243    1.0000
## nodegree    0.1135    1.0000
## 
## 
## Summary of balance for matched data:
##          Means Treated Means Control SD Control Mean Diff  eQQ Med eQQ Mean
## distance        0.5774        0.3629     0.2533    0.2145   0.1646   0.2146
## re74         2095.5737     2342.1076  4238.9757 -246.5339 131.2709 545.1182
## re75         1532.0553     1614.7451  2632.3533  -82.6898 152.1774 349.5371
## educ           10.3459       10.6054     2.6582   -0.2595   0.0000   0.4541
## black           0.8432        0.4703     0.5005    0.3730   0.0000   0.3730
## hispan          0.0595        0.2162     0.4128   -0.1568   0.0000   0.1568
## age            25.8162       25.3027    10.5864    0.5135   3.0000   3.3892
## married         0.1892        0.2108     0.4090   -0.0216   0.0000   0.0216
## nodegree        0.7081        0.6378     0.4819    0.0703   0.0000   0.0703
##             eQQ Max
## distance     0.4492
## re74     13121.7500
## re75     11365.7100
## educ         3.0000
## black        1.0000
## hispan       1.0000
## age          9.0000
## married      1.0000
## nodegree     1.0000
## 
## Percent Balance Improvement:
##          Mean Diff.   eQQ Med eQQ Mean  eQQ Max
## distance    45.7140   68.1921  45.7536  24.7011
## re74        93.0035   94.5880  84.9453 -42.3724
## re75        91.1508   84.4891  67.0453 -67.2655
## educ      -134.7737  100.0000  35.3846  25.0000
## black       41.7636  100.0000  42.0168   0.0000
## hispan     -89.4761    0.0000 -93.3333   0.0000
## age         76.8070 -200.0000  -3.8079  10.0000
## married     93.3191    0.0000  93.3333   0.0000
## nodegree    36.9046    0.0000  38.0952   0.0000
## 
## Sample sizes:
##           Control Treated
## All           429     185
## Matched       185     185
## Unmatched     244       0
## Discarded       0       0
matchdata = match.data(f)
matchdata
##       treat age educ black hispan married nodegree re74 re75       re78
## NSW1      1  37   11     1      0       1        1    0    0  9930.0460
## NSW2      1  22    9     0      1       0        1    0    0  3595.8940
## NSW3      1  30   12     1      0       0        0    0    0 24909.4500
## NSW4      1  27   11     1      0       0        1    0    0  7506.1460
## NSW5      1  33    8     1      0       0        1    0    0   289.7899
## NSW6      1  22    9     1      0       0        1    0    0  4056.4940
## NSW7      1  23   12     1      0       0        0    0    0     0.0000
## NSW8      1  32   11     1      0       0        1    0    0  8472.1580
## NSW9      1  22   16     1      0       0        0    0    0  2164.0220
## NSW10     1  33   12     0      0       1        0    0    0 12418.0700
## NSW11     1  19    9     1      0       0        1    0    0  8173.9080
## NSW12     1  21   13     1      0       0        0    0    0 17094.6400
## NSW13     1  18    8     1      0       0        1    0    0     0.0000
## NSW14     1  27   10     1      0       1        1    0    0 18739.9300
## NSW15     1  17    7     1      0       0        1    0    0  3023.8790
## NSW16     1  19   10     1      0       0        1    0    0  3228.5030
## NSW17     1  27   13     1      0       0        0    0    0 14581.8600
## NSW18     1  23   10     1      0       0        1    0    0  7693.4000
## NSW19     1  40   12     1      0       0        0    0    0 10804.3200
## NSW20     1  26   12     1      0       0        0    0    0 10747.3500
## NSW21     1  23   11     1      0       0        1    0    0     0.0000
## NSW22     1  41   14     0      0       0        0    0    0  5149.5010
## NSW23     1  38    9     0      0       0        1    0    0  6408.9500
## NSW24     1  24   11     1      0       0        1    0    0  1991.4000
## NSW25     1  18   10     1      0       0        1    0    0 11163.1700
## NSW26     1  29   11     1      0       1        1    0    0  9642.9990
## NSW27     1  25   11     1      0       0        1    0    0  9897.0490
## NSW28     1  27   10     0      1       0        1    0    0 11142.8700
## NSW29     1  17   10     1      0       0        1    0    0 16218.0400
## NSW30     1  24   11     1      0       0        1    0    0   995.7002
## NSW31     1  17   10     1      0       0        1    0    0     0.0000
## NSW32     1  48    4     1      0       0        1    0    0  6551.5920
## NSW33     1  25   11     1      0       1        1    0    0  1574.4240
## NSW34     1  20   12     1      0       0        0    0    0     0.0000
## NSW35     1  25   12     1      0       0        0    0    0  3191.7530
## NSW36     1  42   14     1      0       0        0    0    0 20505.9300
## NSW37     1  25    5     1      0       0        1    0    0  6181.8800
## NSW38     1  23   12     1      0       1        0    0    0  5911.5510
## NSW39     1  46    8     1      0       1        1    0    0  3094.1560
## NSW40     1  24   10     1      0       0        1    0    0     0.0000
## NSW41     1  21   12     1      0       0        0    0    0  1254.5820
## NSW42     1  19    9     0      0       0        1    0    0 13188.8300
## NSW43     1  17    8     1      0       0        1    0    0  8061.4850
## NSW44     1  18    8     0      1       1        1    0    0  2787.9600
## NSW45     1  20   11     1      0       0        1    0    0  3972.5400
## NSW46     1  25   11     1      0       1        1    0    0     0.0000
## NSW47     1  17    8     1      0       0        1    0    0     0.0000
## NSW48     1  17    9     1      0       0        1    0    0     0.0000
## NSW49     1  25    5     1      0       0        1    0    0 12187.4100
## NSW50     1  23   12     1      0       0        0    0    0  4843.1760
## NSW51     1  28    8     1      0       0        1    0    0     0.0000
## NSW52     1  31   11     1      0       1        1    0    0  8087.4870
## NSW53     1  18   11     1      0       0        1    0    0     0.0000
## NSW54     1  25   12     1      0       0        0    0    0  2348.9730
## NSW55     1  30   11     1      0       1        1    0    0   590.7818
## NSW56     1  17   10     1      0       0        1    0    0     0.0000
## NSW57     1  37    9     1      0       0        1    0    0  1067.5060
## NSW58     1  41    4     1      0       1        1    0    0  7284.9860
## NSW59     1  42   14     1      0       1        0    0    0 13167.5200
## NSW60     1  22   11     0      0       0        1    0    0  1048.4320
## NSW61     1  17    8     1      0       0        1    0    0     0.0000
## NSW62     1  29    8     1      0       0        1    0    0  1923.9380
## NSW63     1  35   10     1      0       0        1    0    0  4666.2360
## NSW64     1  27   11     1      0       0        1    0    0   549.2984
## NSW65     1  29    4     1      0       0        1    0    0   762.9146
## NSW66     1  28    9     1      0       0        1    0    0 10694.2900
## NSW67     1  27   11     1      0       0        1    0    0     0.0000
## NSW68     1  23    7     0      0       0        1    0    0     0.0000
## NSW69     1  45    5     1      0       1        1    0    0  8546.7150
## NSW70     1  29   13     1      0       0        0    0    0  7479.6560
## NSW71     1  27    9     1      0       0        1    0    0     0.0000
## NSW72     1  46   13     1      0       0        0    0    0   647.2046
## NSW73     1  18    6     1      0       0        1    0    0     0.0000
## NSW74     1  25   12     1      0       0        0    0    0 11965.8100
## NSW75     1  28   15     1      0       0        0    0    0  9598.5410
## NSW76     1  25   11     0      0       0        1    0    0 18783.3500
## NSW77     1  22   12     1      0       0        0    0    0 18678.0800
## NSW78     1  21    9     1      0       0        1    0    0     0.0000
## NSW79     1  40   11     1      0       0        1    0    0 23005.6000
## NSW80     1  22   11     1      0       0        1    0    0  6456.6970
## NSW81     1  25   12     1      0       0        0    0    0     0.0000
## NSW82     1  18   12     1      0       0        0    0    0  2321.1070
## NSW83     1  38   12     0      0       0        0    0    0  4941.8490
##         distance weights
## NSW1  0.63876993       1
## NSW2  0.22463424       1
## NSW3  0.67824388       1
## NSW4  0.77632408       1
## NSW5  0.70163874       1
## NSW6  0.69906990       1
## NSW7  0.65368426       1
## NSW8  0.78972311       1
## NSW9  0.77983825       1
## NSW10 0.04292461       1
## NSW11 0.68901996       1
## NSW12 0.68244400       1
## NSW13 0.64986767       1
## NSW14 0.56241073       1
## NSW15 0.60858629       1
## NSW16 0.72249036       1
## NSW17 0.70259562       1
## NSW18 0.73496416       1
## NSW19 0.71166489       1
## NSW20 0.66431981       1
## NSW21 0.76517492       1
## NSW22 0.13901525       1
## NSW23 0.12238224       1
## NSW24 0.76799791       1
## NSW25 0.71931601       1
## NSW26 0.60916715       1
## NSW27 0.77079713       1
## NSW28 0.26920316       1
## NSW29 0.71611962       1
## NSW30 0.76799791       1
## NSW31 0.71611962       1
## NSW32 0.60981680       1
## NSW33 0.59404299       1
## NSW34 0.64289289       1
## NSW35 0.66079247       1
## NSW36 0.77862159       1
## NSW37 0.56093965       1
## NSW38 0.45094835       1
## NSW39 0.55677960       1
## NSW40 0.73802599       1
## NSW41 0.64650679       1
## NSW42 0.09365296       1
## NSW43 0.64626932       1
## NSW44 0.09150550       1
## NSW45 0.75656387       1
## NSW46 0.59404299       1
## NSW47 0.64626932       1
## NSW48 0.68221881       1
## NSW49 0.56093965       1
## NSW50 0.65368426       1
## NSW51 0.68486666       1
## NSW52 0.61665323       1
## NSW53 0.75070545       1
## NSW54 0.66079247       1
## NSW55 0.61291686       1
## NSW56 0.71611962       1
## NSW57 0.74640480       1
## NSW58 0.37847866       1
## NSW59 0.60480778       1
## NSW60 0.13012192       1
## NSW61 0.64626932       1
## NSW62 0.68826176       1
## NSW63 0.77017180       1
## NSW64 0.77632408       1
## NSW65 0.53662873       1
## NSW66 0.71860150       1
## NSW67 0.77632408       1
## NSW68 0.07382733       1
## NSW69 0.43251547       1
## NSW70 0.70914659       1
## NSW71 0.71540019       1
## NSW72 0.76123332       1
## NSW73 0.57342619       1
## NSW74 0.66079247       1
## NSW75 0.76818297       1
## NSW76 0.13557377       1
## NSW77 0.65010402       1
## NSW78 0.69574047       1
## NSW79 0.80991743       1
## NSW80 0.76232821       1
## NSW81 0.66079247       1
## NSW82 0.63561643       1
## NSW83 0.10034130       1
##  [ reached 'max' / getOption("max.print") -- omitted 287 rows ]
library(foreign)
matchdata$id <- 1:nrow(matchdata)
write.csv(matchdata, "matchdata.csv")

nonrandom包

library(nonrandom)
data(stu1)
dim(stu1)
## [1] 646   9
head(stu1)
##   klinik idnr tmass therapie alter tgr age      ewb   pst
## 1      3  782     6        1    42   1   1 63.46154 81.25
## 2      3  975     9        1    49   1   1 90.38462 93.75
## 3      4  448    10        1    39   1   1 73.07692 93.75
## 4      6  835     6        1    53   1   1 75.00000 81.25
## 5      6  866    10        1    52   1   1 34.61538 56.25
## 6      6  953    10        1    51   1   1 63.46154 93.75

klinik : clinic center

idnr : patient id

tmass : tumor size in mm

alter : age

therapie : therapy: 1:mastectomy; 2:breast conservation

tgr : tumor size: 1: <= 10 mm; 2: > 10 mm

age : 1:<= 55 yrs; 2: > 55 yrs

ewb : emotional status

pst: physical status

stu1.ps <- pscore(data = stu1, 
                  formula = therapie~tgr+age) #匹配肿瘤大小和年龄
stu1.match <- ps.match(object = stu1.ps,
                       ratio  = 2,  # 不宜设置过大,越大越难匹配
                       caliper = 0.05,  # 不宜设置过大,越大越难匹配
                       givenTmatchingC = FALSE,
                       matched.by = "pscore",
                       setseed = 38902,
                       combine.output=TRUE)
matchdata<- stu1.match$data.matched
matchdata
##     klinik idnr tmass therapie alter tgr age      ewb    pst    pscore
## 1        3  782     6        1    42   1   1 63.46154  81.25 0.8832166
## 2        3  975     9        1    49   1   1 90.38462  93.75 0.8832166
## 3        4  448    10        1    39   1   1 73.07692  93.75 0.8832166
## 4        6  835     6        1    53   1   1 75.00000  81.25 0.8832166
## 5        6  866    10        1    52   1   1 34.61538  56.25 0.8832166
## 6        6  953    10        1    51   1   1 63.46154  93.75 0.8832166
## 7        7  555     7        1    46   1   1 76.92308  81.25 0.8832166
## 8        7  592     7        1    53   1   1 48.07692  75.00 0.8832166
## 9        7  613     9        1    44   1   1 69.23077  81.25 0.8832166
## 10       7  666     6        1    46   1   1 55.76923  62.50 0.8832166
## 11       8  183     8        1    41   1   1 61.53846  87.50 0.8832166
## 12      10   18     7        1    55   1   1 78.84615  93.75 0.8832166
## 13      10   45     9        1    45   1   1 75.00000  81.25 0.8832166
## 14      10  107     8        1    42   1   1 69.23077  81.25 0.8832166
## 15      11  514     8        0    47   1   1 75.00000 100.00 0.8832166
## 18      15  188    10        0    43   1   1 92.30769 100.00 0.8832166
## 19      16  505    10        0    36   1   1 75.00000  93.75 0.8832166
## 46      38   19     8        0    51   1   1 51.92308  81.25 0.8832166
## 60      40 1029     5        0    40   1   1 63.46154  75.00 0.8832166
## 61      40 1048     7        0    49   1   1 65.38462 100.00 0.8832166
## 76      63  226     7        0    55   1   1 55.76923  68.75 0.8832166
## 96       2  321    18        1    48   2   1 75.00000  93.75 0.8238997
## 97       2  363    14        0    50   2   1 72.91667  81.25 0.8238997
## 98       2  373    13        1    47   2   1 38.46154  68.75 0.8238997
## 99       2  648    12        1    39   2   1 54.16667  62.50 0.8238997
## 100      2  813    15        0    50   2   1 65.38462 100.00 0.8238997
## 101      4   26    13        1    32   2   1 79.16667  81.25 0.8238997
## 102      4   79    15        1    33   2   1 69.23077  93.75 0.8238997
## 103      4   94    15        1    50   2   1 67.30769  81.25 0.8238997
## 104      4  354    18        1    49   2   1 65.38462  87.50 0.8238997
## 105      4  378    18        1    38   2   1 67.30769  93.75 0.8238997
## 106      4  418    15        1    45   2   1 71.15385  87.50 0.8238997
## 107      4  816    15        1    38   2   1 82.69231 100.00 0.8238997
## 108      4  896    19        1    33   2   1 67.30769  81.25 0.8238997
## 109      4  915    12        1    35   2   1 78.84615  87.50 0.8238997
## 110      4  935    15        0    44   2   1 53.84615  68.75 0.8238997
## 111      6  228    12        1    46   2   1 65.38462  75.00 0.8238997
## 112      6  271    15        1    30   2   1 65.38462  62.50 0.8238997
## 113      6  750    19        1    45   2   1 51.92308  62.50 0.8238997
## 114      6  836    16        1    53   2   1 71.15385  75.00 0.8238997
## 115      6  961    13        1    46   2   1 78.84615  81.25 0.8238997
## 116      6  966    15        1    54   2   1 73.07692  87.50 0.8238997
## 117      6 1013    11        1    48   2   1 65.38462  87.50 0.8238997
## 118      7  415    15        1    48   2   1 60.41667  81.25 0.8238997
## 119      7  491    13        1    34   2   1 46.15385  68.75 0.8238997
## 120      7  524    17        1    49   2   1 51.92308  68.75 0.8238997
## 121      7  567    12        1    37   2   1 76.92308  87.50 0.8238997
## 122      7  572    13        1    52   2   1 64.58333  81.25 0.8238997
## 123      7  705    12        1    48   2   1 83.33333 100.00 0.8238997
## 124      7  759    16        1    48   2   1 34.61538  56.25 0.8238997
## 125      7  781    15        1    39   2   1 47.91667  62.50 0.8238997
## 126      7  811    16        1    41   2   1 57.69231  81.25 0.8238997
## 127      7 1015    15        1    53   2   1 59.61538  81.25 0.8238997
## 128      8   65    13        0    54   2   1 40.90909  68.75 0.8238997
## 129      8  127    13        1    45   2   1 34.61538  50.00 0.8238997
## 130      8  133    18        1    41   2   1 82.69231  81.25 0.8238997
## 131     10   89    14        1    45   2   1 90.38462 100.00 0.8238997
## 132     10  339    17        1    55   2   1 76.92308  81.25 0.8238997
## 133     10 1009    18        1    33   2   1 42.30769  56.25 0.8238997
## 134     11  348    12        1    35   2   1 46.15385  56.25 0.8238997
## 135     11  993    13        0    47   2   1 76.92308  81.25 0.8238997
## 136     11 1105    13        0    55   2   1 82.69231  93.75 0.8238997
## 137     11 1110    15        0    49   2   1 65.38462  75.00 0.8238997
## 138     13   13    13        1    39   2   1 42.30769  62.50 0.8238997
## 139     13  181    14        1    35   2   1 42.30769  75.00 0.8238997
## 140     13  358    16        1    55   2   1 90.38462  93.75 0.8238997
## 141     13  553    13        1    55   2   1 71.15385  87.50 0.8238997
## 142     13  667    17        1    39   2   1 86.53846  93.75 0.8238997
## 143     14  535    15        1    38   2   1 76.92308  87.50 0.8238997
## 144     14  822    18        1    52   2   1 59.61538  68.75 0.8238997
## 145     16  251    12        0    36   2   1 65.38462  68.75 0.8238997
## 146     16  682    17        1    38   2   1 82.69231  93.75 0.8238997
## 147     16  842    15        1    51   2   1 30.76923  75.00 0.8238997
## 148     17   34    18        1    29   2   1 80.76923  93.75 0.8238997
## 149     17  261    15        1    44   2   1 71.15385  87.50 0.8238997
## 150     17  362    14        1    42   2   1 30.76923  56.25 0.8238997
## 151     17  427    14        0    45   2   1 59.61538  68.75 0.8238997
## 152     17  794    12        1    46   2   1 80.76923 100.00 0.8238997
## 153     18  408    15        0    52   2   1 94.23077 100.00 0.8238997
## 154     18  590    20        1    32   2   1 36.53846  81.25 0.8238997
## 155     18  636    20        1    46   2   1 40.38462  68.75 0.8238997
## 156     18  776    19        1    49   2   1 55.76923  75.00 0.8238997
## 157     18  973    17        1    52   2   1 75.00000  75.00 0.8238997
## 158     18 1094    12        1    51   2   1 59.61538  87.50 0.8238997
## 159     18 1117    15        0    51   2   1 48.07692  50.00 0.8238997
## 160     19  730    15        1    38   2   1 71.15385  81.25 0.8238997
## 161     19  850    12        1    55   2   1 50.00000  62.50 0.8238997
## 162     19 1091    13        1    50   2   1 67.30769  93.75 0.8238997
## 163     20    1    11        1    42   2   1 69.23077  93.75 0.8238997
## 164     20   20    12        1    47   2   1 61.53846  81.25 0.8238997
##     match.index
## 1             1
## 2             1
## 3             2
## 4             2
## 5             3
## 6             3
## 7             4
## 8             4
## 9             5
## 10            5
## 11            6
## 12            6
## 13            7
## 14            7
## 15            1
## 18            2
## 19            3
## 46            4
## 60            5
## 61            6
## 76            7
## 96            8
## 97            8
## 98            8
## 99            9
## 100           9
## 101           9
## 102          10
## 103          10
## 104          11
## 105          11
## 106          12
## 107          12
## 108          13
## 109          13
## 110          10
## 111          14
## 112          14
## 113          15
## 114          15
## 115          16
## 116          16
## 117          17
## 118          17
## 119          18
## 120          18
## 121          19
## 122          19
## 123          20
## 124          20
## 125          21
## 126          21
## 127          22
## 128          11
## 129          22
## 130          23
## 131          23
## 132          24
## 133          24
## 134          25
## 135          12
## 136          13
## 137          14
## 138          25
## 139          26
## 140          26
## 141          27
## 142          27
## 143          28
## 144          28
## 145          15
## 146          29
## 147          29
## 148          30
## 149          30
## 150          31
## 151          16
## 152          31
## 153          17
## 154          32
## 155          32
## 156          33
## 157          33
## 158          34
## 159          18
## 160          34
## 161          35
## 162          35
## 163          36
## 164          36
##  [ reached 'max' / getOption("max.print") -- omitted 356 rows ]
library(foreign)
matchdata$id<-1:nrow(matchdata)
write.csv(matchdata,"stu1matchdata.csv")


data(stu1)
stu1.ps <- pscore(data = stu1, 
                  formula = therapie~tgr+age)
plot.pscore(x = stu1.ps,
            main = "PS distribution",
            xlab = "",
            par.1=list(col="red"),
            par.0=list(lwd=2),
            par.dens=list(kernel="gaussian"))

plot of chunk unnamed-chunk-3

有关PSM的几点思考

对照组样本量足够大,对照组与试验组样本量之比5:1以上,确保绝大多数试验组对象可以匹配上合适的对照,最好所有试验组对象均得到良好匹配。

能用PSM的均可以用回归分析,可以用回归的未必可以用PSM,建议同时采用PSM与回归分析处理数据,当两者结果一致的时候说明结果较可信