我有从 2005 年到 2015 年的面板数据和从 1 到 33(不包括 2、4 和 31)的部门。我想运行一些循环并分别保存每个年份-部门组合的输出。这是我的代码:
# For every year (2005-2015) and every ISIC4 sector in the list below, build a
# symmetric regulatory-distance matrix between reporters and write it to a
# Stata .dta file named after the year and the sector.
for (i in 2005:2015){
# Keep only the rows whose [StartDate, EndDate) interval covers year i.
ntm_data <-subset(ntm_data_wip, StartDate <=i & EndDate >i)
for(s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)){
# NOTE(review): this subsets ntm_data and assigns the result back to ntm_data.
# Later in this same iteration ntm_data is cut down to reporter/ntmcode/hs6 and
# then replaced by a grouped summary, so on the second pass of the s-loop the
# ISIC4 column no longer exists and subset() fails with "object 'ISIC4' not
# found". The year-level subset needs to live in a separate object that the
# inner loop reads from but never overwrites.
ntm_data <-subset(ntm_data, ISIC4==s)
# Once the data is loaded, I exclude NTM codes which are missing.
# I only need the reporter, NTM code and product codes (HS 6-digit codes).
ntm_data <- ntm_data[!is.na(ntm_data$ntmcode)&ntm_data$ntmcode!="",]
# This column selection is what drops ISIC4 from ntm_data.
ntm_data <- ntm_data[,c("reporter", "ntmcode", "hs6")]
# I group the data by reporter, NTM and product code (hs6) and count the number of combinations in a new variable called count.
ntm_data <- ntm_data %>% group_by(reporter, ntmcode, hs6) %>%
summarise(count = n())
# NOTE(review): inside a for loop the value of head() is not auto-printed;
# wrap it in print() if the preview is actually wanted.
head(ntm_data)
# I prepare the regulatory matrix by creating a list of countries for which I want the regulatory distance. The
# regulatory matrix shows the distance between two countries and has as column and row names the ISO3 codes of the countries.
# As specified above, I am interested in having the analysis for all available countries.
avail_iso3s <- unique(ntm_data$reporter)
# I create an empty regulatory distance matrix. For column size I use the length of avail_iso3s and add 1 for the reporter column.
# I populate the column names with reporter and the ISO3 codes with the option dimnames.
# The result has zero rows; rows are appended by assignment in the loop below.
regulatory_distance_matrix <- data.frame(matrix(vector(),0,length(avail_iso3s)+1,
dimnames = list(c(), c("reporter", avail_iso3s )
)),
stringsAsFactors=F)
# Now I can move on to calculating the regulatory distance formula in page 3 of "DEEP REGIONAL INTEGRATION AND NON-TARIFF MEASURES:A METHODOLOGY FOR DATA ANALYSIS (2015)" .
# As N is a constant, I start with calculating it outside of the loop
# N is the number of distinct (ntmcode, hs6) pairs in this year/sector slice.
N <- ntm_data %>% group_by(ntmcode, hs6) %>% count()
N <- nrow(N)
# I now fill in the regulatory distance matrix with values
# NOTE(review): 1:length(x) iterates over c(1, 0) when x is empty, which would
# happen for a year/sector slice with no rows - TODO confirm whether that can occur.
for (g in 1:length(avail_iso3s)){
# Measures applied by country g, flagged with an indicator column.
country_a <- ntm_data[ntm_data$reporter==avail_iso3s[g],c("ntmcode", "hs6")]
country_a$country_a <- 1
regulatory_distance_matrix[g,"reporter"] <- avail_iso3s[g]
for (k in 1:length(avail_iso3s)){
# Skip the pair when the mirrored cell (row k, column g) is already filled;
# the second pass below copies it across instead of recomputing it.
if (!is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){next }
country_b <- ntm_data[ntm_data$reporter==avail_iso3s[k],c("ntmcode", "hs6")]
country_b$country_b <- 1
# Full outer join: a measure present for only one country gets NA on the
# other side, which is turned into 0 so the absolute difference is 1.
merged <- merge(country_a, country_b, by=c("ntmcode", "hs6"), all = TRUE)
merged[is.na(merged)] <- 0
merged$abs_diff <- abs(merged$country_a-merged$country_b)
# Share of (ntmcode, hs6) pairs on which the two countries differ.
rd <- sum(merged$abs_diff)/N
regulatory_distance_matrix[g,avail_iso3s[k]] <- rd
}
}
# Now I fill in the missing values and create a Stata dta.file.
# Each still-empty cell (row k, column g) is copied from its mirror (row g, column k).
for (g in 1:length(avail_iso3s)){
for (k in 1:length(avail_iso3s)){
if (is.na(regulatory_distance_matrix[k,avail_iso3s[g]])){
regulatory_distance_matrix[k,avail_iso3s[g]] <- regulatory_distance_matrix[g,avail_iso3s[k]]
}
}
}
# Tag the output with the year and sector it belongs to before writing it out.
regulatory_distance_matrix$year <-i
regulatory_distance_matrix$ISIC4 <-s
# NOTE(review): write.dta is presumably foreign::write.dta; the library() call
# is not shown in this script - confirm it is loaded.
write.dta(regulatory_distance_matrix, paste0("C:/Users/Utente/Desktop/Master's thesis/Thesis analysis/- RD construction/Binary sectoral RD/regulatory_distance_matrix_",i,"_",s,".dta"))
}
}
但是,在正确创建第一个文件 (regulatory_distance_matrix_",i,"_",s,".dta") 后,在创建第二个文件期间出现以下错误:
Error in eval(e, x, parent.frame()) : oggetto "ISIC4" non trovato
有人知道如何解决这个问题吗?提前致谢!
编辑:
> dput(head(ntm_data_wip))
structure(list(reporter = c("TUR", "ARG", "BRA", "CHN", "USA",
"EUN"), Reporter_ISO_N = c("792", "032", "076", "156", "842",
"918"), hs6 = c("910610", "851679", "040221", "620449", "021012",
"284990"), ntmcode = c("B31", "A11", "B33", "B83", "A83", "B33"
), partner = c("TON", "WLD", "WLD", "IRN", "VAT", "WLD"), Partner_ISO_N = c("776",
"000", "000", "364", "336", "000"), nbr = c(1L, 1L, 1L, 1L, 2L,
1L), Year = c(2016L, 2014L, 2013L, 2016L, 2017L, 2011L), NTMNomenclature = c("M4",
"M4", "M4", "M4", "M4", "M4"), NomenCode = c("H4", "H4", "H4",
"H4", "H4", "H3"), Dataset_id = c(161L, 174L, 174L, 131L, 179L,
111L), ntm_1_digit = c("B", "A", "B", "B", "A", "B"), StartDate = c(2015L,
2006L, 2008L, 2011L, 1992L, 2009L), EndDate = c(9999L, 9999L,
9999L, 9999L, 9999L, 2011L), new_ISIC4 = c("32", "28", "10",
"13", "10", "19"), ISIC4 = c(32L, 28L, 10L, 13L, 10L, 19L)), datalabel = "", time.stamp = "31 Jul 2021 11:34", formats = c("%9s",
"%9s", "%9s", "%9s", "%9s", "%9s", "%9.0g", "%12.0g", "%9s",
"%9s", "%12.0g", "%9s", "%10.0g", "%10.0g", "%9s", "%10.0g"), types = c(3L,
3L, 6L, 4L, 3L, 3L, 65530L, 65529L, 2L, 3L, 65529L, 1L, 65529L,
65529L, 2L, 65530L), val.labels = structure(c("", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "")), var.labels = c("",
"", "", "", "", "", "Number of NTM, distinct codes", "", "",
"", "", "", "(min) StartDate", "(max) EndDate", "", ""), version = 118L, label.table = list(), expansion.fields = list(
c("ISIC4", "destring", "Characters removed were:"), c("ISIC4",
"destring_cmd", "destring new_ISIC4, gen(ISIC4)")), byteorder = "LSF", orig.dim = c(6953474L,
16L), row.names = c(NA, 6L), class = "data.frame")
评论太长了。
问题在于,内层循环 for (s in c(...)) {...}
使用了 ntm_data
,而 ntm_data 本身又在内层循环中被覆盖。因此在第一次迭代时,ntm_data$ISIC4
还是数据框中的一列,但之后该列被删除,第二次迭代便找不到它了。另外,由于我们是在取子集,后续迭代不应基于前一个 s
的筛选结果。
下面是代码开头部分的片段,它在外层循环的开始处新建了一个对象:
library(dplyr)
for (i in 2005:2015){
## CHANGED - keep the year-filtered rows in their own object so the inner loop
## always subsets from data that still has the ISIC4 column
ntm_data_years <-subset(ntm_data_wip, StartDate <=i & EndDate >i)
for(s in c(1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33)){
ntm_data <-subset(ntm_data_years, ISIC4==s) ## CHANGED to subset(ntm_data_years, ...) so each sector starts from the full year subset
ntm_data <- ntm_data[!is.na(ntm_data$ntmcode)&ntm_data$ntmcode!="",]
ntm_data <- ntm_data[,c("reporter", "ntmcode", "hs6")] ## This is the line that removes ISIC4 from the data frame
...
}
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句