nodes (because closing tags # can be just before those

nodes). node <- getNodeSet(html, '//body/*[not(@id="savebutton") and not(@id="submitbutton")]') # Get line numbers of those top level nodes nxt <- sapply(node, getLineNumber) # Subset those greater or equal to src.start as closing tags can be in # one line with opening tags nxtn <- nxt[nxt >= src.start[i]] # Search for closing tags if (length(nxtn) == 1) { # For last elements. There can be no more element # after it so nxtn has only one element. index <- grep(endTags[i], src[nxtn:length(src)]) nxtnLines <- seq(nxtn, length(src)) src.end[i] <- nxtnLines[index] } else { # If nxtn finds many other elements after the starting tag, # just choose the first (in case for the closing tag in the same # line) and second (starting line of the next node). nxtn2 <- nxtn[c(1,2)] nxtnLines <- seq(nxtn2[1], nxtn2[2]) index <- grep(endTags[i], src[nxtnLines]) src.end[i] <- max(nxtnLines[index]) } } ##### Generate a list of sequence from src.start to src.end. # Each element of the list is a chunk. src.chunks <- mapply(FUN = seq, src.start, src.end, SIMPLIFY = FALSE) ##### Find which src lines are actually to be edited. # In case ALL editable sections are being edited. if (length(which.chunks) > 0) { for (i in 1:length(which.chunks)) { # Find the "id" of "NOT MODIFIED" editor chunks notEditedLines <- unlist(editor.chunks[which.chunks[i]]) # Remove some strings to get the right id (i.e. "Editor-##"). notEditedId <- gsub("^EDITOR\\s(.+)\\sNOT MODIFIED$", "\\1", editor[notEditedLines][1]) # Match the id from editor to the src chunks notEditedIndex <- sapply(src.chunks, function(x) { grepl(notEditedId, src[x[1]]) }) # "notEditedIndex" is a logical vector, TRUE for the correct match # so we want to subset for the FALSE ones. src.chunks <- src.chunks[!(notEditedIndex)] } } ########################################################################### #################### Replace old lines with new lines ##################### ########################################################################### ##### Get rid of "NOT MODIFIED" lines. if (length(which.chunks) > 0) { editor.chunks <- editor.chunks[-which.chunks] } ##### Check if the lengths are equal. if (length(editor.chunks) != length(src.chunks)) { stop("Number of new content does not equal number of old content") } ##### Remove everything except # ASSUMES only whitespace in front of new.src <- vector("numeric", length = length(src.start)) for (i in 1:length(src.start)) { # Match for first "<" and ">" (i.e. opening tags), and # remove the rest. new.src[i] <- gsub("(.*?<.+?>).+$", "\\1", src[src.start[i]]) } if (length(which.chunks) > 0) { new.src <- new.src[-which.chunks] } # Go BACKWARDS through file, replacing last chunk first # (so that indices for lines-to-change remain valid) for (i in length(editor.chunks):1) { oldLines <- src.chunks[[i]] # Parse as a text. srcId <- htmlParse(src[oldLines], asText=TRUE) idNode <- getNodeSet(srcId, "//body/*[1]") # search for id attributes id <- xmlGetAttr(idNode[[1]], "id") # find which editor.chunks contain the matched id index <- sapply(editor.chunks, function(x) { grepl(id, editor[x[1]]) }) newLines <- unlist(editor.chunks[index]) firstOldLine <- src.chunks[[i]][1] end.tag <- gsub("(^.*<)(.+?)\\s.+$", "\\1/\\2>", new.src[i]) # Rip out old lines and append new lines in the old place src <- append(src[-oldLines], # Subset: get rid of the first lines in editor # (e.g. EDITOR Editor-1) AND the last lines which # are just empty spaces. c(new.src[i], editor[newLines[-1]], end.tag), firstOldLine - 1) } if (is.null(outfile)) { outfile <- gsub("anns.html", "save.html", infile) } writeLines(src, outfile) }