从Google表格中提取嵌入式图片并重新插入为链接

马克·米尔斯

我有一个Google表格可以处理100张内嵌图片。该工作表加载非常慢。我已经读到,如果将图像作为Google云端硬盘文件中的链接而不是作为电子表格中的嵌入式图像加载,则工作表的性能会更好。

所以我想写一个Google Apps脚本...

  1. 浏览每个工作表上的每个单元格;
  2. 决定单元格是否包含图像;
  3. 将图片保存到Google云端硬盘;
  4. 获取图像文件的链接;
  5. 删除内联图像;
  6. 重新插入图像作为链接。

到目前为止,我有这个...

function extract_images() {
  var spreadsheet_id = "1NGtkBj7HOqf-rvV7zdRb7lrj3MRFoJH9Ulnx9YIO7Hs" // MARKS COPY OF "RAW Exam Question Reference AQA GCSE PE 9-1"
  var images_folder = DriveApp.getFolderById("1nEF_E7rZmTpTif3uDS8B0Jnio1x6IaSm");
  var ss = SpreadsheetApp.openById(spreadsheet_id);
  var sheets = ss.getSheets();
  sheets.forEach (sh => {
    Logger.log("Looking through '"+sh.getName()+"'");
    var w = sh.getLastColumn();
    var h = sh.getLastRow();
    for (i=1; i<h; i++){
      for (j=1; j<w; j++){
        var value = sh.getRange(i,j).getValue();
        var range = sh.getRange(i,j);
        if (value.valueOf().toString().startsWith("com")){
          Logger.log(`Image at (${i},${j})`);
        }
      }
    }
  });
}

...通过单元格值在工作表中查找图像,对于一个图像,该单元格似乎具有com[email protected]IDID为8个字符的十六进制代码的格式

我曾尝试使用Kanshi TANAIKE编写的这个令人惊叹的库...

https://github.com/tanaikech/DocsServiceApp

...但是我找不到一种使用它从一个单元格中提取一张图像的方法。我只能看到一次提取所有图像的方法,尝试执行“ exportSizeLimitExceeded”时会出现错误。

我觉得我很亲近,这很令人沮丧。请帮忙!

马克·米尔斯

感谢@Tanaike建议的帖子,我想出的解决方案如下。它很脏,写得不好,但是可以用。

工作流程...

  1. 获取电子表格ID和文件夹ID;
  2. 打开电子表格的副本;
  3. 导出为XLSX;
  4. 将文件类型更改为zip;
  5. 解压缩成斑点;
  6. 创建对象以保存xlsx工作表ID /工作表名称,图像位置和文件名以及新创建图像的驱动器图像ID;
  7. 遍历Blob中的每个Blob;
  8. 查找xl / workbook.xml文件,并获取工作表ID和名称;
  9. 查找每个xl / drawings / drawing#.xml文件并检索图纸,位置和图像文件名;
  10. 查找所有图像,保存到驱动器并检索并将驱动器ID映射到文件名;
  11. 浏览Google工作表中的每个工作表;
  12. 遍历每个单元格;
  13. 当您找到一个以“ com”开头而不是一个公式的单元格时,请从工作表ID,行和列生成图像ID,找到相应的图像名称,然后找到相应的Google Drive图像ID。
  14. 清除单元格内容,然后重新插入= IMAGE()公式;
  15. 将注释添加到具有图像名称的单元格。
var spreadsheet_id = "SHEET_ID"; // Spreadsheet ID
var images_folder = DriveApp.getFolderById("FOLDER_ID"); // ID of folder to store images in

function extract_images() {
  
  // Let's get started
  var ss = SpreadsheetApp.openById(spreadsheet_id); // Open a copy of the spreadsheet
  const url = "https://docs.google.com/spreadsheets/export?exportFormat=xlsx&id=" + spreadsheet_id; // Export as XLSX
  const blob = UrlFetchApp.fetch(url, {headers: {authorization: `Bearer ${ScriptApp.getOAuthToken()}`}}).getBlob().setContentType(MimeType.ZIP); // Change filetype to ZIP
  const xlsx = Utilities.unzip(blob); // Unzip folder into collection of binary files
  const xlsx_workbook_filename = "xl/workbook.xml"; // This is the filename of the worksheet list!
  var worksheets = new Object(); // Create object to maps sheet names to XLSX sheet ids
  const xlsx_drawing_filename_stub = "xl/drawings/drawing"; // add '#.xml' where # is sheetid.
  var images = new Object(); // Create object to maps cell references of images to image files
  var image_ids = new Object(); // Create object to map ID of newly created image to image name

  // Let's go through every item in the xlsx blob! There may be a better way!
  xlsx.forEach(b =>{
    // Check to see if the blob name is the same as the worksheets.xml file
    if (b.getName() == xlsx_workbook_filename){
      var xlsx_workbook_file = b.getAs("text/xml"); // convert to text/xml
      Logger.log(`Found '${xlsx_workbook_filename}'`);
      var document = XmlService.parse(xlsx_workbook_file.getDataAsString("UTF-8")); // parse the xml file so I can access the nodes
      var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/spreadsheetml/2006/main"); // This is the namespace
      var root = document.getRootElement(); // Get the root element so we know where to start
      var sheets = root.getChild("sheets",namespace).getChildren("sheet",namespace); // Get a collection of sheet nodes
      // Iterate through the sheet nodes
      sheets.forEach(s => {
        worksheets[s.getAttribute("name").getValue()] = s.getAttribute("sheetId").getValue(); // Create objects to map sheet name to xlsx sheet id
      });
    }
    // Check to see if the blob name starts with the drawing file stub
    if (b.getName().startsWith(xlsx_drawing_filename_stub)){
      var this_sheet_id = b.getName().substr(19,1);
      var xlsx_drawing_file = b.getAs("text/xml"); // Convert to text/xml
      Logger.log(`Found drawing file '${b.getName()}'`);
      var document = XmlService.parse(xlsx_drawing_file.getDataAsString("UTF-8")); // Parse the xml file!
      var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"); // This is the namespace
      var root = document.getRootElement(); // Get root element so we know where to start
      var children = root.getChildren(); // Get all the children - each one is an embedded image
      Logger.log(`${children.length} images in worksheet ${this_sheet_id}`); // Nice!
      // Iterate through each embedded image
      children.forEach(t =>{
        var col = t.getChild("from",namespace).getChild("col",namespace).getText(); // Get the column reference
        var row = t.getChild("from",namespace).getChild("row",namespace).getText(); // Get the row reference
        var image_filename = t.getChild("pic",namespace).getChild("nvPicPr",namespace).getChild("cNvPr",namespace).getAttribute("name").getValue(); // Dig down!
        Logger.log(`Image in sheet ${this_sheet_id} at ${col},${row} called '${image_filename}'`);
        images[`${this_sheet_id},${col},${row}`] = image_filename; // Create object to hold location and filename!
      });
    }
    // Get the actual images!
    if (b.getContentType() == 'image/png'){
      var the_name = b.getName().substr(9); // The image name will start with 'xl/media/' otherwise!
      Logger.log(`Saving image '${the_name}'...`);
      var id = images_folder.createFile(b).setName(the_name).getId(); // Create the image file in the image folder
      Logger.log(`This is image id '${id}'`);
      image_ids[the_name] = id; // Create object to map image name to google drive image id
  }
  });;

  // Now, lets replace the images!
  var sheets = ss.getSheets();
  // Go through each worksheet in the Google sheet
  sheets.forEach (sh => {
    var sheet_name = sh.getName(); // Get the sheet name
    Logger.log(`Looking through '${sheet_name}'`);
    var sheet_id = worksheets[sheet_name]; // This is the corresponding xlsx sheet id
    Logger.log(`This is worksheet id '${sheet_id}'. Looking for images...`);
    var w = sh.getLastColumn()+1; // Last occupied column
    var h = sh.getLastRow()+1; // Last occupied row
    // Iterate through all cells!
    for (i=1; i<h; i++){
      for (j=1; j<w; j++){
        var range = sh.getRange(i,j); // Get the range object
        var value = range.getValue(); // Get the value
        Logger.log(`The cell contains '${value.valueOf().toString()}'`);
        // Only replace the image if there is an image in the cell AND it's NOT a formula!
        if (value.valueOf().toString().startsWith("com") && !range.getFormula()){
          var img_ref = `${sheet_id},${j-1},${i-1}`; // Generate the image reference
          var img_name = images[img_ref]; // Get the image name
          var img_id = image_ids[img_name]; // Get the Google drive image id
          Logger.log(`Image at ${img_ref}`);
          Logger.log(`This is called ${img_name}`);
          Logger.log(`It's ID is ${img_id}`);
          range.setValue(`=IMAGE("https://docs.google.com/uc?export=view&id=${img_id}",1)`); // Use 'IMAGE()' to embed the image
          range.setNote(`${img_name}`); // Add a note with the image name, cause I can.
        }
      }
    }
  });
}

本文收集自互联网,转载请注明来源。

如有侵权,请联系 [email protected] 删除。

编辑于
0

我来说两句

0 条评论
登录 后参与评论

相关文章