Sync a large table with API

I have a large public doc, here, that comes from a messier private doc. I tried to sync them via the API using Al Chen’s excellent blog post, but I got a 400 error: “request entity too large”.

There’s a bug that prevents me from updating the target table by copying a row. Does anyone know how to break a sync up into multiple requests? I thought about breaking my large table into several views in both the source and target table, then syncing each view, but I don’t know how to access the ID of a view, or if a view even has one. Thanks for your help!

Hi David, if you have that much data in the rows you’re syncing in (I believe we cap each API request at 2MB now), you can break up the sync into multiple requests by calling CodaAPI.upsertRows() multiple times instead of all at once. Here’s what the updated code might look like:

  CHUNK_SIZE = 100; // Adjust as needed until you don't get 400s.

  for (var i = 0; i < upsertBodyRows.length; i += CHUNK_SIZE) {
    var chunk = upsertBodyRows.slice(i, i + CHUNK_SIZE);
    CodaAPI.upsertRows(target.doc, target.table, {rows: chunk, keyColumns: [TARGET_TABLE_SOURCE_ROW_COLUMN]});
  }

Let me know if that helps.

Thanks so much, Oleg! I’m still getting 400s, though. I tried decreasing the chunk size by tens down to ten, then set it to 1. I also tried increasing it to 200, but I keep getting the same error message. Here’s the full code:

// Setup and global variables

CodaAPI.authenticate('[I put my token here]');  // Replace with your token.


// Functions


SOURCE_TABLES = [
  {
    doc: 'abcdEFG',
    table: 'grid-H9NVeVM265', //Law School Medians
  },
  // Add more as needed.
];
TARGET_TABLE = {
  doc: 'HtSEUS1vRv', //Test Update Table
  table: 'grid-H9NVeVM265', //Law School Medians
};
TARGET_TABLE_SOURCE_ROW_COLUMN = 'Source Row URL';

/** Run me! */
function oneWaySync() {
  for each (var source in SOURCE_TABLES) {
    syncSpecificTable(source, TARGET_TABLE);
  }
}

// TODO: handle pagination for syncing source tables with >500 items.
function syncSpecificTable(source, target) {
  // Get info on the source and target tables.
  var sourceTable = CodaAPI.getTable(source.doc, source.table);
  var targetTable = CodaAPI.getTable(target.doc, target.table);
  Logger.log('::::: Syncing "%s" => "%s"...', sourceTable.name, targetTable.name);

  // Find which columns we have to sync.
  var sourceColumns = CodaAPI.listColumns(source.doc, source.table).items.map(function(item) { return item.name; });
  var targetColumns = CodaAPI.listColumns(target.doc, target.table).items.map(function(item) { return item.name; });
  var commonColumns = intersection(sourceColumns, targetColumns);
  Logger.log('Syncing columns: %s', commonColumns.join(', '));
  
  // Pull down all the rows in the source table.
  var sourceRows = CodaAPI.listRows(source.doc, source.table, {limit: 500, useColumnNames: true}).items;
  Logger.log('Source table has %s rows', sourceRows.length);

  // Upsert all rows in the source table into the target table.
  var upsertBodyRows = sourceRows.map(function(row) {
    var cells = commonColumns.map(function(colName) {
      return {
        column: colName,
        value: row.values[colName],
      };
    });
    // Add a URL to the source row in the target table, which will also be used as the upsert key.
    cells.push({column: TARGET_TABLE_SOURCE_ROW_COLUMN, value: row.browserLink});
    return {cells: cells};
  });
  // Oleg's updated code:

  CHUNK_SIZE = 100; // Adjust as needed until you don't get 400s.

  for (var i = 0; i < upsertBodyRows.length; i += CHUNK_SIZE) {
    var chunk = upsertBodyRows.slice(i, i + CHUNK_SIZE);
    CodaAPI.upsertRows(target.doc, target.table, {rows: chunk, keyColumns: [TARGET_TABLE_SOURCE_ROW_COLUMN]});
  }

}

function intersection(a, b) {
  var result = [];
  for each (var x in a) {
    if (b.indexOf(x) !== -1) {
      result.push(x);
    }
  }
  return result;
}

I might have put your code in the wrong place.

You would want to decrease that number if you’re getting 400s. The lower the number, the fewer rows get sent in a single request, and hence the smaller the request.

EDIT: I misread the post. If this happens even with a chunk size of 1, then a single row is really big (larger than 2 MB). You probably won’t be able to insert too many of those into Coda.
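
If you want to confirm whether a single row is the culprit, one quick (untested) check is to log each row’s approximate serialized size before upserting. This sketch assumes the upsertBodyRows array built in the script above:

  upsertBodyRows.forEach(function(rowBody, index) {
    // JSON.stringify length is a rough proxy for how much of the request body this row takes up.
    var approxKb = Math.round(JSON.stringify(rowBody).length / 1024);
    if (approxKb > 100) {  // Flag anything suspiciously large; adjust the threshold as needed.
      Logger.log('Row %s is ~%s KB', index, approxKb);
    }
  });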

@DSB, did you solve this? Since you tried one row at a time and still got 400s, and judging from your other post, a single row is probably exceeding the request size limit.

Maybe it boils down to upserting one column at a time (even though it might still go over the limit…). Here’s some code that could work (untested):

// Setup and global variables

CodaAPI.authenticate('[I put my token here]');  // Replace with your token.


// Functions


SOURCE_TABLES = [
  {
    doc: 'abcdEFG',
    table: 'grid-H9NVeVM265', //Law School Medians
  },
  // Add more as needed.
];
TARGET_TABLE = {
  doc: 'HtSEUS1vRv', //Test Update Table
  table: 'grid-H9NVeVM265', //Law School Medians
};
TARGET_TABLE_SOURCE_ROW_COLUMN = 'Source Row URL';

/** Run me! */
function oneWaySync() {
  for each (var source in SOURCE_TABLES) {
    syncSpecificTable(source, TARGET_TABLE);
  }
}

// TODO: handle pagination for syncing source tables with >500 items.
function syncSpecificTable(source, target) {
  // Get info on the source and target tables.
  var sourceTable = CodaAPI.getTable(source.doc, source.table);
  var targetTable = CodaAPI.getTable(target.doc, target.table);
  Logger.log('::::: Syncing "%s" => "%s"...', sourceTable.name, targetTable.name);

  // Find which columns we have to sync.
  var sourceColumns = CodaAPI.listColumns(source.doc, source.table).items.map(function(item) { return item.name; });
  var targetColumns = CodaAPI.listColumns(target.doc, target.table).items.map(function(item) { return item.name; });
  var commonColumns = intersection(sourceColumns, targetColumns);
  Logger.log('Syncing columns: %s', commonColumns.join(', '));
  
  // Pull down all the rows in the source table.
  var sourceRows = CodaAPI.listRows(source.doc, source.table, {limit: 500, useColumnNames: true}).items;
  Logger.log('Source table has %s rows', sourceRows.length);

  // Upsert the source rows into the target table one column at a time, so that
  // each request stays small. The source row URL is sent with every request and
  // used as the upsert key.
  sourceRows.forEach(row => {
    commonColumns.forEach(colName => {
      const upsertBodyRows = [
        {
          cells: [
            {
              column: colName,
              value: row.values[colName],
            },
            {
              column: TARGET_TABLE_SOURCE_ROW_COLUMN,
              value: row.browserLink,
            },
          ],
        },
      ];
      CodaAPI.upsertRows(target.doc, target.table, {rows: upsertBodyRows, keyColumns: [TARGET_TABLE_SOURCE_ROW_COLUMN]});
    });
  });
  Logger.log('Updated %s!', targetTable.name);
}

function intersection(a, b) {
  var result = [];
  for each (var x in a) {
    if (b.indexOf(x) !== -1) {
      result.push(x);
    }
  }
  return result;
}
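
One caveat with the column-by-column approach: it makes one API call per row per column, so a table with, say, 200 rows and 10 shared columns turns into 2,000 requests. Each request stays tiny, but the full sync will take noticeably longer than the chunked version above.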


@Dalmo_Mendonca you’re incredible! Thank you and @oleg both. I decided earlier today to cut the Gordian knot—I just deleted all of the private columns from my source table, made it public, and pinned notes to the top of every section of the old public table (the target table in these syncs) to redirect people to the source. Now I just hope that no one looks at the version history and giggles at my team’s clownish back-and-forth as we wrangled that table into shape—but if they do, it’s not the end of the world.

@DSB glad you figured it out! If you were inserting a single row that was causing that error to be hit, then that row is over 2 MB of content (1000 pages of text). Your Coda doc would get slow to load if you had many rows like that. At that point, I would encourage you to create separate docs for each row/school; you can always Embed() links to those individual docs in your master table to have them show up there. That way, the master doc will still stay zippy and quick to load.

@oleg One of the larger rows was about 90 KB of text, so I’m not sure why I was still getting 400s when I set the chunk size to 1. Thanks for the suggestion about the embeds, though.

OK, I actually looked more into this, and while the limit for requests is 2 MB, there is a limit of 85 KB for any given row. This is in place to make sure that no one else editing the doc freezes from large row updates. We’ll update the error message to make it clearer, but @Dalmo_Mendonca’s suggestion of splitting up columns is the recommended path if you need to insert large cells.
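
If it helps with finding which columns to split, here’s a rough (untested) sketch that logs the approximate size of each cell in the upsertBodyRows array from the scripts above, so you can see which columns push a row past that 85 KB cap:

  upsertBodyRows.forEach(function(rowBody, index) {
    rowBody.cells.forEach(function(cell) {
      // Serialized length is a rough proxy for how much this cell contributes to the row's size.
      var approxKb = Math.round(JSON.stringify(cell.value || '').length / 1024);
      if (approxKb > 10) {  // Only flag the larger cells; adjust the threshold as needed.
        Logger.log('Row %s, column "%s": ~%s KB', index, cell.column, approxKb);
      }
    });
  });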

Good to know. Thanks!

Hey Oleg, it would be awesome if we could get these limits documented or sent back in headers! I’d like to make more efficient calls to your API. Thank you for these details!
