Sunday, June 25, 2017

An AWS Lambda function to import data from CSV files in an S3 bucket to Redshift

The following Lambda function loads CSV data from an AWS S3 bucket into a table in an AWS Redshift cluster.
var pg = require("pg");
/**
 * Creates a table in Redshift and imports data from a CSV file in the S3 bucket.
 * Make sure to specify the environment variables for the connection string, the S3 bucket,
 * and the IAM role that has access to the Redshift cluster.
 */
var pushData = function(context, entityName, schema) {
    const conn = process.env.DB_CON_STR;      // e.g., pg://user:pass@host:port/db_name
    const s3Bucket = process.env.S3_BUCKET;   // e.g., my.bucket.com
    const credsRole = process.env.CREDS_ROLE; // e.g., arn:aws:iam::1234567890:role/myRedshiftRole
    const client = new pg.Client(conn);

    console.log("Reading " + entityName + " from bucket " + s3Bucket + " and pushing to Redshift cluster");

    // Recreate the table, then COPY the CSV straight from S3 into it.
    const dropQry = 'DROP TABLE IF EXISTS ' + entityName + ';';
    const createQry = 'CREATE TABLE IF NOT EXISTS ' + entityName + '(' + schema + ');';
    const copyQry = 'COPY ' + entityName + ' FROM \'s3://' + s3Bucket + '/' + entityName + '.csv\'' +
        ' credentials \'aws_iam_role=' + credsRole + '\'' +
        ' region \'ap-southeast-2\' delimiter \',\' IGNOREHEADER 1 removequotes;';
    const qry = dropQry + createQry + copyQry;
    console.log("Executing query: " + qry);

    client.connect();
    // With pg v6, client.query() returns a query object that emits 'end' and 'error' events.
    client.query(qry).on("end", function (result) {
        console.log(result);
        client.end();
        context.succeed("Done");
    }).on("error", function (error) {
        console.log(error);
        client.end();
        context.fail(error);
    });
};
exports.handler = function(event, context) {
    /*
     * Change the CSV file name and the sample schema accordingly.
     */
    pushData(context,
        'Customers',
        'id varchar(255), firstName varchar(255), lastName varchar(255), createdAt BIGINT, age INTEGER');
};
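
To try the function locally before deploying, you can require the file and call the handler with a stub context object. The snippet below is only a minimal sketch: it assumes the code above is saved as index.js, and the connection string, bucket, and role values are placeholders you would replace with your own.

// test.js - hypothetical local harness (values below are placeholders, not real credentials)
process.env.DB_CON_STR = "pg://user:pass@my-cluster.example.ap-southeast-2.redshift.amazonaws.com:5439/db_name";
process.env.S3_BUCKET = "my.bucket.com";
process.env.CREDS_ROLE = "arn:aws:iam::1234567890:role/myRedshiftRole";

const lambda = require("./index");

// Minimal stand-in for the Lambda context object used by the handler.
const context = {
    succeed: function (msg) { console.log("succeed:", msg); },
    fail: function (err) { console.error("fail:", err); }
};

lambda.handler({}, context);

Note that the COPY statement expects a Customers.csv file in the bucket whose columns match the schema passed to pushData, with a header row that is skipped by IGNOREHEADER 1.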