#URL In Caption Below:
#jdbc:postgresql://wfx-nonprod-analytics.citte9qqv5jw.us-east-1.redshift.amazonaws.com:5439/wfxanalytics
Image(url='http://s3.amazonaws.com/wfxnp-dev-robby/screen_redshift.png')
staq_user='twc_staq'
staq_pass='Lg3@*JKU'
url='wfx-nonprod-analytics.citte9qqv5jw.us-east-1.redshift.amazonaws.com'
database='wfxanalytics'
getfroms3(prefix='staq/staq_sample_output.xlsx')
#Some string formatting
staq_sample = pd.read_excel('staq_sample_output.xlsx')
staq_sample.columns = [col.lower().replace(' ','').rstrip('($)') for col in staq_sample.columns]
staq_sample = staq_sample.replace('-', np.nan)
staq_sample['date'] = staq_sample['date'].apply(lambda x: x.strftime('%Y-%m-%d'))
staq_sample['datevalue'] = staq_sample['datevalue'].apply(lambda x: x.strftime('%Y-%m-%d'))
staq_sample.info()
#String Length review for creating table
staq_sample.applymap(lambda x: len(str(x))).max()
%%file /ebs/staq/staq_sample_ddl.sql
drop table if exists staq.staq_sample;
create table staq.staq_sample(
lineitem varchar(512),
order_name varchar(512),
date date,
lineitemid varchar(64),
orderid varchar(64),
orderponumber varchar(64),
rate double precision,
total_impressions bigint,
total_clicks bigint,
lineitem_date varchar(64),
type_of_inv varchar(64),
director_network varchar(64),
type_of_revenue varchar(32),
metarefresh boolean,
revenue double precision,
placement varchar(64),
lineitem_type varchar(64),
position varchar(64),
cr_size varchar(64),
source varchar(32),
site varchar(64),
property varchar(64),
billed_imps double precision,
datevalue date
)
diststyle key distkey(lineitemid)
sortkey(date, lineitemid, orderid);
%%bash -s "$devhost_rs" "$devport_rs" "$staq_user" "$staq_pass" "$devdb_rs"
PGPASSWORD=$4 psql -h $1 -p $2 -U $3 $5 -f '/ebs/staq/staq_sample_ddl.sql'
%%bash -s "$devhost_rs" "$devport_rs" "$staq_user" "$staq_pass" "$devdb_rs"
PGPASSWORD=$4 psql -h $1 -p $2 -U $3 $5 -c """set search_path to 'staq';\
select * from pg_table_def where schemaname='staq'"""
staq_sample.to_csv('staq_sample.csv', header=None, index=False)
uploadtos3(filepattern='staq_sample.csv', s3path='staq', split=True, compression=True)
%%file /ebs/staq/staq_sample_copy.sql
copy staq.staq_sample from 's3://redshift_etl_files/staq/staq_sample.csv'
credentials 'aws_access_key_id=xxx;aws_secret_access_key=xxx'
gzip
csv;
%%bash -s "$devhost_rs" "$devport_rs" "$staq_user" "$staq_pass" "$devdb_rs"
PGPASSWORD=$4 psql -h $1 -p $2 -U $3 $5 -f '/ebs/staq/staq_sample_copy.sql'
querydev("""select * from staq.staq_sample limit 3""")
from IPython.display import HTML, Javascript, display
js = """
<script type='text/javascript' src='https://online.tableausoftware.com/javascripts/api/viz_v1.js'>\
</script><div class='tableauPlaceholder' style='width: 1024px; height: 768px;'>\
<object class='tableauViz' width='1024' height='768' style='display:none;'>\
<param name='host_url' value='https%3A%2F%2Fonline.tableausoftware.com%2F' /> \
<param name='site_root' value='/t/theweatherchannelllcweatherfx' />\
<param name='name' value='staq_demo/Sheet1' /><param name='tabs' value='no' />\
<param name='toolbar' value='yes' /></object></div>"""
HTML(js)