11_12_2018

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
11_12_2018 [2018/11/14 21:37] cynthia.kahn 11_12_2018 [2018/11/14 22:01] (current) cynthia.kahn
Line 1: Line 1:
 ===== Week Ending November 12 ===== ===== Week Ending November 12 =====
  
-===Amazon AI Hackathon=== +Started work on tweets dataset 
-Started work on tweets dataset +  *  Created new table tweets_info in database cynthiak 
-Created new table tweets_info in database cynthiak +  *  Parsed and formatted the JSON file and inserted most of the columns into the tweets_info table. 
-Parsed and formatted the json file and inserted majority columns in tweets_info table. +  *  Will finish the rest of the upload in a few days. 
-Will finish the rest of the upload in a few days. +  *  Heading to the airport in a few hours for Singapore!  
-Heading to the airport in a few hours for Singapore!  + 
 +**//Some useful commands for handling the tweets dataset//** 
 + 
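 +sed -i 's/\\//g' filename (removes every backslash from the file, in place) \\ 
 +string = string.replace("\n","").replace("\r","") (strips newline and carriage-return characters from the string) \\ 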
 +\\  
 +**//SQL to create table tweets_info//** 
 +\\  
 +DROP TABLE IF EXISTS tweets_info; \\ 
 +\\  
 +CREATE TABLE tweets_info( \\ 
 + tweetCreated VARCHAR(50), \\  
 +        tweetID BIGINT, \\ 
 +        tweetText TEXT, \\ 
 +        display_text_range VARCHAR(50) NULL, \\ 
 +        source TEXT,  \\ 
 +        truncated VARCHAR(10), \\ 
 +        in_reply_to_status_id VARCHAR(50), \\ 
 +        in_reply_to_user_id VARCHAR(50), \\ 
 +        in_reply_to_screen_name TEXT, \\ 
 +        tweetUser BIGINT, \\ 
 +        tweetGeoType TEXT, \\ 
 +        tweetGeoCoord TEXT, \\ 
 +        tweetCoordType TEXT, \\ 
 +        tweetCoord TEXT, \\ 
 +        placeID TEXT, \\ 
 +        placeURL TEXT, \\ 
 +        placeType TEXT, \\ 
 +        placeCountry TEXT, \\ 
 +        tweetPFullName TEXT, \\ 
 +        tweetPCountryCode TEXT, \\  
 +        tweetPname TEXT,  \\ 
 +        tweetPBBox1x VARCHAR(50), \\  
 +        tweetPBBox1y VARCHAR(50), \\ 
 +        tweetPBBox2x VARCHAR(50),  \\ 
 +        tweetPBBox2y VARCHAR(50),  \\ 
 +        tweetPBBox3x VARCHAR(50),  \\ 
 +        tweetPBBox3y VARCHAR(50),  \\ 
 +        tweetPBBox4x VARCHAR(50),  \\ 
 +        tweetPBBox4y VARCHAR(50), \\ 
 +        placeAttributes TEXT,  \\ 
 +        tweetContributors TEXT NULL, \\ 
 +        is_quote_status VARCHAR(50),  \\ 
 +        retweetCount INTEGER,  \\ 
 +        favoriteCount INTEGER,  \\ 
 +        entitiesID BIGINT,  \\ 
 +        entitiesMediaID BIGINT,   \\ 
 +        extendedEntitiesID BIGINT,  \\ 
 +        favorited VARCHAR(10),  \\ 
 +        retweeted VARCHAR(10),   \\ 
 +        possibly_sensitive VARCHAR(50),   \\ 
 +        filter_level TEXT,    \\ 
 +        tweetLang TEXT,    \\ 
 +        tweetTimestamp BIGINT,   \\ 
 +        matchingRulesTag TEXT NULL,   \\ 
 +        matchingRulesID BIGINT   \\ 
 +);  \\ 
 +\\  
 +psql cynthiak   \\ 
 +\copy tweets_info FROM '/home/cynthiak/tab_to_semi.txt' WITH DELIMITER ';'  \\ 
 + 
 + 
 + 
 + 
  • 11_12_2018.1542249457.txt.gz
  • Last modified: 2018/11/14 21:37
  • by cynthia.kahn