Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
11_12_2018 [2018/11/14 21:39] – cynthia.kahn | 11_12_2018 [2018/11/14 22:01] (current) – cynthia.kahn | ||
---|---|---|---|
Line 1: | Line 1: | ||
===== Week Ending November 12 ===== | ===== Week Ending November 12 ===== | ||
- | ===Amazon AI Hackathon=== | + | * Started work on tweets dataset |
- | * | + | * Created new table tweets_info in database cynthiak |
- | * | + | * Parsed and formatted the JSON file and inserted the majority of the columns into the tweets_info table. |
- | * | + | * Will finish the rest of the upload in a few days. |
- | * | + | * Heading to the airport in a few hours for Singapore! |
- | * | + | |
+ | **//Some useful commands for handling the tweets dataset//** | ||
+ | |||
+ | sed -i ' | ||
+ | string = string.replace(" | ||
+ | \\ | ||
+ | **//SQL to create table tweets_info//** | ||
+ | \\ | ||
+ | DROP TABLE IF EXISTS tweets_info; | ||
+ | \\ | ||
+ | CREATE TABLE tweets_info( \\ | ||
+ | tweetCreated VARCHAR(50), | ||
+ | tweetID BIGINT, \\ | ||
+ | tweetText TEXT, \\ | ||
+ | display_text_range VARCHAR(50) NULL, \\ | ||
+ | source TEXT, \\ | ||
+ | truncated VARCHAR(10), | ||
+ | in_reply_to_status_id VARCHAR(50), | ||
+ | in_reply_to_user_id VARCHAR(50), | ||
+ | in_reply_to_screen_name TEXT, \\ | ||
+ | tweetUser BIGINT, \\ | ||
+ | tweetGeoType TEXT, \\ | ||
+ | tweetGeoCoord TEXT, \\ | ||
+ | tweetCoordType TEXT, \\ | ||
+ | tweetCoord TEXT, \\ | ||
+ | placeID TEXT, \\ | ||
+ | placeURL TEXT, \\ | ||
+ | placeType TEXT, \\ | ||
+ | placeCountry TEXT, \\ | ||
+ | tweetPFullName TEXT, \\ | ||
+ | tweetPCountryCode TEXT, \\ | ||
+ | tweetPname TEXT, \\ | ||
+ | tweetPBBox1x VARCHAR(50), | ||
+ | tweetPBBox1y VARCHAR(50), | ||
+ | tweetPBBox2x VARCHAR(50), | ||
+ | tweetPBBox2y VARCHAR(50), | ||
+ | tweetPBBox3x VARCHAR(50), | ||
+ | tweetPBBox3y VARCHAR(50), | ||
+ | tweetPBBox4x VARCHAR(50), | ||
+ | tweetPBBox4y VARCHAR(50), | ||
+ | placeAttributes TEXT, \\ | ||
+ | tweetContributors TEXT NULL, \\ | ||
+ | is_quote_status VARCHAR(50), | ||
+ | retweetCount INTEGER, | ||
+ | favoriteCount INTEGER, | ||
+ | entitiesID BIGINT, | ||
+ | entitiesMediaID BIGINT, | ||
+ | extendedEntitiesID BIGINT, | ||
+ | favorited VARCHAR(10), | ||
+ | retweeted VARCHAR(10), | ||
+ | possibly_sensitive VARCHAR(50), | ||
+ | filter_level TEXT, \\ | ||
+ | tweetLang TEXT, \\ | ||
+ | tweetTimestamp BIGINT, | ||
+ | matchingRulesTag TEXT NULL, \\ | ||
+ | matchingRulesID BIGINT | ||
+ | ); \\ | ||
+ | \\ | ||
+ | psql cynthiak | ||
+ | \copy tweets_info FROM '/ | ||
+ | |||
+ | |||
+ | |||
+ |