11_12_2018

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
11_12_2018 [2018/11/14 21:30] – created cynthia.kahn | 11_12_2018 [2018/11/14 22:01] (current) cynthia.kahn
Line 1: Line 1:
 ===== Week Ending November 12 ===== ===== Week Ending November 12 =====
 +
 +* Started work on tweets dataset
 +  *  Created new table tweets_info in database cynthiak
 +  *  Parsed and formatted the JSON file and inserted the majority of the columns into the tweets_info table.
 +  *  Will finish the rest of the upload in a few days.
 +  *  Heading to the airport in a few hours for Singapore! 
 +
 +**//Some useful commands for handling the tweets dataset//**
 +
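 +Strip all backslashes from the file in place (shell): \\
 +sed -i 's/\\//g' filename \\
 +Remove newline and carriage-return characters from a string (Python-style): \\
 +string = string.replace("\n","").replace("\r","") \\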
 +\\ 
 +**//SQL to create table tweets_info//**
 +\\ 
 +DROP TABLE IF EXISTS tweets_info; \\
 +\\ 
 +CREATE TABLE tweets_info( \\
 + tweetCreated VARCHAR(50), \\ 
 +        tweetID BIGINT, \\
 +        tweetText TEXT, \\
 +        display_text_range VARCHAR(50) NULL, \\
 +        source TEXT,  \\
 +        truncated VARCHAR(10), \\
 +        in_reply_to_status_id VARCHAR(50), \\
 +        in_reply_to_user_id VARCHAR(50), \\
 +        in_reply_to_screen_name TEXT, \\
 +        tweetUser BIGINT, \\
 +        tweetGeoType TEXT, \\
 +        tweetGeoCoord TEXT, \\
 +        tweetCoordType TEXT, \\
 +        tweetCoord TEXT, \\
 +        placeID TEXT, \\
 +        placeURL TEXT, \\
 +        placeType TEXT, \\
 +        placeCountry TEXT, \\
 +        tweetPFullName TEXT, \\
 +        tweetPCountryCode TEXT, \\ 
 +        tweetPname TEXT,  \\
 +        tweetPBBox1x VARCHAR(50), \\ 
 +        tweetPBBox1y VARCHAR(50), \\
 +        tweetPBBox2x VARCHAR(50),  \\
 +        tweetPBBox2y VARCHAR(50),  \\
 +        tweetPBBox3x VARCHAR(50),  \\
 +        tweetPBBox3y VARCHAR(50),  \\
 +        tweetPBBox4x VARCHAR(50),  \\
 +        tweetPBBox4y VARCHAR(50), \\
 +        placeAttributes TEXT,  \\
 +        tweetContributors TEXT NULL, \\
 +        is_quote_status VARCHAR(50),  \\
 +        retweetCount INTEGER,  \\
 +        favoriteCount INTEGER,  \\
 +        entitiesID BIGINT,  \\
 +        entitiesMediaID BIGINT,   \\
 +        extendedEntitiesID BIGINT,  \\
 +        favorited VARCHAR(10),  \\
 +        retweeted VARCHAR(10),   \\
 +        possibly_sensitive VARCHAR(50),   \\
 +        filter_level TEXT,    \\
 +        tweetLang TEXT,    \\
 +        tweetTimestamp BIGINT,   \\
 +        matchingRulesTag TEXT NULL,   \\
 +        matchingRulesID BIGINT   \\
 +);  \\
 +\\ 
 +psql cynthiak   \\
 +\copy tweets_info FROM '/home/cynthiak/tab_to_semi.txt' WITH DELIMITER ';'  \\
 +
 +
 +
 +
 +
  • 11_12_2018.1542249059.txt.gz
  • Last modified: 2018/11/14 21:30
  • by cynthia.kahn