11_26_2018

* Finished uploading the entire tweets dataset to the cynthiak DB

  • Two new tables created: users_info and entities_info
  • All Python files and parsed files are in /home/cynthiak on deepml.cs.gsu.edu

Some useful commands for handling the tweets dataset:

# Delete every '@' in the file ('.' serves as the s-command delimiter) and write the result to a new file.
sed 's.@..g' old_filename > new_filename
# Print one line by number. This is a template: replace line_number with a literal number and
# line_number+1 with the next number (e.g. '997p;998q'). -n suppresses the default output,
# p prints the chosen line, and q quits at the following line so the rest of the file is not read.
sed -n 'line_numberp;line_number+1q' filename
# On line 997 only, replace the first "@vidavictoriosa" with "vidavictoriosa".
sed '997s/@vidavictoriosa/vidavictoriosa/' old_filename > new_filename

SQL to create the users_info table:
-- Rebuild users_info from scratch: any existing table of that name is dropped first.
DROP TABLE IF EXISTS users_info;

-- Column order is significant: the \copy load used for this table names no columns,
-- so fields in the input file are matched to columns purely by position.
-- Identifiers are unquoted, so PostgreSQL stores them lower-cased (userID -> userid).
-- No primary key, NOT NULL or other constraints are declared.
-- NOTE(review): flag-like columns (protected, verified, geoEnabled, ...) and userCreated
-- are kept as text, presumably so the raw export loads unchanged -- confirm before
-- tightening any types.
CREATE TABLE users_info (
    userID                     BIGINT,
    userName                   TEXT,
    screenName                 TEXT,
    userLocation               TEXT,
    url                        TEXT,
    description                TEXT,
    translatorType             TEXT,
    protected                  TEXT,
    verified                   TEXT,
    followersCount             INTEGER,
    friendsCount               INTEGER,
    listedCount                INTEGER,
    favouritesCount            INTEGER,
    statusesCount              INTEGER,
    userCreated                VARCHAR(50),
    utcOffset                  TEXT,
    userTimeZone               VARCHAR(50),
    geoEnabled                 TEXT,
    lang                       TEXT,
    contributorsEnabled        TEXT,
    isTranslator               TEXT,
    profileBackgroundColor     TEXT,
    profileBackgroundImageURL  TEXT,
    profileBackgroundTile      TEXT,
    profileLinkColor           TEXT,
    profileSidebarBorderColor  TEXT,
    profileSidebarFillColor    TEXT,
    profileTextColor           TEXT,
    profileUseBackgroundImage  TEXT,
    profileImageURL            TEXT,
    defaultProfile             TEXT,
    defaultProfileImage        TEXT,
    userFollowing              TEXT,
    followRequestSent          TEXT,
    notifications              TEXT
);


psql cynthiak
\copy users_info FROM '/home/cynthiak/users_tab_to_semi.txt' WITH DELIMITER ';'


SQL to create the entities_info table:
-- Rebuild entities_info from scratch: any existing table of that name is dropped first.
DROP TABLE IF EXISTS entities_info;

-- Column order is significant: the \copy load used for this table names no columns,
-- so fields in the input file are matched to columns purely by position.
-- Identifiers are unquoted, so PostgreSQL stores them lower-cased.
-- Only two user-mention id slots exist (userMentionsID1, userMentionsID2).
-- No primary key, NOT NULL or other constraints are declared.
-- NOTE(review): no column here identifies the tweet or user a row belongs to; how rows
-- are tied back to other tables is not visible in this file -- confirm.
CREATE TABLE entities_info (
    hashtags                      TEXT,
    entitiesURL                   TEXT,
    userMentionsID1               BIGINT,
    userMentionsID2               BIGINT,
    symbols                       TEXT,
    entitiesMediaID               BIGINT,
    entitiesMediaIndices          TEXT,
    entitiesMediaURL              TEXT,
    entitiesType                  TEXT,
    entitiesSizeSmall             TEXT,
    entitiesSizeMedium            TEXT,
    entitiesSizeThumb             TEXT,
    entitiesSizeLarge             TEXT,
    extendedEntitiesMediaID       BIGINT,
    extendedEntitiesMediaIndices  TEXT,
    extendedEntitiesMediaURL      TEXT,
    extendedDisplayURL            TEXT,
    extendedExpandedURL           TEXT,
    extendedEntitiesType          TEXT,
    extendedSizeSmall             TEXT,
    extendedSizeMedium            TEXT,
    extendedSizeThumb             TEXT,
    extendedSizeLarge             TEXT
);


psql cynthiak
\copy entities_info FROM '/home/cynthiak/entities_table.txt' WITH DELIMITER ';'

  • 11_26_2018.txt
  • Last modified: 2018/11/26 21:30
  • by cynthia.kahn