11_26_2018

This is an old revision of the document!


* Completed uploading the entire tweets dataset to the cynthiak DB

  • Two new tables created: users_info and entities_info
  • All Python files and parsed files are in /home/cynthiak on deepml.cs.gsu.edu

Some useful commands for handling the tweets dataset:

# Remove every '@' character from the file ('.' is used as the s-command delimiter).
sed 's.@..g' old_filename > new_filename
# Template: print only line number `line_number`, then quit on the following line
# (substitute actual numbers for line_number and line_number+1).
sed -n 'line_numberp;line_number+1q' filename
# On line 997 only, replace the first '@vidavictoriosa' with 'vidavictoriosa'.
sed '997s/@vidavictoriosa/vidavictoriosa/' old_filename > new_filename

SQL to create the users_info table:
-- Rebuild users_info from scratch: discard any existing copy, then recreate it.
DROP TABLE IF EXISTS users_info;

-- User profile attributes.
-- No keys or constraints are declared, so every column is nullable.
-- Flag-like fields (protected, verified, geoEnabled, ...) are stored as TEXT,
-- and userCreated is kept as a string (VARCHAR(50)) rather than a timestamp type.
CREATE TABLE users_info (
    userID                    BIGINT,
    userName                  TEXT,
    screenName                TEXT,
    userLocation              TEXT,
    url                       TEXT,
    description               TEXT,
    translatorType            TEXT,
    protected                 TEXT,
    verified                  TEXT,

    -- INTEGER count columns
    followersCount            INTEGER,
    friendsCount              INTEGER,
    listedCount               INTEGER,
    favouritesCount           INTEGER,
    statusesCount             INTEGER,

    userCreated               VARCHAR(50),
    utcOffset                 TEXT,
    userTimeZone              VARCHAR(50),
    geoEnabled                TEXT,
    lang                      TEXT,
    contributorsEnabled       TEXT,
    isTranslator              TEXT,

    -- profile* columns
    profileBackgroundColor    TEXT,
    profileBackgroundImageURL TEXT,
    profileBackgroundTile     TEXT,
    profileLinkColor          TEXT,
    profileSidebarBorderColor TEXT,
    profileSidebarFillColor   TEXT,
    profileTextColor          TEXT,
    profileUseBackgroundImage TEXT,
    profileImageURL           TEXT,
    defaultProfile            TEXT,
    defaultProfileImage       TEXT,

    userFollowing             TEXT,
    followRequestSent         TEXT,
    notifications             TEXT
);

  • 11_26_2018.1543285643.txt.gz
  • Last modified: 2018/11/26 21:27
  • by cynthia.kahn