From 662b4578529462e9d001ef0feccb39029ec6f63d Mon Sep 17 00:00:00 2001 From: Nathan Stitt Date: Mon, 15 Oct 2012 11:08:08 -0500 Subject: [PATCH] Fully provision a DocumentCloud vagrant box. Starting with a base Ubuntu environment, install all DocumentCloud dependencies, and configures them accordingly. At the end install a demo user and start up the various solr and cloud_crowd services. Created a script runner and expanded the existing scripts, running them in the correct order for proper operations. Most scripts are still written in shell, with a few in Ruby. This method will *hopefully* prove to be fairly resilient and (arguably) simpler than a full-blown Chef/Puppet provisioning. --- .gitignore | 4 +- ReadMe.markdown | 7 +- Vagrantfile | 10 +- scripts/base.sh | 50 ------- scripts/clone_documentcloud | 5 + scripts/cloudcrowd_node.sh | 0 scripts/cloudcrowd_server.sh | 1 - .../{configure_nginx.rb => configure_nginx} | 3 + scripts/{db.sh => configure_postgresql} | 0 scripts/copy_secrets | 5 + scripts/github_keys | 8 + scripts/install_dependencies | 9 ++ scripts/install_gems | 16 ++ scripts/{app.sh => install_nginx} | 9 +- scripts/keygen.sh | 5 - scripts/pretty_motd | 13 ++ scripts/provision | 54 +++++++ scripts/runner | 68 +++++++++ scripts/script.rb | 139 ++++++++++++++++++ scripts/speed_up_ssh | 6 + scripts/start_up_everything | 23 +++ scripts/update | 4 + 22 files changed, 373 insertions(+), 66 deletions(-) delete mode 100755 scripts/base.sh create mode 100644 scripts/clone_documentcloud delete mode 100644 scripts/cloudcrowd_node.sh delete mode 100644 scripts/cloudcrowd_server.sh rename scripts/{configure_nginx.rb => configure_nginx} (98%) mode change 100644 => 100755 rename scripts/{db.sh => configure_postgresql} (100%) create mode 100644 scripts/copy_secrets create mode 100644 scripts/github_keys create mode 100644 scripts/install_dependencies create mode 100644 scripts/install_gems rename scripts/{app.sh => install_nginx} (76%) delete mode 100644 
scripts/keygen.sh create mode 100644 scripts/pretty_motd create mode 100755 scripts/provision create mode 100755 scripts/runner create mode 100644 scripts/script.rb create mode 100644 scripts/speed_up_ssh create mode 100644 scripts/start_up_everything create mode 100644 scripts/update diff --git a/.gitignore b/.gitignore index c5cd59a..7972475 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .DS_STORE *.box .vagrant -.bashrc \ No newline at end of file +.bashrc +scripts/*.output +scripts/*.fail diff --git a/ReadMe.markdown b/ReadMe.markdown index 40ea3d1..eadcac0 100644 --- a/ReadMe.markdown +++ b/ReadMe.markdown @@ -1,3 +1,8 @@ # DocumentCloud Vagrant VM -These Vagrant scripts provide a(n ALPHA-STAGE) development environment for the DocumentCloud platform. [DocumentCloud](http://www.documentcloud.org) itself is a web based platform for uploading, analyzing, annotating and publishing primary source material. \ No newline at end of file +These Vagrant scripts provide a(n ALPHA-STAGE) development environment for the DocumentCloud platform. [DocumentCloud](http://www.documentcloud.org) itself is a web based platform for uploading, analyzing, annotating and publishing primary source material. + +Vagrant will execute the [runner script](documentcloud-vagrant/blob/master/scripts/runner) to provision the box. + +The script has several variables that can be modified at the top, such as the user login and password to create. + diff --git a/Vagrantfile b/Vagrantfile index 6f20288..9570af5 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -5,9 +5,11 @@ Vagrant::Config.run do |config| # All Vagrant configuration is done here. The most common configuration # options are documented and commented below. For a complete reference, # please see the online documentation at vagrantup.com.
- + + # This assumes the documentcloud checkout is a sibling of this directory: username = "ubuntu" - local_app_dir = "/Users/ted/dc/documentcloud/" + local_app_dir = "#{ File.dirname(__FILE__) }/../documentcloud/" + app_root = "/home/#{username}/documentcloud" rails_env = "development" @@ -17,7 +19,7 @@ Vagrant::Config.run do |config| # The url from where the 'config.vm.box' box will be fetched if it # doesn't already exist on the user's system. - # config.vm.box_url = "http://domain.com/path/to/above.box" + config.vm.box_url = "http://s3.documentcloud.org/documentcloud.2012.10.15.box" # Assign this VM to a host-only network IP, allowing you to access it # via the IP. Host-only networks can talk to the host machine as well as @@ -43,7 +45,7 @@ Vagrant::Config.run do |config| script = <<-SHELL export USERNAME=#{username}; export RAILS_ENV=#{rails_env}; - sh /vagrant/scripts/base.sh; + ruby /vagrant/scripts/runner SHELL config.vm.provision :shell, :inline => script diff --git a/scripts/base.sh b/scripts/base.sh deleted file mode 100755 index 25b928c..0000000 --- a/scripts/base.sh +++ /dev/null @@ -1,50 +0,0 @@ -# Update apt repository -apt-get update -apt-get -y upgrade - -test $USERNAME || { echo "USERNAME has to be set" >&2; exit 1; } -test $RAILS_ENV || { echo "USERNAME has to be set" >&2; exit 1; } - -# Set up system dependencies -BASICS='git build-essential postgresql libpq-dev git sqlite3 libsqlite3-dev libpcre3-dev lzop libxml2-dev libxslt-dev libcurl4-gnutls-dev libitext-java ruby1.8 rubygems' -echo $BASICS | xargs apt-get install -y - -DOCSPLIT_DEPS='graphicsmagick pdftk xpdf poppler-utils libreoffice libreoffice-java-common tesseract-ocr ghostscript' -echo $DOCSPLIT_DEPS | xargs apt-get install -y - -cd /home/$USERNAME - -# approve github ssh host key -grep -q github .ssh/known_hosts 2>/dev/null || ssh-keyscan -t rsa github.com > .ssh/known_hosts - -#git clone https://github.com/documentcloud/documentcloud.git - -# Install gems -cd documentcloud -git checkout --track -b bundler
origin/bundler -git pull -gem install bundler --no-ri --no-rdoc -bundle install -git checkout master -git pull - -# Ensure that the secrets directory exists -test -e secrets || { cp -r config/server/secrets ./secrets; } - -# disable ssh dns to avoid long pause before login -grep -q '^UseDNS no' /etc/ssh/sshd_config || echo 'UseDNS no' >> /etc/ssh/sshd_config -/etc/init.d/ssh reload - -# replace annoying motd with new one -rm /etc/motd -cat >/etc/motd <<'EOF' - -______ _ _____ _ _ -| _ \ | | / __ \ | | | -| | | |___ ___ _ _ _ __ ___ ___ _ __ | |_| / \/ | ___ _ _ __| | -| | | / _ \ / __| | | | '_ ` _ \ / _ \ '_ \| __| | | |/ _ \| | | |/ _` | -| |/ / (_) | (__| |_| | | | | | | __/ | | | |_| \__/\ | (_) | |_| | (_| | -|___/ \___/ \___|\__,_|_| |_| |_|\___|_| |_|\__|\____/_|\___/ \__,_|\__,_| - -EOF -uname -a | tee -a /etc/motd diff --git a/scripts/clone_documentcloud b/scripts/clone_documentcloud new file mode 100644 index 0000000..fb73c1f --- /dev/null +++ b/scripts/clone_documentcloud @@ -0,0 +1,5 @@ + +cd /home/$USERNAME + +git clone https://github.com/documentcloud/documentcloud.git + diff --git a/scripts/cloudcrowd_node.sh b/scripts/cloudcrowd_node.sh deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/cloudcrowd_server.sh b/scripts/cloudcrowd_server.sh deleted file mode 100644 index 712174d..0000000 --- a/scripts/cloudcrowd_server.sh +++ /dev/null @@ -1 +0,0 @@ -# verify that cloudcrowd database is accessible and has schema loaded. 
\ No newline at end of file diff --git a/scripts/configure_nginx.rb b/scripts/configure_nginx old mode 100644 new mode 100755 similarity index 98% rename from scripts/configure_nginx.rb rename to scripts/configure_nginx index d8a5f7b..1d6b7aa --- a/scripts/configure_nginx.rb +++ b/scripts/configure_nginx @@ -1,3 +1,5 @@ +#!/usr/bin/ruby + require 'fileutils' require 'erb' =begin @@ -32,3 +34,4 @@ dc_conf_template = File.open(File.join(here, "erb", "documentcloud.conf.erb")).read dc_conf.puts ERB.new(dc_conf_template).result(binding) end + diff --git a/scripts/db.sh b/scripts/configure_postgresql similarity index 100% rename from scripts/db.sh rename to scripts/configure_postgresql diff --git a/scripts/copy_secrets b/scripts/copy_secrets new file mode 100644 index 0000000..59ab346 --- /dev/null +++ b/scripts/copy_secrets @@ -0,0 +1,5 @@ + +cd /home/$USERNAME/documentcloud + +# Ensure that the secrets directory exists +test -e secrets || { cp -r config/server/secrets ./secrets; } diff --git a/scripts/github_keys b/scripts/github_keys new file mode 100644 index 0000000..9281c40 --- /dev/null +++ b/scripts/github_keys @@ -0,0 +1,8 @@ + +cd /home/$USERNAME + +# approve github ssh host key +grep -q github .ssh/known_hosts 2>/dev/null || ssh-keyscan -t rsa github.com > .ssh/known_hosts + + + diff --git a/scripts/install_dependencies b/scripts/install_dependencies new file mode 100644 index 0000000..89ce9eb --- /dev/null +++ b/scripts/install_dependencies @@ -0,0 +1,9 @@ + +apt-get update -q + +# Set up system dependencies +BASICS='git build-essential postgresql libpq-dev git sqlite3 libsqlite3-dev libpcre3-dev lzop libxml2-dev libxslt-dev libcurl4-gnutls-dev libitext-java ruby1.8 rubygems' +echo $BASICS | xargs apt-get install -q -y + +DOCSPLIT_DEPS='graphicsmagick pdftk xpdf poppler-utils libreoffice libreoffice-java-common tesseract-ocr ghostscript' +echo $DOCSPLIT_DEPS | xargs apt-get install -q -y diff --git a/scripts/install_gems b/scripts/install_gems new file mode 
100644 index 0000000..ae31f46 --- /dev/null +++ b/scripts/install_gems @@ -0,0 +1,16 @@ + +cd /home/$USERNAME/documentcloud + +# Install gems from the bundler branch (the tracking branch is created on the first run) +git branch | grep -q bundler +if [ "$?" -eq 0 ]; then + git checkout bundler +else + git checkout --track -b bundler origin/bundler +fi + +git pull +sudo gem install bundler --no-ri --no-rdoc +sudo bundle install + +git checkout master diff --git a/scripts/app.sh b/scripts/install_nginx similarity index 76% rename from scripts/app.sh rename to scripts/install_nginx index cb00130..6b272d9 100644 --- a/scripts/app.sh +++ b/scripts/install_nginx @@ -1,5 +1,5 @@ # Ensure nginx is installed -test -e /usr/local/nginx || /usr/local/bin/passenger-install-nginx-module --auto --auto-download \ +/usr/local/bin/passenger-install-nginx-module --auto --auto-download \ --prefix /usr/local/nginx --extra-configure-flags='--with-http_gzip_static_module --with-http_ssl_module --with-http_stub_status_module' LINE='export PATH=$PATH:/usr/local/nginx/sbin' @@ -8,12 +8,13 @@ grep -q "$LINE" .bashrc 2>/dev/null || echo "$LINE" >> .bashrc mkdir -p /usr/local/nginx/conf/sites-enabled /var/log/nginx/ mkdir -p /var/log/nginx -ruby /vagrant/scripts/configure_nginx.rb - cd /home/ubuntu/documentcloud test -e secrets/keys || { cp -r config/server/keys secrets/ ; } cp config/server/nginx/nginx.init /etc/init.d/nginx + update-rc.d nginx defaults -/etc/init.d/nginx start + + + diff --git a/scripts/keygen.sh b/scripts/keygen.sh deleted file mode 100644 index a3ad3cd..0000000 --- a/scripts/keygen.sh +++ /dev/null @@ -1,5 +0,0 @@ -openssl genrsa -des3 -out dev.dcloud.org.key 1024 -openssl req -new -key dev.dcloud.org.key -out dev.dcloud.org.csr -cp dev.dcloud.org.key dev.dcloud.org.key.org -openssl rsa -in dev.dcloud.org.key.org -out dev.dcloud.org.key -openssl x509 -req -in dev.dcloud.org.csr -signkey dev.dcloud.org.key -out dev.dcloud.org.crt diff --git a/scripts/pretty_motd b/scripts/pretty_motd new file mode 100644 index 0000000..7321ecc --- /dev/null +++
b/scripts/pretty_motd @@ -0,0 +1,13 @@ +# replace annoying motd with new one +rm /etc/motd +cat >/etc/motd <<'EOF' + +______ _ _____ _ _ +| _ \ | | / __ \ | | | +| | | |___ ___ _ _ _ __ ___ ___ _ __ | |_| / \/ | ___ _ _ __| | +| | | / _ \ / __| | | | '_ ` _ \ / _ \ '_ \| __| | | |/ _ \| | | |/ _` | +| |/ / (_) | (__| |_| | | | | | | __/ | | | |_| \__/\ | (_) | |_| | (_| | +|___/ \___/ \___|\__,_|_| |_| |_|\___|_| |_|\__|\____/_|\___/ \__,_|\__,_| + +EOF +uname -a | tee -a /etc/motd diff --git a/scripts/provision b/scripts/provision new file mode 100755 index 0000000..c2262f0 --- /dev/null +++ b/scripts/provision @@ -0,0 +1,54 @@ +#!/usr/bin/env ruby + +DIR='/home/ubuntu/documentcloud/' + +require 'optparse' + +options = { :login=> 'testing@documentcloud.org', :password=> 'testing42' } + +OptionParser.new do |opts| + opts.banner = "Usage: #{$0} [options]" + + opts.on("-l", '--login=LOGIN_EMAIL',"User Login to create") do |l| + options[:login] = l + end + opts.on("-p", '--password=PASSWORD', "Password") do |l| + options[:password] = l + end +end.parse! + +Dir.chdir DIR + +require "./config/environment" +require 'sqlite3' +require 'cloud-crowd' + +organization = Organization.find_by_slug( 'test' ) +if organization.nil? + organization = Organization.create!({:name=>'Testing',:slug=>'test'}) + puts "Created Testing organization, id: #{organization.id}" +end + +account = Account.find_by_email( options[:login] ) +if account.nil? + account=Account.create!({:organization=>organization,:first_name=>'Testing',:last_name=>'Account', :email=> options[:login], \ + :hashed_password=>BCrypt::Password.create( options[:password] ), :role=>Account::ADMINISTRATOR }) + puts "Created Testing account. 
Login: #{options[:login]}, pw: #{options[:password]}, id: #{account.id}" +end + +# the cloud crowd test server will need this +db = SQLite3::Database.new( "#{DIR}cloud_crowd.db" ) +exists = db.get_first_value( "SELECT name FROM sqlite_master WHERE type='table' AND name='schema_migrations'" ) +if exists.nil? + puts "Creating schema_migrations table in #{DIR}cloud_crowd.db and running migrations" + db.execute( "CREATE TABLE schema_migrations (version varchar(255) NOT NULL)" ) +end +db.close + + +CloudCrowd.configure("config/cloud_crowd/development/config.yml") +require 'cloud_crowd/models' +CloudCrowd.configure_database("config/cloud_crowd/development/database.yml", false) +require 'cloud_crowd/schema.rb' + + diff --git a/scripts/runner b/scripts/runner new file mode 100755 index 0000000..7aac026 --- /dev/null +++ b/scripts/runner @@ -0,0 +1,68 @@ +#!/usr/bin/env ruby + +require File.dirname(__FILE__) + '/script' + +# This is intended to be run as part of Vagrant provisioning. +# It will use the Script class (in script.rb) +# to execute the scripts below both in parallel and in sequence, depending +# on whether later scripts rely on them. +# +# The end result should configure a Vagrant instance with a +# fully functional DocumentCloud server +# +# The scripts are located in the scripts directory, and can be re-run if needed +# from there. We've attempted to make them as re-entrant as possible +# but they may not be fully there yet.
+# + + +ENV['RAILS_ENV'] = 'development' +LOGIN = 'test@example.com' +PASSWORD = 'documentcloud' +Script.user = ENV['USERNAME'] = 'ubuntu' +Script.directory = '/vagrant/scripts' +REPOSITORY = 'https://github.com/documentcloud/documentcloud.git' + +Script[ :update ].run_weekly :as_root=>true, :wait=>true, :msg=>'Will take >10 minutes' +Script[ :install_dependencies ].run_once :as_root=>true, :wait=>true, :msg=>'Will take >5 minutes' + +Script[ :speed_up_ssh ].run_once :as_root=>true +Script[ :pretty_motd ].run_once :as_root=>true +Script[ :github_keys ].run_once + +Script.wait_for_completion +# The clone runs outside the Script.output block: run() reports through Script.output, so waiting on it inside that block deadlocks +unless File.directory?( "/home/#{ENV['USERNAME']}/documentcloud" ) + Script.output do + STDERR.puts "documentcloud directory doesn't exist." + STDERR.puts "Checking it out from #{REPOSITORY}" + end + Script[:clone_documentcloud].run :wait=>true, :args=>{:repo=>REPOSITORY} +end +# copy_secrets works inside the checkout, so it only runs once the checkout is known to exist +Script[ :copy_secrets ].run_once :wait=>true +Script[:install_gems ].run_once :wait=>true + +Script[:install_nginx ].run_once :wait=>true, :as_root=>true + +Script[:configure_nginx ].run_once :as_root=>true,:shell=>'/usr/bin/ruby' +Script[:configure_postgresql ].run_once :as_root=>true + +Script.wait_for_completion + +Script[:provision ].run_once :shell=>'/usr/bin/ruby', :wait=>true, :args=>{ :login=> LOGIN, :password=>PASSWORD } +Script[:start_up_everything ].run +Script.wait_for_completion + +ip = `/sbin/ifconfig eth1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}'`.strip + +Script.output do + puts <<-EOS +#################################################################### +# All Done. +# A fully configured DocumentCloud should be running on vagrant at +# http://#{ip} +# Log in as: #{LOGIN} with password: #{PASSWORD} +#################################################################### +EOS +end diff --git a/scripts/script.rb b/scripts/script.rb new file mode 100644 index 0000000..78a25a5 --- /dev/null +++ b/scripts/script.rb @@ -0,0 +1,139 @@ +# A small class that can run multiple scripts in parallel.
+# It logs the output of scripts to a file with the same +# name as the script with extension .output +# If the script exits with a non-zero status, the +# .output file is moved to a .fail file, +# allowing the step to be re-run. +# +# Using the .output files' mtime, the runner +# can prevent re-running scripts that are not +# needed +# +# Example Usage: +# Script['test'].run_daily :as_root=>true +# For more examples, see the runner script + + +require 'thread' + +class Script + + @@directory = '/vagrant/scripts' + @@user = 'ubuntu' + @@threads = [] + @@semaphore = Mutex.new + def initialize( name ) + @name = name + @file = "#{@@directory}/#{name}" + @status = "#{@@directory}/#{name}.output" + end + # Appends str to the .output file; puts adds a newline only when str does not already end with one + def log( str ) + File.open(@status,'a') do | status | + status.puts str + end + end + + def has_ran? + File.exist?( @status ) + end + + def last_run + has_ran? ? File.mtime( @status ) : Time.at(0) + end + # Runs the script on a new thread, joining it when opts[:wait] is set; returns the thread + def run(opts={}) + t=Thread.new do + File.unlink( @status ) if has_ran? + STDOUT.sync = true + shell = opts[:shell] ? opts[:shell] : '/bin/bash' + args = opts[:args] ? opts[:args].map{|k,v| "--#{k}=#{v}" }.join(' ') : '' + cmd = opts[:as_root] ?
"#{shell} #{@file} #{args} 2>&1" : "su #{@@user} -s #{shell} #{@file} -- #{args} 2>&1" + log cmd + Script.output "Running: #{@name} #{opts[:msg]}" + IO.popen( cmd ) do | stdout | + while line = stdout.gets + log line + end + end + if 0 != $?.exitstatus + Script.output do + STDERR.puts "ERROR: #{@name} exited with status #{$?.exitstatus}" + end + File.rename @status, "#{@@directory}/#{@name}.fail" + end + @@semaphore.synchronize { @@threads.delete( Thread.current ) } + end + @@semaphore.synchronize { @@threads << t } + if opts[:wait] + t.join + end + t + end + + def run_if( condition, opts ) + if condition + run( opts ) + else + Script.output( "Skipping: #{@name} #{opts[:skip_msg]}" ) + end + + end + + class << self + @@output_mutex = Mutex.new + + def user=(u) + @@user=u + end + def directory=(d) + @@directory=d + end + + def output( msg='' ) + @@output_mutex.synchronize do + puts msg unless msg.empty? + yield if block_given? + end + end + + def []( script_name ) + script = Script.new( script_name ) + return Script::ConditionalRunner.new( script ) + end + # Joins a snapshot of the thread list, because workers remove themselves from it as they finish + def wait_for_completion( thread=nil ) + waiting = thread ? [ thread ] : @@semaphore.synchronize { @@threads.dup } + waiting.each do | t | + t.join + end + end + + end + + + class ConditionalRunner + DAY = 60 * 60 * 24 + attr_reader :script + + def initialize( script ) + @script = script + end + + def run( opts={} ) + return @script.run(opts) + end + + def run_once( opts={} ) + @script.run_if( ! @script.has_ran?
, opts ) + end + + def run_daily( opts={} ) + @script.run_if( @script.last_run < Time.now-DAY, opts ) + end + + def run_weekly( opts={} ) + @script.run_if( @script.last_run < Time.now-(DAY*7), opts ) + end + end +end diff --git a/scripts/speed_up_ssh b/scripts/speed_up_ssh new file mode 100644 index 0000000..1b93897 --- /dev/null +++ b/scripts/speed_up_ssh @@ -0,0 +1,6 @@ + +grep -q '^UseDNS no' /etc/ssh/sshd_config || echo 'UseDNS no' >> /etc/ssh/sshd_config + +reload -v ssh + + diff --git a/scripts/start_up_everything b/scripts/start_up_everything new file mode 100644 index 0000000..429d8cf --- /dev/null +++ b/scripts/start_up_everything @@ -0,0 +1,23 @@ +# Start nginx and postgresql, then restart solr and the CloudCrowd server and node; pgrep's exit status is tested so several matching PIDs are fine + +sudo service nginx start + +sudo service postgresql start + +cd /home/$USERNAME/documentcloud + +if pgrep -f solr > /dev/null; then + rake sunspot:solr:stop +fi +rake sunspot:solr:start + +if pgrep -f 'thin.*cloud-crowd-server' > /dev/null; then + rake crowd:server:stop +fi +rake crowd:server:start + + +if pgrep -f 'thin.*cloud-crowd-node' > /dev/null; then + rake crowd:node:stop +fi +rake crowd:node:start diff --git a/scripts/update b/scripts/update new file mode 100644 index 0000000..ee95db8 --- /dev/null +++ b/scripts/update @@ -0,0 +1,4 @@ + +apt-get -q update + +apt-get -q -y upgrade