Skip to content

Commit

Permalink
requeue enhancements
Browse files Browse the repository at this point in the history
spot instances
soft stop Analysis
redis max client 40000
6 concurent analyses
  • Loading branch information
brianlball committed Aug 23, 2024
1 parent 64f597f commit 8c65283
Show file tree
Hide file tree
Showing 18 changed files with 128 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/openstudio-server-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ jobs:
if: |
github.ref == 'refs/heads/master' ||
github.ref == 'refs/heads/develop'
# github.ref == 'refs/heads/3.6.1-4'
# github.ref == 'refs/heads/3.6.1-3'
shell: bash
run: ./docker/deployment/scripts/deploy_docker_github_actions.sh
env:
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
image: redis:6.0.9
ports:
- "6379:6379"
command: "redis-server --requirepass ${REDIS_PASSWORD}"
command: "redis-server --requirepass ${REDIS_PASSWORD} --maxclients 40000"
web:
image: nrel/openstudio-server:latest
ports:
Expand Down Expand Up @@ -72,7 +72,7 @@ services:
worker:
image: nrel/openstudio-server:latest
environment:
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=${REDIS_URL}
- MONGO_USER=${MONGO_USER}
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ services:
image: redis:6.0.9
ports:
- "6379:6379"
command: "redis-server --requirepass ${REDIS_PASSWORD}"
command: "redis-server --requirepass ${REDIS_PASSWORD} --maxclients 40000"
web:
image: 127.0.0.1:5000/openstudio-server
build:
Expand Down Expand Up @@ -69,7 +69,7 @@ services:
rails_env: docker
environment:
- OS_SERVER_NUMBER_OF_WORKERS=${OS_SERVER_NUMBER_OF_WORKERS}
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=${REDIS_URL}
- MONGO_USER=${MONGO_USER}
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
image: redis:6.0.9
ports:
- "6379:6379"
command: "redis-server --requirepass ${REDIS_PASSWORD}"
command: "redis-server --requirepass ${REDIS_PASSWORD} --maxclients 40000"
web:
image: nrel/openstudio-server:latest
build:
Expand Down Expand Up @@ -75,7 +75,7 @@ services:
bundle_args: ''
environment:
- OS_SERVER_NUMBER_OF_WORKERS=1
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=${REDIS_URL}
- MONGO_USER=${MONGO_USER}
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ services:
image: redis:6.0.9
ports:
- "6379:6379"
command: "redis-server --requirepass ${REDIS_PASSWORD}"
command: "redis-server --requirepass ${REDIS_PASSWORD} --maxclients 40000"
web:
image: nrel/openstudio-server:latest
build:
Expand Down Expand Up @@ -69,7 +69,7 @@ services:
rails_env: docker
environment:
- OS_SERVER_NUMBER_OF_WORKERS=${OS_SERVER_NUMBER_OF_WORKERS}
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=${REDIS_URL}
- MONGO_USER=${MONGO_USER}
Expand Down
2 changes: 1 addition & 1 deletion docker/server/start-web-background.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ echo "Waiting for Redis to start"

#cd /opt/openstudio/server && bundle exec rake environment resque:work
echo "Startup two resque workers"
cd /opt/openstudio/server && COUNT=2 bundle exec rake environment resque:workers
cd /opt/openstudio/server && COUNT=6 bundle exec rake environment resque:workers
4 changes: 2 additions & 2 deletions local_setup_scripts/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ services:
resources:
reservations:
cpus: "1"
command: "redis-server --requirepass openstudio"
command: "redis-server --requirepass openstudio --maxclients 40000"
web:
image: 127.0.0.1:5000/openstudio-server
ports:
Expand Down Expand Up @@ -84,7 +84,7 @@ services:
worker:
image: 127.0.0.1:5000/openstudio-server
environment:
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=redis://:openstudio@queue:6379
- MONGO_USER=openstudio
Expand Down
4 changes: 2 additions & 2 deletions local_setup_scripts/win64/docker-compose-mock.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ services:
placement:
constraints:
- node.role == manager
command: "redis-server --requirepass openstudio"
command: "redis-server --requirepass openstudio --maxclients 40000"
web:
image: 127.0.0.1:5000/openstudio-server
ports:
Expand Down Expand Up @@ -72,7 +72,7 @@ services:
worker:
image: 127.0.0.1:5000/openstudio-server
environment:
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=redis://:openstudio@queue:6379
- MONGO_USER=openstudio
Expand Down
4 changes: 2 additions & 2 deletions local_setup_scripts/win64/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ services:
placement:
constraints:
- node.role == manager
command: "redis-server --requirepass openstudio"
command: "redis-server --requirepass openstudio --maxclients 40000"
web:
image: 127.0.0.1:5000/openstudio-server
ports:
Expand Down Expand Up @@ -72,7 +72,7 @@ services:
worker:
image: 127.0.0.1:5000/openstudio-server
environment:
- QUEUES=simulations
- QUEUES=requeued,simulations
- COUNT=1
- REDIS_URL=redis://:openstudio@queue:6379
- MONGO_USER=openstudio
Expand Down
22 changes: 22 additions & 0 deletions server/app/controllers/analyses_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def destroy
# stop analysis button action
def stop
@analysis = Analysis.find(params[:id])
if @analysis.nil?
logger.error "Analysis with ID #{params[:id]} not found."
redirect_to analyses_path, alert: 'Analysis not found.' and return
end
res = @analysis.stop_analysis

respond_to do |format|
Expand All @@ -208,6 +212,24 @@ def stop
end
end
end

# stop analysis button action
def soft_stop
@analysis = Analysis.find(params[:id])
if @analysis.nil?
logger.error "Analysis with ID #{params[:id]} not found."
redirect_to analyses_path, alert: 'Analysis not found.' and return
end
res = @analysis.soft_stop_analysis

respond_to do |format|
if res[0]
format.html { redirect_to @analysis, notice: 'Analysis flag changed to stop. Will NOT wait until the last submitted run finishes before killing.' }
else
format.html { redirect_to @analysis, notice: 'Analysis flag did NOT change.' }
end
end
end

# Controller for submitting the action via post. This right now only works with the API
# and will only return a JSON response based on whether or not the analysis has been
Expand Down
20 changes: 19 additions & 1 deletion server/app/controllers/data_points_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -302,13 +302,15 @@ def requeue
Rails.logger.warn "data_points_controller.REQUEUE"
@data_point = DataPoint.find(params[:id])
analysis_id = @data_point.analysis
Rails.logger.warn "data_points_contoller.REQUEUEing #{@data_point.id}"
Rails.logger.debug "data_points_controller.id: #{@data_point.id}"
Rails.logger.debug "data_points_controller.job_id: #{@data_point.job_id}"
# Destroy the existing job in Resque queue; this is tied to a worker_host:PID:uuid
Resque::Job.destroy(:requeue, 'ResqueJobs::RunSimulateDataPoint', @data_point.job_id)
Resque::Job.destroy(:simulations, 'ResqueJobs::RunSimulateDataPoint', @data_point.job_id)

# Enqueue a new job
Resque.enqueue(ResqueJobs::RunSimulateDataPoint, @data_point.job_id)
Resque.enqueue_to(:requeued, ResqueJobs::RunSimulateDataPoint, @data_point.job_id)

# Attempt to find the worker processing this job
#worker = find_resque_worker_by_job_id(@data_point.job_id)
Expand All @@ -335,6 +337,22 @@ def requeue
end
end

def requeue_started
@analysis = Analysis.find(params[:id])
Rails.logger.warn "Requeueing all NOT completed normal simulations for analysis #{@analysis.id}"
data_points_to_requeue = @analysis.data_points.where.not(status_message: 'completed normal')

data_points_to_requeue.each do |dp|
Rails.logger.warn "Requeueing DataPoint #{dp.id}"
Resque.enqueue_to(:requeued, ResqueJobs::RunSimulateDataPoint, dp.job_id)
end

respond_to do |format|
format.html { redirect_to analysis_path(@analysis), notice: 'Simulations were successfully requeued.' }
format.json { head :no_content }
end
end

def find_resque_worker_by_job_id(job_id)
Rails.logger.debug "data_points_controller.find_resque_worker_by_job_id"
Resque.workers.each do |worker|
Expand Down
31 changes: 29 additions & 2 deletions server/app/controllers/pages_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def dashboard
# data for dashboard header
@projects = Project.all
# sort works because the states are queued, started, completed, na. started is the last in the list...
@analyses = Analysis.all.order_by(:updated_at.asc)
#@analyses = Analysis.all.order_by(:updated_at.asc)
failed_runs = DataPoint.where(status_message: 'datapoint failure').count
total_runs = DataPoint.all.count
completed_cnt = DataPoint.where(status: 'completed').count
Expand All @@ -50,7 +50,34 @@ def dashboard

# Finding the current analysis
#candidates = Analysis.includes(:jobs).order_by(:updated_at.asc)
@current = @analyses.detect { |analysis| analysis.jobs.any? { |job| job.status == 'started' } }
#@current = @analyses.detect { |analysis| analysis.jobs.any? { |job| job.status == 'started' } }

# Step 1: Fetch all analyses ordered by updated_at descending (newest first)
all_analyses = Analysis.includes(:jobs).order_by(updated_at: :desc)

# Step 2: Select the first two 'started' analyses
#started_analyses = all_analyses.select { |analysis| analysis.jobs.any? { |job| job.status == 'started' } }.first(2)
# Step 2: Efficiently select the first two 'started' analyses, leveraging Ruby for fine-tuned sorting
started_analyses = all_analyses.select { |analysis| analysis.jobs.any? { |job| job.status == 'started' }
}.sort_by { |analysis|
# This finds the earliest start time among the started jobs for sorting
analysis.jobs.select { |job| job.status == 'started' }.min_by(&:created_at).created_at
}.first(2)

# Step 3: Set @current and prepare @analyses
if started_analyses.any?
# Assume @current is the most recently updated
@current = started_analyses.first
# Ensure @analyses includes other analyses without changing the original order too much
# Remove @current from all_analyses and prepend the second 'started' analysis if it exists
all_analyses -= started_analyses
all_analyses.prepend(started_analyses.second) if started_analyses.length > 1
else
# Fallback if no 'started' analyses found
@current = all_analyses.first
end

@analyses = all_analyses
# If no 'started' analysis is currently running, optionally set @current to the most recently updated analysis
@current ||= @analyses.first

Expand Down
20 changes: 20 additions & 0 deletions server/app/models/analysis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,26 @@ def stop_analysis
[save!, errors]
end

def soft_stop_analysis
Rails.logger.info('attempting to stop analysis')

self.run_flag = false

jobs.each do |j|
unless j.status == 'completed'
j.status = 'completed'
j.end_time = Time.new
j.status_message = 'datapoint canceled'
j.save!
end
end

# Remove all the queued background jobs for this analysis
data_points.where(status: 'queued').each(&:set_soft_canceled_state)

[save!, errors]
end

# Method that pulls out the variables from the uploaded problem/analysis JSON.
def pull_out_os_variables
pat_json = false
Expand Down
11 changes: 11 additions & 0 deletions server/app/models/data_point.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,16 @@ def set_canceled_state
self.status_message = 'datapoint canceled'
save!
end

def set_soft_canceled_state
Rails.logger.debug "data_point.set_soft_canceled_state"
#destroy_background_job # destroy queued job
self.run_start_time ||= Time.now
self.run_end_time = Time.now
self.status = :completed
self.status_message = 'datapoint canceled'
save!
end

def set_queued_state
Rails.logger.debug "data_point.set_queued_state"
Expand Down Expand Up @@ -171,6 +181,7 @@ def destroy_background_job
elsif Rails.application.config.job_manager == :resque
if job_id
Resque::Job.destroy(:simulations, 'ResqueJobs::RunSimulateDataPoint', job_id)
Resque::Job.destroy(:requeued, 'ResqueJobs::RunSimulateDataPoint', job_id)
end
else
raise 'Rails.application.config.job_manager must be set to :resque or :delayed_job'
Expand Down
5 changes: 5 additions & 0 deletions server/app/views/analyses/_table.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@
:view_all => 0), :class => "btn btn-mini") %>
</div>
<% end %>
<div class="button-container">
<%= link_to("Requeue Started", requeue_started_data_point_path(@analysis), method: :post,
class: "btn btn-mini btn-warning",
data: { confirm: 'Are you sure you want to requeue all simulations without a completed normal Status Message?' }) %>
</div>
</div>
</div>

Expand Down
3 changes: 2 additions & 1 deletion server/app/views/analyses/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
<td><%= @analysis.status %>
<% if @analysis.status == 'started' %>
| <%= link_to("Stop Analysis", stop_analysis_path(@analysis)) %>
<% end %>
| <%= link_to "Soft Stop", soft_stop_analysis_path(@analysis) %>
<% end %>
</td>
</tr>
<tr>
Expand Down
5 changes: 3 additions & 2 deletions server/config/initializers/redis.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@
require 'resque'
uri = URI.parse(ENV['REDIS_URL'])
Resque.redis = Redis.new(host: uri.host, port: uri.port, password: uri.password)
# Set a custom prune interval (in seconds)
Resque.prune_interval = 120 # Set the interval to 121 seconds
Resque.prune_interval = 180 # Set the interval to 180 seconds
elsif ['development', 'test'].include? Rails.env
require 'resque'
Resque.redis = 'localhost:6379'
Resque.prune_interval = 180 # Set the interval to 180 seconds
else
require 'resque'
uri = ENV.has_key?('REDIS_URL') ? ENV['REDIS_URL'] : 'queue:6379'
uri = URI.parse(uri)
#Resque.redis = 'queue:6379'
Resque.redis = Redis.new(host: uri.host, port: uri.port, password: uri.password)
Resque.prune_interval = 180 # Set the interval to 180 seconds
end
3 changes: 1 addition & 2 deletions server/config/initializers/set_log_level.rb
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
#Rails.logger.level = Logger::INFO
Rails.logger.level = Logger::DEBUG
Rails.logger.level = Logger::INFO

0 comments on commit 8c65283

Please sign in to comment.