Source code for evalys.utils

# -*- coding: utf-8 -*-

def bulksetattr(obj, **kwargs):
    """
    Safely assign attributes in bulk.

    Every keyword name must already be an attribute of `obj`.  All names
    are checked *before* any value is assigned, so a misspelled keyword
    raises without leaving `obj` partially updated.

    :param obj: the object whose attributes are updated.
    :param kwargs: attribute names mapped to their new values.
    :raises AttributeError: if a keyword is not the name of an existing
        attribute of `obj`.  Any exception raised by the object when it
        refuses an assignment is propagated as is; in that case the
        attributes assigned before the failing one keep their new value.
    """
    # First pass: getattr raises AttributeError on any unknown name, before
    # the object has been touched.
    for attr in kwargs:
        getattr(obj, attr)

    # Second pass: every name is known to exist, update the values.
    for attr, value in kwargs.items():
        setattr(obj, attr, value)
def cut_workload(workload_df, begin_time, end_time):
    """
    Extract the part of a workload dataframe between begin_time and end_time.

    The dataframe must contain the 'submission_time', 'waiting_time',
    'execution_time' and 'jobID' columns.  The input dataframe is not
    modified: the function works on a copy.

    The jobs are split into three dataframes, returned in a dict:

    - ``"workload"``: jobs submitted in the period, i.e. with
      ``begin_time <= submission_time < end_time``;
    - ``"queue"``: jobs submitted before `begin_time` that start at or
      after `begin_time` (they are waiting when the period begins);
    - ``"running"``: jobs started before `begin_time` that finish after
      `begin_time` (they are running when the period begins).

    Each dataframe is sorted by 'jobID' and has a fresh integer index.
    The times of the jobs are returned as they are: they are not truncated
    to the period.

    A 'starting_time' column (``submission_time + waiting_time``) is
    computed internally.  It is removed from the results unless the input
    already had a 'starting_time' column, in which case the returned column
    holds the recomputed values.

    Example with :py:class:`evalys.Workload`:

    >>> from evalys.workload import Workload
    >>> w = Workload.from_csv("./examples/UniLu-Gaia-2014-2.swf")
    >>> cut_w = cut_workload(w.df, 500000, 600000)

    Example with :py:class:`evalys.JobSet`:

    >>> from evalys.jobset import JobSet
    >>> js = JobSet.from_csv("./examples/jobs.csv")
    >>> cut_js = cut_workload(js.df, 1000, 2000)
    """
    assert begin_time < end_time

    # Work on a copy indexed by submission time (the column is kept too) so
    # that the period can be located with a binary search.
    df = workload_df.copy()
    contains_starting_time = 'starting_time' in df
    df['starting_time'] = df['submission_time'] + df['waiting_time']
    df = df.sort_values(by='submission_time').set_index(
        ['submission_time'], drop=False)

    # Positions of the first jobs submitted at or after each bound
    begin = df.index.searchsorted(begin_time)
    end = df.index.searchsorted(end_time)

    # Jobs submitted in the period
    to_export = df.iloc[begin:end]

    # Jobs in queue when the period begins (submitted before, not started)
    queued_jobs = df[
        (df["submission_time"] < begin_time) &
        (df["starting_time"] >= begin_time)]

    # Jobs running when the period begins (started before, not finished)
    running_jobs = df[
        (df["starting_time"] < begin_time) &
        (df["starting_time"] + df["execution_time"] > begin_time)]

    def _finalize(frame):
        # Drop the helper column with a non-inplace call: the frames are
        # selections of `df`, and dropping in place on a selection triggers
        # pandas' SettingWithCopyWarning.
        if not contains_starting_time:
            frame = frame.drop('starting_time', axis=1)
        return frame.sort_values(by="jobID").reset_index(drop=True)

    return {
        "workload": _finalize(to_export),
        "queue": _finalize(queued_jobs),
        "running": _finalize(running_jobs),
    }