feat(listjobs): Add log_url and items_url for running jobs
jpmckinney committed Jul 25, 2024
1 parent e3b51ee commit 9d83ad4
Showing 6 changed files with 24 additions and 11 deletions.
11 changes: 9 additions & 2 deletions docs/api.rst
@@ -245,6 +245,8 @@ Get the pending, running and finished jobs of a project.

.. note:: The default :ref:`jobstorage` setting stores jobs in memory, such that jobs are lost when the Scrapyd process ends.

.. note:: ``items_url`` in the response returns HTTP 404 Not Found if :ref:`items_dir` is disabled (or was disabled at the time the job was run). Similarly, if a log file or item feed is deleted (for example, by :ref:`jobs_to_keep`), ``log_url`` or ``items_url`` returns HTTP 404.

Supported request methods
``GET``
Parameters
@@ -263,16 +265,21 @@ Example:
{
"id": "78391cc0fcaf11e1b0090800272a6d06",
"project": "myproject",
"spider": "spider1"
"spider": "spider1",
"version": "0.1",
"settings": {"DOWNLOAD_DELAY=2"},
"args": {"arg1": "val1"},
}
],
"running": [
{
"id": "422e608f9f28cef127b3d5ef93fe9399",
"project": "myproject",
"spider": "spider2",
"pid": 93956,
"start_time": "2012-09-12 10:14:03.594664",
"pid": 93956
"log_url": "/logs/myproject/spider3/2f16646cfcaf11e1b0090800272a6d06.log",
"items_url": "/items/myproject/spider3/2f16646cfcaf11e1b0090800272a6d06.jl"
}
],
"finished": [
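As a usage illustration of the fields documented above, the following is a hedged client sketch: it assumes a Scrapyd instance at http://localhost:6800 and a project named myproject (both hypothetical), and uses the third-party requests library to list the jobs of that project and follow ``log_url`` for each running job, handling the HTTP 404 case described in the note.

    import requests

    BASE = "http://localhost:6800"  # assumed Scrapyd address; adjust as needed

    # listjobs.json returns the pending, running and finished jobs of a project.
    response = requests.get(f"{BASE}/listjobs.json", params={"project": "myproject"})
    jobs = response.json()

    for job in jobs["running"]:
        # log_url and items_url are paths relative to the Scrapyd root URL.
        log = requests.get(BASE + job["log_url"])
        if log.status_code == 404:
            # The log may have been deleted, for example by jobs_to_keep.
            print(f"{job['id']}: log not found")
        else:
            print(f"{job['id']}: {len(log.content)} bytes of log so far")
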
3 changes: 2 additions & 1 deletion docs/news.rst
@@ -12,6 +12,7 @@ Added
~~~~~

- Add ``version`` (egg version), ``settings`` (Scrapy settings) and ``args`` (spider arguments) to the pending jobs in the response from the :ref:`listjobs.json` webservice.
- Add ``log_url`` and ``items_url`` to the running jobs in the response from the :ref:`listjobs.json` webservice.
- Add a :ref:`status.json` webservice, to get the status of a job.
- Add a :ref:`unix_socket_path` setting, to listen on a Unix socket.
- Add a :ref:`poller` setting.
@@ -78,7 +79,7 @@ Library
- ``sorted_versions`` to ``scrapyd.eggstorage``
- ``get_crawl_args`` to ``scrapyd.launcher``

- :ref:`jobstorage` uses the ``ScrapyProcessProtocol`` class, by default. If :ref:`jobstorage` is set to ``scrapyd.jobstorage.SqliteJobStorage``, Scrapyd 1.3.0 uses a ``Job`` class, instead.
- :ref:`jobstorage` uses the ``ScrapyProcessProtocol`` class, by default. If :ref:`jobstorage` is set to ``scrapyd.jobstorage.SqliteJobStorage``, Scrapyd 1.3.0 uses a ``Job`` class, instead. To promote parity, the ``Job`` class is removed.
- Move the ``activate_egg`` function from the ``scrapyd.eggutils`` module to its caller, the ``scrapyd.runner`` module.
- Move the ``job_items_url`` and ``job_log_url`` functions from the ``scrapyd.jobstorage`` module to the ``scrapyd.utils`` module. :ref:`jobstorage` is not responsible for URLs.
- Change the ``get_crawl_args`` function to no longer convert ``bytes`` to ``str``, as already done by its caller.
5 changes: 4 additions & 1 deletion scrapyd/launcher.py
@@ -9,6 +9,7 @@

from scrapyd import __version__
from scrapyd.interfaces import IEnvironment, IJobStorage, IPoller
from scrapyd.utils import job_items_url, job_log_url

log = Logger()

@@ -138,11 +139,13 @@ def processEnded(self, status):

def asdict(self):
return {
"id": self.job,
"project": self.project,
"spider": self.spider,
"id": self.job,
"pid": self.pid,
"start_time": str(self.start_time),
"log_url": job_log_url(self),
"items_url": job_items_url(self),
}

def log(self, level, action):
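For context on the two helpers that ``asdict()`` now calls, here is a minimal sketch, assuming they simply build the ``/logs/...`` and ``/items/...`` paths shown in the docs/api.rst example; the real functions live in ``scrapyd.utils`` and may handle more cases (for example, a disabled ``items_dir``).

    # Minimal sketch of the helpers used by asdict() above, assuming they build
    # paths like those in the docs/api.rst example. The actual implementations in
    # scrapyd.utils may differ.
    def job_log_url(job):
        return f"/logs/{job.project}/{job.spider}/{job.job}.log"


    def job_items_url(job):
        return f"/items/{job.project}/{job.spider}/{job.job}.jl"
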
4 changes: 2 additions & 2 deletions scrapyd/webservice.py
@@ -349,9 +349,9 @@ def render_GET(self, txrequest, project):
return {
"pending": [
{
"id": message["_job"],
"project": queue_name,
"spider": message["name"],
"id": message["_job"],
"version": message.get("_version"),
"settings": message.get("settings", {}),
"args": {k: v for k, v in message.items() if k not in ("name", "_job", "_version", "settings")},
@@ -366,9 +366,9 @@
],
"finished": [
{
"id": finished.job,
"project": finished.project,
"spider": finished.spider,
"id": finished.job,
"start_time": str(finished.start_time),
"end_time": str(finished.end_time),
"log_url": job_log_url(finished),
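The dict comprehension in the pending branch above filters a queued message down to its spider arguments. A small illustration, using a hypothetical message modeled on the docs/api.rst example:

    # Hypothetical pending-queue message, modeled on the docs/api.rst example.
    message = {
        "name": "spider1",
        "_job": "78391cc0fcaf11e1b0090800272a6d06",
        "_version": "0.1",
        "settings": {"DOWNLOAD_DELAY": "2"},
        "arg1": "val1",
    }

    # Everything that is not a reserved key is treated as a spider argument.
    args = {k: v for k, v in message.items() if k not in ("name", "_job", "_version", "settings")}
    assert args == {"arg1": "val1"}
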
2 changes: 1 addition & 1 deletion tests/__init__.py
@@ -29,7 +29,7 @@ def get_finished_job(project="p1", spider="s1", job="j1", start_time=None, end_t
start_time = datetime.datetime.now()
if end_time is None:
end_time = datetime.datetime.now()
process = ScrapyProcessProtocol(project, spider, job, {}, [])
process = ScrapyProcessProtocol(project, spider, job, env={}, args=[])
process.start_time = start_time
process.end_time = end_time
return process
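A hedged usage sketch of the test helper above, with timestamps chosen to match the values asserted in tests/test_webservice.py:

    import datetime

    # Hypothetical usage of get_finished_job: build a finished job and check the
    # fields that listjobs.json serializes for finished jobs.
    job = get_finished_job(
        project="p1",
        spider="s1",
        job="j1",
        start_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 7),
        end_time=datetime.datetime(2001, 2, 3, 4, 5, 6, 8),
    )
    assert job.project == "p1"
    assert str(job.start_time) == "2001-02-03 04:05:06.000007"
    assert str(job.end_time) == "2001-02-03 04:05:06.000008"
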
10 changes: 6 additions & 4 deletions tests/test_webservice.py
@@ -337,13 +337,13 @@ def test_list_jobs(txrequest, root, scrapy_process, args):

expected["finished"].append(
{
"id": "j1",
"project": "p1",
"spider": "s1",
"id": "j1",
"start_time": "2001-02-03 04:05:06.000007",
"end_time": "2001-02-03 04:05:06.000008",
"items_url": "/items/p1/s1/j1.jl",
"log_url": "/logs/p1/s1/j1.log",
"items_url": "/items/p1/s1/j1.jl",
},
)
assert_content(txrequest, root, "GET", "listjobs", args, expected)
@@ -352,11 +352,13 @@ def test_list_jobs(txrequest, root, scrapy_process, args):

expected["running"].append(
{
"id": "j1",
"project": "p1",
"spider": "s1",
"id": "j1",
"pid": None,
"start_time": "2001-02-03 04:05:06.000009",
"log_url": "/logs/p1/s1/j1.log",
"items_url": "/items/p1/s1/j1.jl",
}
)
assert_content(txrequest, root, "GET", "listjobs", args, expected)
@@ -372,9 +374,9 @@ def test_list_jobs(txrequest, root, scrapy_process, args):

expected["pending"].append(
{
"id": "j1",
"project": "p1",
"spider": "s1",
"id": "j1",
"version": "0.1",
"settings": {"DOWNLOAD_DELAY=2": "TRACK=Cause = Time"},
"args": {"other": "one"},
