-
Notifications
You must be signed in to change notification settings - Fork 114
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A new fetcher for Puppeteer based JS rendering (#272)
Creates a Splash replacement. I have tested it with just one target so far, so it's hard to say it's perfect, but it might be an alternative to Splash, which seems to be a bit dead right now.
- Loading branch information
1 parent
bfae1b5
commit cdc3727
Showing
3 changed files
with
142 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
defmodule Crawly.Fetchers.CrawlyRenderServer do
  # `~S` keeps the trailing backslashes of the curl example literal; in a plain
  # heredoc they would be treated as line continuations and vanish from the docs.
  @moduledoc ~S"""
  Implements the `Crawly.Fetchers.Fetcher` behaviour for Crawly Render Server
  JavaScript rendering.

  Crawly Render Server is a lightweight Puppeteer-based JavaScript rendering
  engine server. Quite experimental. See more:
  https://github.com/elixir-crawly/crawly-render-server

  It exposes a `/render` endpoint that renders JS on incoming requests. For example:

      curl -X POST \
        http://localhost:3000/render \
        -H 'Content-Type: application/json' \
        -d '{
          "url": "https://example.com",
          "headers": {"User-Agent": "Custom User Agent"}
        }'

  In this case you have to configure the fetcher in the following way:
  `fetcher: {Crawly.Fetchers.CrawlyRenderServer, [base_url: "http://localhost:3000/render"]}`
  """
  @behaviour Crawly.Fetchers.Fetcher

  require Logger

  @missing_base_url_message "The base_url is not set. CrawlyRenderServer can't be used! " <>
                              "Please set :base_url in fetcher options to continue. " <>
                              "For example: " <>
                              "fetcher: {Crawly.Fetchers.CrawlyRenderServer, [base_url: <url>]}"

  @doc """
  Renders `request.url` through the render server configured in `client_options`.

  The request URL and headers are POSTed as JSON to the `:base_url` endpoint,
  using `request.options` as the HTTPoison options.

  ## Options

    * `:base_url` - (required) URL of the render server endpoint.

  ## Returns

    * `{:ok, %HTTPoison.Response{}}` - built from the `"page"`, `"status"` and
      `"headers"` fields of the JSON object returned by the render server.
    * `{:error, %HTTPoison.Error{reason: {:invalid_render_response, details}}}` -
      when the render server's body is not a JSON object.
    * whatever else `HTTPoison.post/4` returned, unchanged, when the HTTP call
      itself did not succeed.

  ## Raises

    * `RuntimeError` - when `:base_url` is missing from `client_options`.
  """
  @impl true
  def fetch(request, client_options) do
    base_url = fetch_base_url!(client_options)

    req_body =
      Poison.encode!(%{
        url: request.url,
        headers: Map.new(request.headers)
      })

    # Any non-matching result (e.g. `{:error, %HTTPoison.Error{}}`) falls out of
    # `with` untouched, so transport failures reach the caller as-is.
    with {:ok, response} <-
           HTTPoison.post(
             base_url,
             req_body,
             [{"content-type", "application/json"}],
             request.options
           ),
         {:ok, rendered} <- decode_rendered(response.body) do
      {:ok,
       %HTTPoison.Response{
         body: Map.get(rendered, "page"),
         status_code: Map.get(rendered, "status"),
         headers: Map.get(rendered, "headers"),
         request_url: request.url,
         request: request
       }}
    end
  end

  # Returns the configured :base_url, or logs and raises when it is missing.
  defp fetch_base_url!(client_options) do
    case Keyword.get(client_options, :base_url) do
      nil ->
        Logger.error(@missing_base_url_message)
        # Carry the explanation in the exception too, not only in the log.
        raise RuntimeError, @missing_base_url_message

      base_url ->
        base_url
    end
  end

  # Decodes the render server body, accepting only a JSON object. Anything else
  # (invalid JSON, a JSON list or scalar) becomes an error tuple instead of a
  # crash, whatever error shape the installed Poison version produces.
  defp decode_rendered(body) do
    case Poison.decode(body) do
      {:ok, rendered} when is_map(rendered) ->
        {:ok, rendered}

      other ->
        {:error, %HTTPoison.Error{reason: {:invalid_render_response, other}}}
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
defmodule Crawly.Fetchers.CrawlyRenderServerTest do
  # Not async: HTTPoison is mocked globally through :meck.
  use ExUnit.Case

  alias Crawly.Fetchers.CrawlyRenderServer

  # The missing-:base_url path logs an error; keep it out of passing test output.
  @moduletag :capture_log

  setup do
    # Remove any mock installed by a test so it cannot leak into other tests.
    on_exit(fn -> :meck.unload() end)
    :ok
  end

  test "raises an error when base_url is not set" do
    request = %{
      url: "https://example.com",
      headers: %{"User-Agent" => "Custom User Agent"}
    }

    client_options = []

    assert_raise RuntimeError, fn ->
      CrawlyRenderServer.fetch(request, client_options)
    end
  end

  test "composes correct request to render server" do
    request = %{
      url: "https://example.com",
      headers: [{"User-Agent", "Custom User Agent"}],
      options: []
    }

    client_options = [base_url: "http://localhost:3000"]

    rendered = %{
      "page" => "<html>rendered</html>",
      "status" => 200,
      "headers" => %{"content-type" => "text/html"}
    }

    :meck.expect(HTTPoison, :post, fn base_url, body, headers, options ->
      assert base_url == "http://localhost:3000"
      assert headers == [{"content-type", "application/json"}]
      assert options == []

      # Decode with string keys: no need to create atoms from the payload.
      assert Poison.decode!(body) == %{
               "url" => "https://example.com",
               "headers" => %{"User-Agent" => "Custom User Agent"}
             }

      # Answer like the render server does, so the response-building path of
      # fetch/2 is exercised as well.
      {:ok, %HTTPoison.Response{status_code: 200, body: Poison.encode!(rendered)}}
    end)

    assert {:ok, %HTTPoison.Response{} = response} =
             CrawlyRenderServer.fetch(request, client_options)

    assert response.body == "<html>rendered</html>"
    assert response.status_code == 200
    assert response.headers == %{"content-type" => "text/html"}
    assert response.request_url == "https://example.com"
    assert response.request == request
  end
end