user-agent flag
This commit is contained in:
parent
61be304871
commit
e6abe68e6e
@ -8,11 +8,12 @@ from rich.console import Console
|
|||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
|
from .user_agents import USER_AGENTS
|
||||||
|
|
||||||
cli = typer.Typer(help="whsk: web harvesting/scraping toolKit")
|
cli = typer.Typer(help="whsk: web harvesting/scraping toolKit")
|
||||||
|
|
||||||
VERSION = version("whsk")
|
VERSION = version("whsk")
|
||||||
_user_agent = f"whsk/{VERSION}"
|
_default_user_agent = f"whsk/{VERSION}"
|
||||||
|
|
||||||
# Common Options
|
# Common Options
|
||||||
opt = {
|
opt = {
|
||||||
@ -54,6 +55,21 @@ def make_request(url, *, headers, user_agent, postdata):
|
|||||||
- lxml.etree.Element
|
- lxml.etree.Element
|
||||||
"""
|
"""
|
||||||
header_dict = parse_headers(headers)
|
header_dict = parse_headers(headers)
|
||||||
|
|
||||||
|
# user agent either from headers, shortcut, or default
|
||||||
|
if "user-agent" in headers and user_agent:
|
||||||
|
typer.secho("Cannot use --ua shortcut and also pass --header User-Agent")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
elif "user-agent" in header_dict:
|
||||||
|
pass # make no changes
|
||||||
|
elif not user_agent:
|
||||||
|
header_dict["user-agent"] = _default_user_agent
|
||||||
|
elif user_agent in USER_AGENTS:
|
||||||
|
header_dict["user-agent"] = USER_AGENTS[user_agent]
|
||||||
|
else:
|
||||||
|
typer.secho("--ua shortcut must be one of: " + ", ".join(USER_AGENTS))
|
||||||
|
raise typer.Exit(1)
|
||||||
|
|
||||||
method = "GET"
|
method = "GET"
|
||||||
if postdata:
|
if postdata:
|
||||||
method = "POST"
|
method = "POST"
|
||||||
@ -106,7 +122,7 @@ WWWWW H H SS K K v{VERSION}
|
|||||||
@cli.command()
|
@cli.command()
|
||||||
def query(
|
def query(
|
||||||
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
||||||
user_agent: Annotated[str, opt["user_agent"]] = _user_agent,
|
user_agent: Annotated[str, opt["user_agent"]] = "",
|
||||||
postdata: Annotated[str, opt["postdata"]] = "",
|
postdata: Annotated[str, opt["postdata"]] = "",
|
||||||
headers: Annotated[list[str], opt["headers"]] = [],
|
headers: Annotated[list[str], opt["headers"]] = [],
|
||||||
css: Annotated[str, opt["css"]] = "",
|
css: Annotated[str, opt["css"]] = "",
|
||||||
@ -132,7 +148,7 @@ def query(
|
|||||||
@cli.command()
|
@cli.command()
|
||||||
def shell(
|
def shell(
|
||||||
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
||||||
user_agent: Annotated[str, opt["user_agent"]] = _user_agent,
|
user_agent: Annotated[str, opt["user_agent"]] = "",
|
||||||
postdata: Annotated[str, opt["postdata"]] = "",
|
postdata: Annotated[str, opt["postdata"]] = "",
|
||||||
headers: Annotated[list[str], opt["headers"]] = [],
|
headers: Annotated[list[str], opt["headers"]] = [],
|
||||||
css: Annotated[str, opt["css"]] = "",
|
css: Annotated[str, opt["css"]] = "",
|
||||||
|
11
src/whsk/user_agents.py
Normal file
11
src/whsk/user_agents.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# based on the list from https://www.useragents.me
|
||||||
|
# Maps the short names accepted by the --ua shortcut to the full User-Agent
# header value that is sent in their place.
#
# Several strings in the upstream list were cut off by one character at the
# end ("Safari/537.3", "Edg/131.0.0.", "Safari/605.1.1"); the trailing
# product tokens are restored here so the values match what the real
# browsers send.
USER_AGENTS = {
    "linux.chrome": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "linux.firefox": "Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/133",
    "mac.chrome": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "mac.firefox": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) Gecko/20100101 Firefox/133",
    "mac.safari": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
    "win.chrome": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "win.edge": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "win.firefox": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133",
}
|
Loading…
Reference in New Issue
Block a user