user-agent flag
This commit is contained in:
parent
61be304871
commit
e6abe68e6e
@ -8,11 +8,12 @@ from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from typing_extensions import Annotated
|
||||
from importlib.metadata import version
|
||||
from .user_agents import USER_AGENTS
|
||||
|
||||
cli = typer.Typer(help="whsk: web harvesting/scraping toolKit")
|
||||
|
||||
VERSION = version("whsk")
|
||||
_user_agent = f"whsk/{VERSION}"
|
||||
_default_user_agent = f"whsk/{VERSION}"
|
||||
|
||||
# Common Options
|
||||
opt = {
|
||||
@ -54,6 +55,21 @@ def make_request(url, *, headers, user_agent, postdata):
|
||||
- lxml.etree.Element
|
||||
"""
|
||||
header_dict = parse_headers(headers)
|
||||
|
||||
# user agent either from headers, shortcut, or default
#
# Precedence:
#   1. --ua together with an explicit User-Agent header -> error
#   2. explicit User-Agent header                      -> used as-is
#   3. no --ua shortcut                                -> default whsk agent
#   4. known --ua shortcut                             -> mapped via USER_AGENTS
#   5. unknown --ua shortcut                           -> error
#
# The conflict check must inspect the parsed header_dict: `headers` is the
# raw list of --header strings, and list membership is an exact-match test,
# so a "User-Agent: ..." entry would never equal the bare key "user-agent".
# NOTE(review): this relies on parse_headers producing lower-case keys, as
# the pre-existing `"user-agent" in header_dict` branch already does - confirm.
if "user-agent" in header_dict and user_agent:
    typer.secho("Cannot use --ua shortcut and also pass --header User-Agent")
    raise typer.Exit(1)
elif "user-agent" in header_dict:
    pass  # make no changes
elif not user_agent:
    header_dict["user-agent"] = _default_user_agent
elif user_agent in USER_AGENTS:
    header_dict["user-agent"] = USER_AGENTS[user_agent]
else:
    typer.secho("--ua shortcut must be one of: " + ", ".join(USER_AGENTS))
    raise typer.Exit(1)
|
||||
|
||||
method = "GET"
|
||||
if postdata:
|
||||
method = "POST"
|
||||
@ -106,7 +122,7 @@ WWWWW H H SS K K v{VERSION}
|
||||
@cli.command()
|
||||
def query(
|
||||
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
||||
user_agent: Annotated[str, opt["user_agent"]] = _user_agent,
|
||||
user_agent: Annotated[str, opt["user_agent"]] = "",
|
||||
postdata: Annotated[str, opt["postdata"]] = "",
|
||||
headers: Annotated[list[str], opt["headers"]] = [],
|
||||
css: Annotated[str, opt["css"]] = "",
|
||||
@ -132,7 +148,7 @@ def query(
|
||||
@cli.command()
|
||||
def shell(
|
||||
url: Annotated[str, typer.Argument(help="URL to scrape")],
|
||||
user_agent: Annotated[str, opt["user_agent"]] = _user_agent,
|
||||
user_agent: Annotated[str, opt["user_agent"]] = "",
|
||||
postdata: Annotated[str, opt["postdata"]] = "",
|
||||
headers: Annotated[list[str], opt["headers"]] = [],
|
||||
css: Annotated[str, opt["css"]] = "",
|
||||
|
11
src/whsk/user_agents.py
Normal file
11
src/whsk/user_agents.py
Normal file
@ -0,0 +1,11 @@
|
||||
# based on the list from https://www.useragents.me
#
# Maps a "<platform>.<browser>" shortcut name to a full User-Agent header
# value. The trailing product tokens of the Chrome, Safari and Edge entries
# were missing their final character (e.g. "Safari/537.3", "Edg/131.0.0.");
# they are written out in full here so each value is a well-formed string.
USER_AGENTS = {
    "linux.chrome": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "linux.firefox": "Mozilla/5.0 (X11; Linux x86_64) Gecko/20100101 Firefox/133",
    "mac.chrome": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "mac.firefox": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) Gecko/20100101 Firefox/133",
    "mac.safari": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
    "win.chrome": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "win.edge": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "win.firefox": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133",
}
|
Loading…
Reference in New Issue
Block a user