Skip to content

Person

Developer docs

A class to represent a person

Source code in src/whitepyges/person.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
class Person:
    """
    A class to represent a person
    """

    def __init__(
        self,
        first_name: str,
        last_name: str,
        age: int | None = None,
        city: str | None = None,
        state: str | None = None,
        zip_code: str | None = None,
    ) -> None:
        """
        Initialize a new Person object

        Args:
            first_name (str): The first name of the person
            last_name (str): The last name of the person
            age (int, optional): The age of the person. Defaults to None.
            city (str, optional): The city of the person. Defaults to None.
            state (str, optional): The state of the person. Defaults to None.
            zip_code (str, optional): The zip code of the person. Defaults to None.

        Returns:
            None
        """

        if not first_name or not last_name:
            raise ValueError("First name and last name are required")

        if not isinstance(first_name, str) or not isinstance(last_name, str):
            raise ValueError("First name and last name must be strings")

        self.name = helper.format_name(first_name, last_name)
        self.age = helper.format_age(age)
        self.location = helper.format_location(city, state, zip_code)

        self.headers = config.HEADERS

        self.logger = logging.getLogger(f"Person-{self.name}")
        logging.basicConfig(level=logging.WARNING)

        self.logger.info(f"Initialized Person object: {repr(self)}")

    def _clean_person_data(self, person_data: dict, age: str) -> list[dict]:
        """
        Clean the person data by filtering and repositioning the items.

        Args:
            person_data (dict): The raw person data.
            age (str): The age of the person.

        Returns:
            list[dict]: The cleaned list of items.
        """

        raw_items: list[dict] = person_data.get("itemListElement", [])
        cleaned_items: list[dict] = []

        for item in raw_items:
            if item.get("@type") != "ListItem":
                continue

            item_data: dict = item.get("item", {})

            if item_data.get("@type") != "Person":
                continue

            if age is not None:
                if not item_data.get("description", "").startswith(
                    f"{item_data.get('name')} in their {age}"
                ) and not item_data.get("description", "").startswith(
                    f"{item_data.get('name')},"
                ):
                    continue

            item_data.pop("@type", None)

            item_data["url"] = config.BASE_URL + item_data.get("url", "")

            cleaned_items.append(item_data)

        return cleaned_items

    def search(
        self,
        count: int = -1,
        timeout: int = 10,
        max_retries: int = 3,
        randomize_headers: bool = False,
        ignore_robots: bool = False,
    ) -> list[dict] | None:
        """
        Perform a search for the person

        Args:
            count (int, optional): The number of results to return. -1 returns all results. Defaults to -1.
            timeout (int, optional): The timeout for the request. Defaults to 10.
            max_retries (int, optional): The maximum number of retries. Defaults to 3.
            randomize_headers (bool, optional): Randomize the headers for the request. Defaults to False.
            ignore_robots (bool, optional): Ignore the robots.txt file. Defaults to False.

        Returns:
            list[dict] | None: Possible data for the person
        """

        if count == 0 or count < -1:
            self.logger.error("Count must be a positive integer or -1")
            raise ValueError("Count must be a positive integer or -1")

        endpoint: str = "name"

        if self.location:
            endpoint = "name_and_location"

        url: str = helper.get_endpoint(
            "people", endpoint, name=self.name, location=self.location
        )

        search_headers: dict = self.headers.copy()  # dont modify the original headers
        if randomize_headers:
            search_headers = helper.get_random_headers()

        response: requests.Response = helper.make_request_with_retries(
            url, search_headers, max_retries, timeout, ignore_robots
        )

        soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
        script_tag = soup.find("script", type="application/ld+json")

        if not script_tag:
            self.logger.warning("No script tag found in the response")
            return None

        # This is only really a list with one item (I would assume this is for the pages of users)
        person_data: dict = json.loads(script_tag.string)[0]

        cleaned_items: list[dict] = self._clean_person_data(person_data, self.age)

        return (
            cleaned_items[:count]
            if count != -1 and count <= len(cleaned_items)
            else cleaned_items
        )

    def __repr__(self) -> str:
        """
        Return an unambiguous string representation of the Person object.

        Returns:
            str: The unambiguous string representation of the Person object
        """

        return helper.format_repr(self)

    def __str__(self) -> str:
        """
        Return a readable string representation of the Person object.

        Returns:
            str: The readable string representation of the Person object
        """

        return helper.format_str(self)

__init__

Initialize a new Person object

Parameters:

Name Type Description Default
first_name str

The first name of the person

required
last_name str

The last name of the person

required
age int

The age of the person. Defaults to None.

None
city str

The city of the person. Defaults to None.

None
state str

The state of the person. Defaults to None.

None
zip_code str

The zip code of the person. Defaults to None.

None

Returns:

Type Description
None

None

Source code in src/whitepyges/person.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def __init__(
    self,
    first_name: str,
    last_name: str,
    age: int | None = None,
    city: str | None = None,
    state: str | None = None,
    zip_code: str | None = None,
) -> None:
    """
    Initialize a new Person object

    Args:
        first_name (str): The first name of the person
        last_name (str): The last name of the person
        age (int, optional): The age of the person. Defaults to None.
        city (str, optional): The city of the person. Defaults to None.
        state (str, optional): The state of the person. Defaults to None.
        zip_code (str, optional): The zip code of the person. Defaults to None.

    Returns:
        None
    """

    if not first_name or not last_name:
        raise ValueError("First name and last name are required")

    if not isinstance(first_name, str) or not isinstance(last_name, str):
        raise ValueError("First name and last name must be strings")

    self.name = helper.format_name(first_name, last_name)
    self.age = helper.format_age(age)
    self.location = helper.format_location(city, state, zip_code)

    self.headers = config.HEADERS

    self.logger = logging.getLogger(f"Person-{self.name}")
    logging.basicConfig(level=logging.WARNING)

    self.logger.info(f"Initialized Person object: {repr(self)}")

__repr__

Return an unambiguous string representation of the Person object.

Returns:

Name Type Description
str str

The unambiguous string representation of the Person object

Source code in src/whitepyges/person.py
157
158
159
160
161
162
163
164
165
def __repr__(self) -> str:
    """
    Build the unambiguous (debug) representation of this Person.

    Returns:
        str: Whatever ``helper.format_repr`` produces for this object
    """

    representation: str = helper.format_repr(self)
    return representation

__str__

Return a readable string representation of the Person object.

Returns:

Name Type Description
str str

The readable string representation of the Person object

Source code in src/whitepyges/person.py
167
168
169
170
171
172
173
174
175
def __str__(self) -> str:
    """
    Build the readable (display) representation of this Person.

    Returns:
        str: Whatever ``helper.format_str`` produces for this object
    """

    readable: str = helper.format_str(self)
    return readable

_clean_person_data

Clean the person data by filtering and repositioning the items.

Parameters:

Name Type Description Default
person_data dict

The raw person data.

required
age str

The age of the person.

required

Returns:

Type Description
list[dict]

list[dict]: The cleaned list of items.

Source code in src/whitepyges/person.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def _clean_person_data(self, person_data: dict, age: str) -> list[dict]:
    """
    Clean the person data by filtering and repositioning the items.

    Args:
        person_data (dict): The raw person data.
        age (str): The age of the person.

    Returns:
        list[dict]: The cleaned list of items.
    """

    raw_items: list[dict] = person_data.get("itemListElement", [])
    cleaned_items: list[dict] = []

    for item in raw_items:
        if item.get("@type") != "ListItem":
            continue

        item_data: dict = item.get("item", {})

        if item_data.get("@type") != "Person":
            continue

        if age is not None:
            if not item_data.get("description", "").startswith(
                f"{item_data.get('name')} in their {age}"
            ) and not item_data.get("description", "").startswith(
                f"{item_data.get('name')},"
            ):
                continue

        item_data.pop("@type", None)

        item_data["url"] = config.BASE_URL + item_data.get("url", "")

        cleaned_items.append(item_data)

    return cleaned_items

search

Perform a search for the person

Parameters:

Name Type Description Default
count int

The number of results to return. -1 returns all results. Defaults to -1.

-1
timeout int

The timeout for the request. Defaults to 10.

10
max_retries int

The maximum number of retries. Defaults to 3.

3
randomize_headers bool

Randomize the headers for the request. Defaults to False.

False
ignore_robots bool

Ignore the robots.txt file. Defaults to False.

False

Returns:

Type Description
list[dict] | None

list[dict] | None: Possible data for the person

Source code in src/whitepyges/person.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def search(
    self,
    count: int = -1,
    timeout: int = 10,
    max_retries: int = 3,
    randomize_headers: bool = False,
    ignore_robots: bool = False,
) -> list[dict] | None:
    """
    Perform a search for the person

    Args:
        count (int, optional): The number of results to return. -1 returns all results. Defaults to -1.
        timeout (int, optional): The timeout for the request. Defaults to 10.
        max_retries (int, optional): The maximum number of retries. Defaults to 3.
        randomize_headers (bool, optional): Randomize the headers for the request. Defaults to False.
        ignore_robots (bool, optional): Ignore the robots.txt file. Defaults to False.

    Returns:
        list[dict] | None: Possible data for the person
    """

    if count == 0 or count < -1:
        self.logger.error("Count must be a positive integer or -1")
        raise ValueError("Count must be a positive integer or -1")

    endpoint: str = "name"

    if self.location:
        endpoint = "name_and_location"

    url: str = helper.get_endpoint(
        "people", endpoint, name=self.name, location=self.location
    )

    search_headers: dict = self.headers.copy()  # dont modify the original headers
    if randomize_headers:
        search_headers = helper.get_random_headers()

    response: requests.Response = helper.make_request_with_retries(
        url, search_headers, max_retries, timeout, ignore_robots
    )

    soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
    script_tag = soup.find("script", type="application/ld+json")

    if not script_tag:
        self.logger.warning("No script tag found in the response")
        return None

    # This is only really a list with one item (I would assume this is for the pages of users)
    person_data: dict = json.loads(script_tag.string)[0]

    cleaned_items: list[dict] = self._clean_person_data(person_data, self.age)

    return (
        cleaned_items[:count]
        if count != -1 and count <= len(cleaned_items)
        else cleaned_items
    )

Features

  • Search for people by first and last name, optionally narrowed by age, city, state, or zip code
  • Returns names, profile URLs, addresses, phone numbers, and related people
  • Handles request retries and optional header randomization

Example Usage

from whitepyges import Person

# search() returns a list of result dicts, or None when the response
# contains no JSON-LD script tag
person: Person = Person(first_name='John', last_name='Doe', state='WA')
info: list[dict] | None = person.search()

print(info)

Example Response

[
    {
        "name": "Jon Doe",
        "givenName": "Jon",
        "familyName": "Doe",
        "description": "Jon Doe in their 70s, currently living in Example, WA",
        "url": "https://www.whitepages.com/name/Jon-Doe/Example-WA/random_letters",
        "address": [
            {
                "@type": "PostalAddress",
                "streetAddress": "123 St",
                "addressLocality": "Example",
                "addressRegion": "WA",
                "addressCountry": "US"
            }
        ],
        "telephone": "(123) 456-7890",
        "relatedTo": []
    },
    {
        "name": "Jon Doe",
        "givenName": "Jon",
        "familyName": "Doe",
        "description": "Jon Doe in their 40s, currently living in Example-2, WA",
        "url": "https://www.whitepages.com/name/Jon-Doe/Example-2-WA/random_letters2",
        "address": [
            {
                "@type": "PostalAddress",
                "streetAddress": "123 Ave",
                "addressLocality": "Example-2",
                "addressRegion": "WA",
                "addressCountry": "US"
            }
        ],
        "telephone": "(123) 456-7890",
        "relatedTo": []
    }
]