Skip to content

Commit

Permalink
Merge branch '3.9'
Browse files Browse the repository at this point in the history
  • Loading branch information
datawhores committed Apr 9, 2024
2 parents 03e7a03 + 8a6c024 commit fecdcbe
Show file tree
Hide file tree
Showing 20 changed files with 432 additions and 306 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/commit_bundler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ on:
push:
jobs:
create_version:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
runs-on: ubuntu-20.04
name: create version doc
steps:
Expand Down Expand Up @@ -37,6 +38,7 @@ jobs:
# - name: list files 4
# run: ls /home/runner/work/OF-Scraper/OF-Scraper/ofscraper
linux:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
needs: [create_version]
runs-on: ubuntu-20.04
name: create linux release
Expand Down Expand Up @@ -117,6 +119,7 @@ jobs:
path: "dist/ofscraper_linux_${{ steps.commit.outputs.short}}.zip"

windows_dir:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
needs: [create_version]
runs-on: windows-latest
name: create windows release dir
Expand Down Expand Up @@ -183,6 +186,7 @@ jobs:
path: "dist/ofscraper_windows_${{steps.commit.outputs.short}}.zip"

windows_file:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
needs: [create_version]
runs-on: windows-latest
name: create windows release file
Expand Down Expand Up @@ -254,6 +258,7 @@ jobs:
path: "dist/ofscraper_windows_${{steps.commit.outputs.short}}.zip"

mac:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
needs: [create_version]
runs-on: macos-latest
name: create mac release
Expand Down Expand Up @@ -351,6 +356,7 @@ jobs:
path: "dist/ofscraper_macos_${{steps.commit.outputs.short}}.zip"

publish_release:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
name: publish
needs: [mac, windows_dir, windows_file, linux]
Expand Down
85 changes: 56 additions & 29 deletions .github/workflows/docker-daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,61 @@ on:
tags: ["*.*.*"]

jobs:
build:
release_ghcr:
  # Builds the image from the repository root and pushes it to ghcr.io.
  # Runs for push events whose ref is not a tag.
  if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
    # This is used to complete the identity challenge
    # with sigstore/fulcio when running outside of PRs.
    id-token: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
      with:
        # Fetch full history (and tags) so `git describe --tags` below works.
        fetch-depth: 0
    # Exports the most recent tag as `release` for later steps.
    # NOTE(review): no later step in this job reads `release` - confirm
    # whether this step is still needed here.
    - name: get tag
      run: echo release=$(git describe --tags `git rev-list --tags --max-count=1`) >> $GITHUB_ENV
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v2

    # Buildx is set up exactly once, after QEMU; the earlier duplicate
    # "Setup Docker buildx" step was redundant and has been removed.
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    - name: login to ghcr
      uses: docker/login-action@v2
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN}}
    # Extract metadata (tags, labels) for Docker
    # https://github.com/docker/metadata-action
    - name: Extract Docker metadata ghrc.io
      id: meta2
      uses: docker/metadata-action@v4
      with:
        images: |
          ghcr.io/datawhores/of-scraper
        tags: |
          type=raw,value={{branch}}
          type=raw,value={{branch}}-{{sha}}
    - name: push to image ghrc.io
      id: build-and-push2
      uses: docker/build-push-action@v4
      with:
        context: .
        tags: ${{ steps.meta2.outputs.tags }}
        labels: ${{ steps.meta2.outputs.labels }}
        # Read the GitHub Actions cache that `cache-to` writes; without
        # this the exported cache is never reused by later runs.
        cache-from: type=gha
        cache-to: type=gha,mode=max
        push: true

release_github:
if: github.event_name == 'push' && !contains(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
permissions:
contents: read
Expand Down Expand Up @@ -91,31 +145,4 @@ jobs:
# # against the sigstore community Fulcio instance.
# run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }}
#Login against a Docker registry except on PR
#https://github.com/docker/login-action
- name: login to ghcr
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN}}
# Extract metadata (tags, labels) for Docker
# https://github.com/docker/metadata-action
- name: Extract Docker metadata ghrc.io
id: meta2
uses: docker/metadata-action@v4
with:
images: |
ghcr.io/datawhores/of-scraper
tags: |
type=raw,value={{branch}}
type=raw,value={{branch}}-{{sha}}
- name: push to image ghrc.io
id: build-and-push2
uses: docker/build-push-action@v4
with:
context: .
tags: ${{ steps.meta2.outputs.tags }}
labels: ${{ steps.meta2.outputs.labels }}
cache-to: type=gha,mode=max
push: true
#https://github.com/docker/login-action
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,7 @@ core
doc*
out*
*.txt
*.log
*.log
# Idea
/workspace.xml
.idea
Empty file removed cd
Empty file.
8 changes: 4 additions & 4 deletions ofscraper/actions/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def normal_post_process():
)
try:
model_id = ele.id
operations.table_init_create(model_id, ele.name)
operations.table_init_create(model_id=model_id, username=ele.name)
combined_urls, posts = asyncio.run(OF.process_areas(ele, model_id))
download.download_process(
ele.name, model_id, combined_urls, posts=posts
Expand Down Expand Up @@ -190,8 +190,8 @@ def process_like():
f"Getting {','.join(areas.get_like_area())} for [bold]{ele.name}[/bold]\n[bold]Subscription Active:[/bold] {ele.active}"
)
model_id = ele.id
operations.table_init_create(model_id, ele.name)
unfavorited_posts = like.get_post_for_like(model_id, ele.name)
operations.table_init_create(model_id=model_id, username=ele.name)
unfavorited_posts = like.get_post_for_like(model_id=model_id, username=ele.name)
unfavorited_posts = filters.post_filter_for_like(
unfavorited_posts, like=True
)
Expand All @@ -217,7 +217,7 @@ def process_unlike():
f"Getting {','.join(areas.get_like_area())} for [bold]{ele.name}[/bold]\n[bold]Subscription Active:[/bold] {ele.active}"
)
model_id = profile.get_id(ele.name)
operations.table_init_create(model_id, ele.name)
operations.table_init_create(model_id=model_id, username=ele.name)
favorited_posts = like.get_posts_for_unlike(model_id, ele.name)
favorited_posts = filters.post_filter_for_like(
favorited_posts, like=False
Expand Down
3 changes: 2 additions & 1 deletion ofscraper/classes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@ def text_trunicate(self, text):

def file_cleanup(self, text, mediatype=None):
    """Sanitize ``text`` so it can be used as part of a file name.

    Steps, in order: coerce to ``str``; remove ``<...>`` tag-like spans;
    drop newlines and the characters ``< > : " / | ? * ;``; turn each run
    of ``-`` into a single ``_``; collapse runs of spaces; finally replace
    every remaining space with ``data.get_spacereplacer(mediatype=...)``.

    Args:
        text: Value to sanitize; any object is accepted and stringified.
        mediatype: Forwarded to ``data.get_spacereplacer``.

    Returns:
        The sanitized string.
    """
    text = str(text)
    # Strip tag-like markup before removing the individual characters.
    text = re.sub(r"<[^>]*>", "", text)
    # Raw string: "\|" is not a valid Python string escape and triggers a
    # SyntaxWarning on Python 3.12+. The regex itself is unchanged - inside
    # the character class "\n" is a newline and "\|" is a literal "|".
    text = re.sub(r'[\n<>:"/\|?*:;]+', "", text)
    text = re.sub("-+", "_", text)
    text = re.sub(" +", " ", text)
    text = re.sub(" ", data.get_spacereplacer(mediatype=mediatype), text)
    return text

def db_cleanup(self, string):
    """Return ``string`` as plain, whitespace-normalized text.

    The value is coerced to ``str``, ``<...>`` tag-like spans are removed,
    all whitespace runs are collapsed to single spaces (leading/trailing
    whitespace is dropped), and the result is passed through
    ``BeautifulSoup(...).get_text()``.

    Args:
        string: Value to clean; any object is accepted and stringified.

    Returns:
        The cleaned string.
    """
    # The original read `text = str(text)`, which references an unassigned
    # local and raises UnboundLocalError; the parameter is `string`.
    string = str(string)
    string = re.sub("<[^>]*>", "", string)
    string = " ".join(string.split())
    string = BeautifulSoup(string, html_parser).get_text()
    return string
12 changes: 8 additions & 4 deletions ofscraper/classes/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import ofscraper.utils.config.data as data
import ofscraper.utils.constants as constants
import ofscraper.utils.logs.helpers as log_helpers
import ofscraper.utils.dates as dates

warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

Expand Down Expand Up @@ -69,14 +70,14 @@ def mediatype(self):
return f"{self._media['type']}s".lower()

@property
def length(self):
def duration(self):
return self._media.get("duration") or self.media_source.get("duration")

@property
def numeric_length(self):
if not self.length:
def numeric_duration(self):
if not self.duration:
return "N/A"
return str((arrow.get(self.length) - arrow.get(0)))
return str((arrow.get(self.duration) - arrow.get(0)))

@property
def url(self):
Expand Down Expand Up @@ -414,6 +415,9 @@ def username(self):
def model_id(self):
return self._post.model_id

@property
def duration_string(self):
    """Duration formatted by ``dates.format_seconds``, or ``None``.

    ``None`` is returned whenever ``self.duration`` is falsy.
    """
    if not self.duration:
        return None
    return dates.format_seconds(self.duration)
def get_text(self):
if self.responsetype != "Profile":
text = (
Expand Down
2 changes: 1 addition & 1 deletion ofscraper/classes/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def responsetype(self):
elif self.pinned:
return "pinned"
elif self.archived:
return "self.archived"
return "archived"
elif self.post.get("responseType") == "post":
return "timeline"
return self.post.get("responseType")
Expand Down
Loading

0 comments on commit fecdcbe

Please sign in to comment.