You are on page 1 of 47

untitled13

February 15, 2024

[3]: from google.colab import files

# Ask Colab for a file upload and keep the result in `uploaded`.
# In the recorded run this saved sample4.csv, which the next cell reads by name.
uploaded = files.upload()

<IPython.core.display.HTML object>
Saving sample4.csv to sample4.csv

[5]: import pandas as pd

# Load the uploaded sample4.csv into a DataFrame.
# NOTE(review): the recorded output shows the second header as "Game Length"
# with literal quote characters -- the file's header quoting probably needs
# cleaning (possibly skipinitialspace=True); TODO confirm against the raw file.


data = pd.read_csv('sample4.csv')

# Print the first five rows as a quick check that the file parsed.


print(data.head())

Game Number "Game Length"


0 1 30
1 2 29
2 3 31
3 4 16
4 5 24

[7]: from google.colab import files

# Ask Colab for another upload; this rebinds `uploaded` from the earlier cell.
# In the recorded run this saved sample4.xlsx.
uploaded = files.upload()

<IPython.core.display.HTML object>
Saving sample4.xlsx to sample4.xlsx

[24]: import pandas as pd

# Path of the workbook uploaded in the previous cell.
xlsx_file_path = 'sample4.xlsx'

# No sheet_name is passed, so read_excel uses its default sheet selection.
df = pd.read_excel(xlsx_file_path)

# Plain-text dump; in the recorded run pandas abbreviated it (1000 rows x 2 columns).
print(df)

1
Game Number Game Length
0 1 30
1 2 29
2 3 31
3 4 16
4 5 24
.. … …
995 996 24
996 997 21
997 998 47
998 999 30
999 1000 12

[1000 rows x 2 columns]

[9]: import pandas as pd

excel_file = 'sample4.xlsx'

# Open the workbook once. The context manager releases the file handle when
# the cell finishes, instead of leaving the ExcelFile open in the kernel.
with pd.ExcelFile(excel_file) as xls:
    sheet_names = xls.sheet_names

    for sheet_name in sheet_names:
        # Parse from the already-open workbook rather than re-reading the
        # file from disk for every sheet.
        df = pd.read_excel(xls, sheet_name=sheet_name)
        print(f"Contents of sheet '{sheet_name}':")
        print(df)
        print()

Contents of sheet 'Worksheet':


Game Number Game Length
0 1 30
1 2 29
2 3 31
3 4 16
4 5 24
.. … …
995 996 24
996 997 21
997 998 47
998 999 30
999 1000 12

[1000 rows x 2 columns]

[11]: from google.colab import files

# Ask Colab for another upload; this rebinds `uploaded` again.
# In the recorded run this saved sample4.txt.
uploaded = files.upload()

2
<IPython.core.display.HTML object>
Saving sample4.txt to sample4.txt

[16]: import pandas as pd

text_file = 'sample4.txt'
with open(text_file, 'r') as file:
    lines = file.readlines()

# Strip line terminators and ignore whitespace-only lines, so a trailing blank
# line is not reported as the "last row" and no stray newline is printed.
rows = [line.rstrip('\r\n') for line in lines if line.strip()]
if rows:
    print(f"First row of '{text_file}': {rows[0]}")
    print(f"Last row of '{text_file}': {rows[-1]}")
else:
    # An empty (or all-blank) file would otherwise fail on lines[0].
    print(f"'{text_file}' contains no non-blank rows")

csv_file = 'sample4.csv'
# skiprows=1 discards the file's own header line so that `names` replaces it.
csv_data = pd.read_csv(csv_file, skiprows=1, usecols=[0, 1],
                       names=['Game', 'Points'])

print(f"\nParsed CSV data from '{csv_file}' with selected columns and renamed columns:")

print(csv_data)

First row of 'sample4.txt': Game Game

Last row of 'sample4.txt':

Parsed CSV data from 'sample4.csv' with selected columns and renamed columns:
Game Points
0 1 30
1 2 29
2 3 31
3 4 16
4 5 24
.. … …
995 996 24
996 997 21
997 998 47
998 999 30
999 1000 12

[1000 rows x 2 columns]

[17]: import pandas as pd

csv_file = 'sample4.csv'

# The file has only two columns (positions 0 and 1). Requesting position 2 via
# usecols is out of bounds: the recorded run emitted a FutureWarning saying
# this will raise ParserError in a future pandas version. Read only the column
# that exists, then add 'New_Column3' explicitly as an all-NaN column so the
# resulting frame has the same columns and values as before.
csv_data = (
    pd.read_csv(csv_file, skiprows=1, usecols=[1], names=['New_Column2'])
    .reindex(columns=['New_Column2', 'New_Column3'])
)

print(f"\nParsed CSV data from '{csv_file}' with selected columns and renamed columns:")

print(csv_data)

3
Parsed CSV data from 'sample4.csv' with selected columns and renamed columns:
New_Column2 New_Column3
0 30 NaN
1 29 NaN
2 31 NaN
3 16 NaN
4 24 NaN
.. … …
995 24 NaN
996 21 NaN
997 47 NaN
998 30 NaN
999 12 NaN

[1000 rows x 2 columns]


<ipython-input-17-f3f1bbee4a5f>:7: FutureWarning: Defining usecols with out of
bounds indices is deprecated and will raise a ParserError in a future version.
csv_data = pd.read_csv(csv_file, skiprows=1, usecols=[1, 2],
names=['New_Column2', 'New_Column3'])

[18]: from google.colab import files

# Ask Colab for another upload; this rebinds `uploaded` again.
# In the recorded run this saved sample4.pdf.
uploaded = files.upload()

<IPython.core.display.HTML object>
Saving sample4.pdf to sample4.pdf

[19]: !pip install PyPDF2

Collecting PyPDF2
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
���������������������������������������� 232.6/232.6
kB 3.4 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1

[27]: import PyPDF2

pdf_file_path = 'sample4.pdf'

# The reader is handed the open file object, so all page extraction is done
# inside the `with` block while that handle is still open.
with open(pdf_file_path, 'rb') as file:
    pdf_reader = PyPDF2.PdfReader(file)

    num_pages = len(pdf_reader.pages)

    # page_num stays 0-based, as before; the printed label is 1-based.
    for page_num, page in enumerate(pdf_reader.pages):
        text = page.extract_text()
        print(f"Text from page {page_num + 1}:\n{text}\n")

Text from page 1:


Game
NumberGame
Length
1
30
2
29
3
31
4
16
5
24
6
29
7
28
8
117
9
42
10
23
11
40
12
15
13
18
14
51
15
15
16
19
17
30
18
25
19
17

5
20
55
21
20
22
12
23
39
24
25
25
56
26
61
27
77
28
34
29
14
30
8
31
31
32
34
33
22
34
12
35
52
36
50
37
24
38
20
39
91
40
33
41
27
42
25
43
83

6
44
12
45
21
46
38
47
20
48
24
49
37
50
18
51
56
52
20
53
47
54
51
55
22
56
46
57
24
58
35
59
28
60
12
61
28
62
43
63
50
64
37
65
127
66
11
67
12

7
68
22

Text from page 2:


69
25
70
57
71
53
72
38
73
33
74
16
75
35
76
29
77
23
78
21
79
11
80
31
81
12
82
37
83
40
84
29
85
14
86
15
87
38
88
74
89
19
90
121

8
91
52
92
25
93
14
94
12
95
31
96
25
97
16
98
19
99
54
100
24
101
62
102
27
103
21
104
30
105
63
106
14
107
27
108
46
109
8
110
19
111
81
112
136
113
60
114
44

9
115
44
116
115
117
28
118
15
119
26
120
31
121
46
122
29
123
22
124
25
125
37
126
78
127
11
128
17
129
20
130
38
131
30
132
51
133
42
134
64
135
37
136
32
137
32
138
35

10
Text from page 3:
139
34
140
36
141
55
142
35
143
13
144
109
145
39
146
39
147
69
148
28
149
13
150
16
151
23
152
14
153
116
154
36
155
41
156
40
157
31
158
39
159
21
160
34
161
11

11
162
46
163
27
164
10
165
47
166
12
167
42
168
34
169
16
170
19
171
100
172
34
173
20
174
23
175
29
176
32
177
81
178
15
179
50
180
67
181
76
182
30
183
16
184
20
185
37

12
186
23
187
18
188
47
189
13
190
24
191
8
192
16
193
24
194
26
195
35
196
14
197
11
198
43
199
48
200
19
201
40
202
197
203
33
204
41
205
28
206
25
207
17

Text from page 4:


208
62

13
209
17
210
71
211
30
212
41
213
17
214
51
215
9
216
33
217
47
218
13
219
59
220
17
221
9
222
51
223
26
224
39
225
39
226
35
227
18
228
45
229
36
230
34
231
30
232
34

14
233
36
234
14
235
7
236
28
237
34
238
49
239
95
240
66
241
36
242
32
243
25
244
25
245
30
246
15
247
12
248
40
249
32
250
26
251
18
252
60
253
21
254
79
255
17
256
42

15
257
40
258
23
259
39
260
58
261
19
262
45
263
17
264
66
265
31
266
25
267
38
268
16
269
36
270
20
271
29
272
26
273
32
274
22
275
11
276
60

Text from page 5:


277
133
278
46
279
13

16
280
20
281
34
282
20
283
79
284
44
285
9
286
63
287
65
288
43
289
19
290
68
291
20
292
24
293
12
294
83
295
20
296
33
297
15
298
29
299
19
300
63
301
31
302
47
303
20

17
304
45
305
10
306
13
307
10
308
23
309
8
310
84
311
38
312
22
313
43
314
25
315
32
316
29
317
20
318
26
319
26
320
40
321
129
322
45
323
23
324
67
325
49
326
90
327
17

18
328
76
329
27
330
136
331
33
332
21
333
18
334
14
335
24
336
14
337
30
338
26
339
26
340
29
341
18
342
53
343
47
344
93
345
47

Text from page 6:


346
9
347
56
348
60
349
12
350
69

19
351
18
352
38
353
38
354
27
355
42
356
61
357
33
358
39
359
47
360
13
361
27
362
17
363
99
364
44
365
42
366
43
367
24
368
29
369
48
370
34
371
43
372
52
373
10
374
28

20
375
41
376
87
377
9
378
22
379
60
380
19
381
33
382
35
383
26
384
22
385
24
386
68
387
23
388
19
389
32
390
32
391
23
392
22
393
23
394
49
395
20
396
11
397
64
398
24

21
399
29
400
47
401
20
402
13
403
15
404
34
405
89
406
81
407
22
408
79
409
37
410
18
411
26
412
35
413
26
414
13
415
25

Text from page 7:


416
118
417
13
418
23
419
14
420
12
421
17

22
422
16
423
35
424
19
425
114
426
97
427
24
428
71
429
17
430
22
431
25
432
36
433
53
434
51
435
14
436
48
437
45
438
25
439
18
440
17
441
16
442
83
443
22
444
16
445
47

23
446
25
447
48
448
15
449
57
450
14
451
24
452
38
453
100
454
34
455
37
456
59
457
24
458
16
459
55
460
36
461
28
462
33
463
65
464
12
465
14
466
32
467
15
468
28
469
21

24
470
84
471
46
472
35
473
31
474
16
475
26
476
16
477
14
478
29
479
9
480
29
481
35
482
47
483
30
484
13
485
23

Text from page 8:


486
16
487
30
488
23
489
27
490
13
491
37
492
26

25
493
17
494
56
495
73
496
144
497
40
498
21
499
47
500
39
501
16
502
37
503
12
504
17
505
13
506
12
507
48
508
73
509
41
510
50
511
10
512
65
513
58
514
13
515
18
516
22

26
517
10
518
11
519
31
520
27
521
77
522
21
523
30
524
16
525
15
526
18
527
47
528
56
529
95
530
33
531
42
532
51
533
13
534
29
535
53
536
41
537
30
538
16
539
24
540
16

27
541
70
542
28
543
24
544
45
545
47
546
64
547
8
548
23
549
16
550
54
551
11
552
92
553
11
554
64
555
11

Text from page 9:


556
58
557
35
558
19
559
30
560
23
561
157
562
40
563
19

28
564
15
565
39
566
77
567
30
568
13
569
11
570
18
571
42
572
27
573
16
574
35
575
37
576
72
577
31
578
20
579
36
580
40
581
23
582
7
583
20
584
46
585
103
586
90
587
15

29
588
63
589
86
590
18
591
13
592
20
593
15
594
8
595
39
596
54
597
52
598
36
599
9
600
41
601
14
602
66
603
34
604
13
605
19
606
12
607
35
608
37
609
46
610
25
611
53

30
612
30
613
27
614
30
615
28
616
91
617
18
618
19
619
32
620
24
621
52
622
18
623
88
624
38
625
25

Text from page 10:


626
20
627
51
628
25
629
44
630
14
631
17
632
54
633
35
634
40

31
635
59
636
34
637
30
638
30
639
33
640
60
641
46
642
26
643
61
644
45
645
35
646
33
647
18
648
31
649
44
650
24
651
12
652
15
653
60
654
24
655
13
656
40
657
44
658
17

32
659
14
660
19
661
16
662
40
663
96
664
23
665
35
666
33
667
45
668
18
669
33
670
15
671
43
672
19
673
36
674
28
675
32
676
9
677
34
678
8
679
21
680
26
681
13
682
34

33
683
15
684
32
685
30
686
21
687
28
688
28
689
37
690
10
691
28
692
38
693
18

Text from page 11:


694
23
695
46
696
30
697
31
698
34
699
26
700
13
701
36
702
11
703
48
704
24
705
48

34
706
18
707
152
708
13
709
106
710
19
711
12
712
20
713
12
714
39
715
20
716
20
717
52
718
77
719
37
720
79
721
14
722
23
723
32
724
56
725
83
726
47
727
17
728
12
729
22

35
730
27
731
47
732
25
733
33
734
30
735
19
736
36
737
75
738
20
739
57
740
12
741
76
742
30
743
35
744
77
745
10
746
73
747
13
748
39
749
34
750
31
751
13
752
14
753
10

36
754
45
755
55
756
29
757
25
758
47
759
95
760
13
761
54
762
17

Text from page 12:


763
35
764
74
765
60
766
14
767
50
768
30
769
55
770
22
771
43
772
92
773
35
774
47
775
12
776
51

37
777
12
778
93
779
41
780
47
781
69
782
36
783
38
784
32
785
52
786
13
787
20
788
48
789
52
790
33
791
39
792
56
793
20
794
41
795
16
796
70
797
57
798
85
799
23
800
17

38
801
30
802
33
803
11
804
26
805
50
806
40
807
20
808
68
809
12
810
75
811
14
812
36
813
35
814
39
815
30
816
13
817
62
818
23
819
26
820
56
821
30
822
40
823
12
824
23

39
825
30
826
17
827
19
828
17
829
19
830
45
831
14

Text from page 13:


832
60
833
49
834
32
835
12
836
44
837
43
838
21
839
9
840
12
841
16
842
14
843
17
844
18
845
29
846
56
847
34

40
848
24
849
58
850
27
851
12
852
23
853
75
854
15
855
20
856
31
857
51
858
10
859
70
860
70
861
71
862
53
863
35
864
12
865
23
866
26
867
25
868
48
869
45
870
36
871
38

41
872
27
873
29
874
55
875
34
876
38
877
17
878
43
879
32
880
40
881
27
882
8
883
24
884
18
885
29
886
11
887
23
888
30
889
64
890
65
891
19
892
52
893
8
894
29
895
48

42
896
67
897
18
898
18
899
11
900
53
901
87

Text from page 14:


902
24
903
25
904
27
905
14
906
80
907
17
908
32
909
16
910
32
911
11
912
27
913
15
914
9
915
25
916
48
917
18
918
38

43
919
20
920
27
921
23
922
19
923
21
924
21
925
18
926
49
927
9
928
58
929
24
930
18
931
17
932
25
933
87
934
31
935
14
936
39
937
16
938
29
939
10
940
14
941
14
942
12

44
943
34
944
26
945
53
946
55
947
41
948
55
949
29
950
26
951
12
952
46
953
32
954
62
955
52
956
78
957
37
958
91
959
119
960
28
961
30
962
31
963
66
964
32
965
26
966
22

45
967
20
968
22
969
37
970
22
971
73

Text from page 15:


972
33
973
52
974
44
975
9
976
31
977
17
978
22
979
20
980
20
981
22
982
49
983
8
984
65
985
22
986
38
987
29
988
8
989
44

46
990
25
991
24
992
27
993
28
994
32
995
93
996
24
997
21
998
47
999
30
1000
12

Text from page 16:

[ ]:

47

You might also like